PostgreSQL Source Code Reading (91) - Query Statements #76 (The ExecHashJoin Function, Part 2)
This is the second part of the walkthrough of ExecHashJoin. It covers the implementation logic of the supporting functions that ExecHashJoin relies on, including ExecHashTableCreate and ExecChooseHashTableSize.
I. Data Structures
Plan
All plan nodes "derive" from the Plan structure by having it as their first field. This guarantees that everything keeps working when a node is cast to a Plan (node pointers are frequently cast to Plan * when passed around generically in the executor).
/* ----------------
 * Plan node
 *
 * All plan nodes "derive" from the Plan structure by having the
 * Plan structure as the first field. This ensures that everything works
 * when nodes are cast to Plan's. (node pointers are frequently cast to Plan*
 * when passed around generically in the executor)
 *
 * We never actually instantiate any Plan nodes; this is just the common
 * abstract superclass for all Plan-type nodes.
 * ----------------
 */
typedef struct Plan
{
	NodeTag		type;			/* node type */

	/*
	 * estimated execution costs for plan (see costsize.c for more info)
	 */
	Cost		startup_cost;	/* cost expended before fetching any tuples */
	Cost		total_cost;		/* total cost (assuming all tuples fetched) */

	/*
	 * planner's estimate of result size of this plan step
	 */
	double		plan_rows;		/* number of rows plan is expected to emit */
	int			plan_width;		/* average row width in bytes */

	/*
	 * information needed for parallel query
	 */
	bool		parallel_aware; /* engage parallel-aware logic? */
	bool		parallel_safe;	/* OK to use as part of parallel plan? */

	/*
	 * Common structural data for all Plan types.
	 */
	int			plan_node_id;	/* unique across entire final plan tree */
	List	   *targetlist;		/* target list to be computed at this node */
	List	   *qual;			/* implicitly-ANDed qual conditions */
	struct Plan *lefttree;		/* input plan tree(s) */
	struct Plan *righttree;
	List	   *initPlan;		/* Init Plan nodes (un-correlated expr
								 * subselects) */

	/*
	 * Information for management of parameter-change-driven rescanning
	 *
	 * extParam includes the paramIDs of all external PARAM_EXEC params
	 * affecting this plan node or its children. setParam params from the
	 * node's initPlans are not included, but their extParams are.
	 *
	 * allParam includes all the extParam paramIDs, plus the IDs of local
	 * params that affect the node (i.e., the setParams of its initplans).
	 * These are _all_ the PARAM_EXEC params that affect this node.
	 */
	Bitmapset  *extParam;
	Bitmapset  *allParam;
} Plan;
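Because every concrete plan node embeds Plan as its first member, a pointer to any of them can be treated as a Plan *. Below is a minimal sketch of the pattern (illustrative code, not from the PostgreSQL source; the helper name is hypothetical):

/*
 * Sketch only: since Plan is the first field of every concrete plan node,
 * a generic pointer can be narrowed to Plan * to read the common fields.
 */
static double
plan_estimated_rows(void *anyPlanNode)
{
	Plan	   *plan = (Plan *) anyPlanNode;	/* safe: Plan is the first field */

	return plan->plan_rows;		/* common field, valid for Hash, SeqScan, etc. */
}

The gdb session in section III shows the inverse direction: state->ps.plan is fetched as a generic Plan * and then narrowed to Hash *.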
JoinState
The common superclass for the state nodes of Hash, NestLoop, and Merge joins.
/* ----------------
 * JoinState information
 *
 * Superclass for state nodes of join plans.
 * ----------------
 */
typedef struct JoinState
{
	PlanState	ps;				/* its first field is the PlanState superclass */
	JoinType	jointype;		/* type of the join */
	bool		single_match;	/* True if we should skip to next outer tuple
								 * after finding one inner match */
	ExprState  *joinqual;		/* JOIN quals (in addition to ps.qual) */
} JoinState;
HashJoinState
The run-time state structure for Hash Join.
/* these structs are defined in executor/hashjoin.h: */
typedef struct HashJoinTupleData *HashJoinTuple;
typedef struct HashJoinTableData *HashJoinTable;

typedef struct HashJoinState
{
	JoinState	js;				/* its first field is NodeTag */
	ExprState  *hashclauses;	/* the hash join conditions */
	List	   *hj_OuterHashKeys;	/* list of ExprState nodes */
	List	   *hj_InnerHashKeys;	/* list of ExprState nodes */
	List	   *hj_HashOperators;	/* list of operator OIDs */
	HashJoinTable hj_HashTable; /* the hash table */
	uint32		hj_CurHashValue;	/* current hash value */
	int			hj_CurBucketNo; /* current regular bucket number */
	int			hj_CurSkewBucketNo; /* current skew bucket number */
	HashJoinTuple hj_CurTuple;	/* current tuple */
	TupleTableSlot *hj_OuterTupleSlot;	/* outer relation slot */
	TupleTableSlot *hj_HashTupleSlot;	/* hash tuple slot */
	TupleTableSlot *hj_NullOuterTupleSlot;	/* dummy outer slot for outer joins */
	TupleTableSlot *hj_NullInnerTupleSlot;	/* dummy inner slot for outer joins */
	TupleTableSlot *hj_FirstOuterTupleSlot;
	int			hj_JoinState;	/* current state of the join state machine */
	bool		hj_MatchedOuter;	/* has the current outer tuple found a match? */
	bool		hj_OuterNotEmpty;	/* is the outer relation non-empty? */
} HashJoinState;
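hj_JoinState records where the executor currently is in the hash join state machine. For reference, its possible values are the HJ_* phase constants defined in nodeHashjoin.c:

/* values taken by hj_JoinState, from nodeHashjoin.c */
#define HJ_BUILD_HASHTABLE		1
#define HJ_NEED_NEW_OUTER		2
#define HJ_SCAN_BUCKET			3
#define HJ_FILL_OUTER_TUPLE		4
#define HJ_FILL_INNER_TUPLES	5
#define HJ_NEED_NEW_BATCH		6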
HashJoinTable
The hash table data structure.
typedef struct HashJoinTableData
{
	int			nbuckets;		/* # buckets in the in-memory hash table */
	int			log2_nbuckets;	/* its log2 (nbuckets must be a power of 2) */
	int			nbuckets_original;	/* # buckets when starting the first hash */
	int			nbuckets_optimal;	/* optimal # buckets (per batch) */
	int			log2_nbuckets_optimal;	/* log2(nbuckets_optimal) */

	/* buckets[i] is head of list of tuples in i'th in-memory bucket */
	union
	{
		/* unshared array is per-batch storage, as are all the tuples */
		struct HashJoinTupleData **unshared;
		/* shared array is per-query DSA area, as are all the tuples */
		dsa_pointer_atomic *shared;
	}			buckets;

	bool		keepNulls;		/* true to store unmatchable NULL tuples */
	bool		skewEnabled;	/* are we using skew optimization? */
	HashSkewBucket **skewBucket;	/* hashtable of skew buckets */
	int			skewBucketLen;	/* size of skewBucket array (a power of 2!) */
	int			nSkewBuckets;	/* number of active skew buckets */
	int		   *skewBucketNums; /* array indexes of active skew buckets */
	int			nbatch;			/* number of batches */
	int			curbatch;		/* current batch #; 0 during 1st pass */
	int			nbatch_original;	/* nbatch when we started inner scan */
	int			nbatch_outstart;	/* nbatch when we started outer scan */
	bool		growEnabled;	/* flag to shut off nbatch increases */
	double		totalTuples;	/* # tuples obtained from inner plan */
	double		partialTuples;	/* # tuples obtained from inner plan by me */
	double		skewTuples;		/* # tuples inserted into skew hashtable */

	/*
	 * These arrays are allocated for the life of the hash join, but only if
	 * nbatch > 1. A file is opened only when we first write a tuple into it
	 * (otherwise its pointer remains NULL). Note that the zero'th array
	 * elements never get used, since we will process rather than dump out any
	 * tuples of batch zero.
	 */
	BufFile   **innerBatchFile; /* buffered virtual temp file per batch */
	BufFile   **outerBatchFile; /* buffered virtual temp file per batch */

	/*
	 * Info about the datatype-specific hash functions for the datatypes being
	 * hashed. These are arrays of the same length as the number of hash join
	 * clauses (hash keys).
	 */
	FmgrInfo   *outer_hashfunctions;	/* lookup data for hash functions */
	FmgrInfo   *inner_hashfunctions;	/* lookup data for hash functions */
	bool	   *hashStrict;		/* is each hash join operator strict? */

	Size		spaceUsed;		/* memory space currently used by tuples */
	Size		spaceAllowed;	/* upper limit for space used */
	Size		spacePeak;		/* peak space used */
	Size		spaceUsedSkew;	/* skew hash table's current space usage */
	Size		spaceAllowedSkew;	/* upper limit for skew hashtable */

	MemoryContext hashCxt;		/* context for whole-hash-join storage */
	MemoryContext batchCxt;		/* context for this-batch-only storage */

	/* used for dense allocation of tuples (into linked chunks) */
	HashMemoryChunk chunks;		/* one list for the whole batch */

	/* Shared and private state for Parallel Hash. */
	HashMemoryChunk current_chunk;	/* this backend's current chunk */
	dsa_area   *area;			/* DSA area to allocate memory from */
	ParallelHashJoinState *parallel_state;	/* shared parallel execution state */
	ParallelHashJoinBatchAccessor *batches; /* per-batch accessors for parallel hash */
	dsa_pointer current_chunk_shared;	/* shared pointer to the current chunk */
} HashJoinTableData;

typedef struct HashJoinTableData *HashJoinTable;
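In the non-parallel case, buckets.unshared[i] is the head of a singly linked chain of HashJoinTuple entries, linked through the next.unshared field of HashJoinTupleData (shown in the next structure). A hedged sketch of how a probe walks one chain, loosely modeled on the logic of ExecScanHashBucket (the helper name is hypothetical):

/*
 * Sketch only (not from the source): scan one in-memory bucket for entries
 * whose hash value matches, the way ExecScanHashBucket walks a chain. The
 * hash-clause re-check that the real code performs is omitted.
 */
static int
count_hash_matches(HashJoinTable hashtable, int bucketno, uint32 hashvalue)
{
	HashJoinTuple hashTuple = hashtable->buckets.unshared[bucketno];
	int			nmatch = 0;

	while (hashTuple != NULL)
	{
		if (hashTuple->hashvalue == hashvalue)
			nmatch++;			/* real code would evaluate the join quals here */
		hashTuple = hashTuple->next.unshared;	/* follow the bucket chain */
	}
	return nmatch;
}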
HashJoinTupleData
The hash join tuple data.
/* ----------------------------------------------------------------
 * hash-join hash table structures
 *
 * Each active hashjoin has a HashJoinTable control block, which is
 * palloc'd in the executor's per-query context. All other storage needed
 * for the hashjoin is kept in private memory contexts, two for each hashjoin.
 * This makes it easy and fast to release the storage when we don't need it
 * anymore. (Exception: data associated with the temp files lives in the
 * per-query context too, since we always call buffile.c in that context.)
 *
 * The hashtable contexts are made children of the per-query context, ensuring
 * that they will be discarded at end of statement even if the join is
 * aborted early by an error. (Likewise, any temporary files we make will
 * be cleaned up by the virtual file manager in event of an error.)
 *
 * Storage that should live through the entire join is allocated from the
 * "hashCxt", while storage that is only wanted for the current batch is
 * allocated in the "batchCxt". By resetting the batchCxt at the end of
 * each batch, we free all the per-batch storage reliably and without tedium.
 *
 * During first scan of inner relation, we get its tuples from executor.
 * If nbatch > 1 then tuples that don't belong in first batch get saved
 * into inner-batch temp files. The same statements apply for the
 * first scan of the outer relation, except we write tuples to outer-batch
 * temp files. After finishing the first scan, we do the following for
 * each remaining batch:
 * 1. Read tuples from inner batch file, load into hash buckets.
 * 2. Read tuples from outer batch file, match to hash buckets and output.
 *
 * It is possible to increase nbatch on the fly if the in-memory hash table
 * gets too big. The hash-value-to-batch computation is arranged so that this
 * can only cause a tuple to go into a later batch than previously thought,
 * never into an earlier batch. When we increase nbatch, we rescan the hash
 * table and dump out any tuples that are now of a later batch to the correct
 * inner batch file. Subsequently, while reading either inner or outer batch
 * files, we might find tuples that no longer belong to the current batch;
 * if so, we just dump them out to the correct batch file.
 * ----------------------------------------------------------------
 */

/* these are in nodes/execnodes.h: */
/* typedef struct HashJoinTupleData *HashJoinTuple; */
/* typedef struct HashJoinTableData *HashJoinTable; */

typedef struct HashJoinTupleData
{
	/* link to next tuple in same bucket */
	union
	{
		struct HashJoinTupleData *unshared;
		dsa_pointer shared;
	}			next;
	uint32		hashvalue;		/* tuple's hash code */
	/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
} HashJoinTupleData;

#define HJTUPLE_OVERHEAD  MAXALIGN(sizeof(HashJoinTupleData))
#define HJTUPLE_MINTUPLE(hjtup)  \
	((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
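The two macros encode the entry layout: a HashJoinTupleData header padded to a MAXALIGN boundary, followed immediately by the tuple body in MinimalTuple format. A short illustration (sketch only, with a hypothetical helper name):

/*
 * Sketch: recover the MinimalTuple stored right behind the padded header,
 * and the total entry size, which is what spaceUsed accounts for.
 */
static Size
hash_entry_size(HashJoinTuple hjtup)
{
	MinimalTuple mintup = HJTUPLE_MINTUPLE(hjtup);

	return HJTUPLE_OVERHEAD + mintup->t_len;
}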
II. Source Code Walkthrough
ExecHashTableCreate
The ExecHashTableCreate function creates the empty hash table structure that a hash join will use.
/*----------------------------------------------------------------------------------------------------
 HJ_BUILD_HASHTABLE phase
-----------------------------------------------------------------------------------------------------*/

/* ----------------
 * these are defined to avoid confusion problems with "left"
 * and "right" and "inner" and "outer". The convention is that
 * the "left" plan is the "outer" plan and the "right" plan is
 * the inner plan, but these make the code more readable.
 * ----------------
 */
#define innerPlan(node)			(((Plan *)(node))->righttree)
#define outerPlan(node)			(((Plan *)(node))->lefttree)

/* ----------------------------------------------------------------
 * ExecHashTableCreate
 *
 * create an empty hashtable data structure for hashjoin.
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(HashState *state, List *hashOperators, bool keepNulls)
{
	Hash	   *node;
	HashJoinTable hashtable;
	Plan	   *outerNode;
	size_t		space_allowed;
	int			nbuckets;
	int			nbatch;
	double		rows;
	int			num_skew_mcvs;
	int			log2_nbuckets;
	int			nkeys;
	int			i;
	ListCell   *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's the
	 * "outer" subtree of this node, but the inner relation of the hashjoin).
	 * Compute the appropriate size of the hash table.
	 */
	node = (Hash *) state->ps.plan; /* the Hash plan node */
	outerNode = outerPlan(node);	/* its outer subplan, i.e. the hashjoin's
									 * inner relation */

	/*
	 * If this is shared hash table with a partial plan, then we can't use
	 * outerNode->plan_rows to estimate its size. We need an estimate of the
	 * total number of rows across all copies of the partial plan.
	 */
	rows = node->plan.parallel_aware ? node->rows_total : outerNode->plan_rows;

	ExecChooseHashTableSize(rows, outerNode->plan_width,
							OidIsValid(node->skewTable),
							state->parallel_state != NULL,
							state->parallel_state != NULL ?
							state->parallel_state->nparticipants - 1 : 0,
							&space_allowed,
							&nbuckets, &nbatch, &num_skew_mcvs);

	/* nbuckets must be a power of 2 */
	log2_nbuckets = my_log2(nbuckets);
	Assert(nbuckets == (1 << log2_nbuckets));

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context. Everything else should be kept inside the
	 * subsidiary hashCxt or batchCxt.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->nbuckets_original = nbuckets;
	hashtable->nbuckets_optimal = nbuckets;
	hashtable->log2_nbuckets = log2_nbuckets;
	hashtable->log2_nbuckets_optimal = log2_nbuckets;
	hashtable->buckets.unshared = NULL;
	hashtable->keepNulls = keepNulls;
	hashtable->skewEnabled = false;
	hashtable->skewBucket = NULL;
	hashtable->skewBucketLen = 0;
	hashtable->nSkewBuckets = 0;
	hashtable->skewBucketNums = NULL;
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->nbatch_original = nbatch;
	hashtable->nbatch_outstart = nbatch;
	hashtable->growEnabled = true;
	hashtable->totalTuples = 0;
	hashtable->partialTuples = 0;
	hashtable->skewTuples = 0;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	hashtable->spaceUsed = 0;
	hashtable->spacePeak = 0;
	hashtable->spaceAllowed = space_allowed;
	hashtable->spaceUsedSkew = 0;
	hashtable->spaceAllowedSkew =
		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;
	hashtable->chunks = NULL;
	hashtable->current_chunk = NULL;
	hashtable->parallel_state = state->parallel_state;
	hashtable->area = state->ps.state->es_query_dsa;
	hashtable->batches = NULL;

#ifdef HJDEBUG
	printf("Hashjoin %p: initial nbatch = %d, nbuckets = %d\n",
		   hashtable, nbatch, nbuckets);
#endif
	/*
	 * Create temporary memory contexts in which to keep the hashtable working
	 * storage. See notes in executor/hashjoin.h.
	 */
	hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
											   "HashTableContext",
											   ALLOCSET_DEFAULT_SIZES);
	hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
												"HashBatchContext",
												ALLOCSET_DEFAULT_SIZES);

	/* Allocate data that will live for the life of the hashjoin */
	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

	/*
	 * Get info about the hash functions to be used for each hash key. Also
	 * remember whether the join operators are strict.
	 */
	nkeys = list_length(hashOperators);
	hashtable->outer_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));	/* hash functions for the
														 * outer relation */
	hashtable->inner_hashfunctions =
		(FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));	/* hash functions for the
														 * inner relation */
	hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));	/* strictness flags */
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashop = lfirst_oid(ho);
		Oid			left_hashfn;
		Oid			right_hashfn;

		/*
		 * Look up the OIDs of the standard hash functions compatible with
		 * the operator, for its LHS and/or RHS datatypes.
		 */
		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 hashop);
		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
		hashtable->hashStrict[i] = op_strict(hashop);
		i++;
	}

	if (nbatch > 1 && hashtable->parallel_state == NULL)
	{
		/*
		 * allocate and initialize the file arrays in hashCxt (not needed for
		 * parallel case which uses shared tuplestores instead of raw files)
		 */
		hashtable->innerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));	/* buffers each batch's inner tuples */
		hashtable->outerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));	/* buffers each batch's outer tuples */
		/* The files will not be opened until needed... */
		/* ... but make sure we have temp tablespaces established for them */
		PrepareTempTablespaces();
	}

	MemoryContextSwitchTo(oldcxt);	/* switch back to the original context */
	if (hashtable->parallel_state)	/* parallel hash */
	{
		ParallelHashJoinState *pstate = hashtable->parallel_state;
		Barrier    *build_barrier;

		/*
		 * Attach to the build barrier. The corresponding detach operation is
		 * in ExecHashTableDetach. Note that we won't attach to the
		 * batch_barrier for batch 0 yet. We'll attach later and start it out
		 * in PHJ_BATCH_PROBING phase, because batch 0 is allocated up front
		 * and then loaded while hashing (the standard hybrid hash join
		 * algorithm), and we'll coordinate that using build_barrier.
		 */
		build_barrier = &pstate->build_barrier;
		BarrierAttach(build_barrier);

		/*
		 * So far we have no idea whether there are any other participants,
		 * and if so, what phase they are working on. The only thing we care
		 * about at this point is whether someone has already created the
		 * SharedHashJoinBatch objects and the hash table for batch 0. One
		 * backend will be elected to do that now if necessary.
		 */
		if (BarrierPhase(build_barrier) == PHJ_BUILD_ELECTING &&
			BarrierArriveAndWait(build_barrier, WAIT_EVENT_HASH_BUILD_ELECTING))
		{
			pstate->nbatch = nbatch;
			pstate->space_allowed = space_allowed;
			pstate->growth = PHJ_GROWTH_OK;

			/* Set up the shared state for coordinating batches. */
			ExecParallelHashJoinSetUpBatches(hashtable, nbatch);

			/*
			 * Allocate batch 0's hash table up front so we can load it
			 * directly while hashing.
			 */
			pstate->nbuckets = nbuckets;
			ExecParallelHashTableAlloc(hashtable, 0);
		}

		/*
		 * The next Parallel Hash synchronization point is in
		 * MultiExecParallelHash(), which will progress it all the way to
		 * PHJ_BUILD_DONE. The caller must not return control from this
		 * executor node between now and then.
		 */
	}
	else						/* non-parallel hash */
	{
		/*
		 * Prepare context for the first-scan space allocations; allocate the
		 * hashbucket array therein, and set each bucket "empty".
		 */
		MemoryContextSwitchTo(hashtable->batchCxt);

		hashtable->buckets.unshared = (HashJoinTuple *)
			palloc0(nbuckets * sizeof(HashJoinTuple));

		/*
		 * Set up for skew optimization, if possible and there's a need for
		 * more than one batch. (In a one-batch join, there's no point in
		 * it.)
		 */
		if (nbatch > 1)
			ExecHashBuildSkewHash(hashtable, node, num_skew_mcvs);

		MemoryContextSwitchTo(oldcxt);
	}

	return hashtable;
}
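For context, the HJ_BUILD_HASHTABLE arm of ExecHashJoinImpl in nodeHashjoin.c calls this function roughly as follows (abridged from the source as best reconstructed here; the gdb session at the end of this article stops at exactly this assignment):

	/* Abridged caller, from the HJ_BUILD_HASHTABLE phase of ExecHashJoinImpl */
	hashtable = ExecHashTableCreate(hashState,
									node->hj_HashOperators,
									HJ_FILL_INNER(node));	/* keepNulls for right/full joins */
	node->hj_HashTable = hashtable;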
/*
 * This routine fills a FmgrInfo struct, given the OID
 * of the function to be called.
 *
 * The caller's CurrentMemoryContext is used as the fn_mcxt of the info
 * struct; this means that any subsidiary data attached to the info struct
 * (either by fmgr_info itself, or later on by a function call handler)
 * will be allocated in that context. The caller must ensure that this
 * context is at least as long-lived as the info struct itself. This is
 * not a problem in typical cases where the info struct is on the stack or
 * in freshly-palloc'd space. However, if one intends to store an info
 * struct in a long-lived table, it's better to use fmgr_info_cxt.
 */
void
fmgr_info(Oid functionId, FmgrInfo *finfo)
{
	fmgr_info_cxt_security(functionId, finfo, CurrentMemoryContext, false);
}
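Once an FmgrInfo has been filled, the hash function can be invoked cheaply for every tuple through the fmgr call macros; this is what ExecHashGetHashValue later does with the arrays set up above. A minimal sketch, assuming the int4 equality operator (Int4EqualOperator) as the hash operator and a hypothetical helper name:

#include "postgres.h"
#include "catalog/pg_operator_d.h"	/* Int4EqualOperator */
#include "fmgr.h"
#include "utils/lsyscache.h"

static uint32
hash_one_int4(int32 value)
{
	Oid			left_hashfn;
	Oid			right_hashfn;
	FmgrInfo	hashfn;

	/* look up the standard hash function paired with int4 "=" */
	if (!get_op_hash_functions(Int4EqualOperator, &left_hashfn, &right_hashfn))
		elog(ERROR, "no hash function for operator");

	fmgr_info(left_hashfn, &hashfn);	/* fill the FmgrInfo once... */

	/* ...then call the function once per tuple */
	return DatumGetUInt32(FunctionCall1(&hashfn, Int32GetDatum(value)));
}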
ExecChooseHashTableSize
The ExecChooseHashTableSize function computes an appropriate hash table size given the estimated size of the relation to be hashed (number of rows and average row width).
/*
 * Compute appropriate size for hashtable given the estimated size of the
 * relation to be hashed (number of rows and average row width).
 *
 * This is exported so that the planner's costsize.c can use it.
 */

/* Target bucket loading (tuples per bucket) */
#define NTUP_PER_BUCKET			1
void
ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
						bool try_combined_work_mem,
						int parallel_workers,
						size_t *space_allowed,
						int *numbuckets,
						int *numbatches,
						int *num_skew_mcvs)
{
	int			tupsize;		/* estimated tuple size */
	double		inner_rel_bytes;	/* estimated size of the inner relation */
	long		bucket_bytes;	/* size of the bucket array */
	long		hash_table_bytes;	/* in-memory hash table size */
	long		skew_table_bytes;	/* skew table size */
	long		max_pointers;	/* maximum number of pointers */
	long		mppow2;			/* max_pointers rounded to a power of 2 */
	int			nbatch = 1;		/* number of batches */
	int			nbuckets;		/* number of buckets */
	double		dbuckets;		/* nbuckets as a double, before clamping */

	/* Force a plausible relation size (1000.0) if no info */
	if (ntuples <= 0.0)
		ntuples = 1000.0;
	/*
	 * Estimate tupsize based on footprint of tuple in hashtable... note this
	 * does not allow for any palloc overhead. The manipulations of spaceUsed
	 * don't count palloc overhead either.
	 */
	tupsize = HJTUPLE_OVERHEAD +
		MAXALIGN(SizeofMinimalTupleHeader) +
		MAXALIGN(tupwidth);
	inner_rel_bytes = ntuples * tupsize;

	/*
	 * Target in-memory hashtable size is work_mem kilobytes.
	 */
	hash_table_bytes = work_mem * 1024L;

	/*
	 * Parallel Hash tries to use the combined work_mem of all workers to
	 * avoid the need to batch. If that won't work, it falls back to work_mem
	 * per worker and tries to process batches in parallel.
	 */
	if (try_combined_work_mem)
		hash_table_bytes += hash_table_bytes * parallel_workers;

	*space_allowed = hash_table_bytes;
	/*
	 * If skew optimization is possible, estimate the number of skew buckets
	 * that will fit in the memory allowed, and decrement the assumed space
	 * available for the main hash table accordingly.
	 *
	 * We make the optimistic assumption that each skew bucket will contain
	 * one inner-relation tuple. If that turns out to be low, we will recover
	 * at runtime by reducing the number of skew buckets.
	 *
	 * hashtable->skewBucket will have up to 8 times as many HashSkewBucket
	 * pointers as the number of MCVs we allow, since ExecHashBuildSkewHash
	 * will round up to the next power of 2 and then multiply by 4 to reduce
	 * collisions.
	 */
	if (useskew)
	{
		skew_table_bytes = hash_table_bytes * SKEW_WORK_MEM_PERCENT / 100;

		/*----------
		 * Divisor is:
		 * size of a hash tuple +
		 * worst-case size of skewBucket[] per MCV +
		 * size of skewBucketNums[] entry +
		 * size of skew bucket struct itself
		 *----------
		 */
		*num_skew_mcvs = skew_table_bytes / (tupsize +
											 (8 * sizeof(HashSkewBucket *)) +
											 sizeof(int) +
											 SKEW_BUCKET_OVERHEAD);
		if (*num_skew_mcvs > 0)
			hash_table_bytes -= skew_table_bytes;
	}
	else
		*num_skew_mcvs = 0;		/* no skew optimization */
	/*
	 * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
	 * memory is filled, assuming a single batch; but limit the value so that
	 * the pointer arrays we'll try to allocate do not exceed work_mem nor
	 * MaxAllocSize.
	 *
	 * Note that both nbuckets and nbatch must be powers of 2 to make
	 * ExecHashGetBucketAndBatch fast.
	 */
	max_pointers = *space_allowed / sizeof(HashJoinTuple);
	max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
	/* If max_pointers isn't a power of 2, must round it down to one */
	mppow2 = 1L << my_log2(max_pointers);
	if (max_pointers != mppow2)
		max_pointers = mppow2 / 2;

	/* Also ensure we avoid integer overflow in nbatch and nbuckets */
	/* (this step is redundant given the current value of MaxAllocSize) */
	max_pointers = Min(max_pointers, INT_MAX / 2);

	dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
	dbuckets = Min(dbuckets, max_pointers);
	nbuckets = (int) dbuckets;
	/* don't let nbuckets be really small, though ... */
	nbuckets = Max(nbuckets, 1024);
	/* ... and force it to be a power of 2. */
	nbuckets = 1 << my_log2(nbuckets);
	/*
	 * If there's not enough space to store the projected number of tuples and
	 * the required bucket headers, we will need multiple batches.
	 */
	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
	if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
	{
		/* We'll need multiple batches */
		long		lbuckets;
		double		dbatch;
		int			minbatch;
		long		bucket_size;

		/*
		 * If Parallel Hash with combined work_mem would still need multiple
		 * batches, we'll have to fall back to regular work_mem budget.
		 */
		if (try_combined_work_mem)
		{
			ExecChooseHashTableSize(ntuples, tupwidth, useskew,
									false, parallel_workers,
									space_allowed,
									numbuckets,
									numbatches,
									num_skew_mcvs);
			return;
		}

		/*
		 * Estimate the number of buckets we'll want to have when work_mem is
		 * entirely full. Each bucket will contain a bucket pointer plus
		 * NTUP_PER_BUCKET tuples, whose projected size already includes
		 * overhead for the hash code, pointer to the next tuple, etc.
		 */
		bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
		lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
		lbuckets = Min(lbuckets, max_pointers);
		nbuckets = (int) lbuckets;
		nbuckets = 1 << my_log2(nbuckets);
		bucket_bytes = nbuckets * sizeof(HashJoinTuple);

		/*
		 * Buckets are simple pointers to hashjoin tuples, while tupsize
		 * includes the pointer, hash code, and MinimalTupleData. So buckets
		 * should never really exceed 25% of work_mem (even for
		 * NTUP_PER_BUCKET=1); except maybe for work_mem values that are not
		 * 2^N bytes, where we might get more because of doubling. So let's
		 * look for 50% here.
		 */
		Assert(bucket_bytes <= hash_table_bytes / 2);

		/* Calculate required number of batches. */
		dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
		dbatch = Min(dbatch, max_pointers);
		minbatch = (int) dbatch;
		nbatch = 2;
		while (nbatch < minbatch)
			nbatch <<= 1;
	}

	Assert(nbuckets > 0);
	Assert(nbatch > 0);

	*numbuckets = nbuckets;
	*numbatches = nbatch;
}
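To make the arithmetic concrete, the standalone sketch below (hypothetical code, not part of PostgreSQL) redoes the single-batch sizing for the case traced in the next section: ntuples=10000, tupwidth=20, work_mem=16MB, on a 64-bit build where MAXALIGN rounds up to a multiple of 8. The two header sizes are assumptions for such a build:

#include <stdio.h>

#define MY_MAXALIGN(x) (((x) + 7) & ~7)	/* 8-byte alignment, 64-bit build */

int
main(void)
{
	/* assumed values for a 64-bit PostgreSQL 11 build */
	int			hjtuple_overhead = MY_MAXALIGN(12); /* assuming sizeof(HashJoinTupleData) == 12 */
	int			minimal_header = MY_MAXALIGN(15);	/* assuming SizeofMinimalTupleHeader == 15 */
	double		ntuples = 10000.0;
	int			tupwidth = 20;
	long		hash_table_bytes = 16384 * 1024L;	/* work_mem = 16MB */

	int			tupsize = hjtuple_overhead + minimal_header + MY_MAXALIGN(tupwidth);
	double		inner_rel_bytes = ntuples * tupsize;

	/* NTUP_PER_BUCKET=1, clamped below at 1024, rounded up to a power of 2 */
	int			nbuckets = 16384;	/* 1 << my_log2(10000) */
	long		bucket_bytes = (long) sizeof(void *) * nbuckets;

	printf("tupsize=%d inner_rel_bytes=%.0f bucket_bytes=%ld\n",
		   tupsize, inner_rel_bytes, bucket_bytes);	/* 56, 560000, 131072 */
	printf("single batch? %s\n",
		   inner_rel_bytes + bucket_bytes <= hash_table_bytes ? "yes" : "no");
	return 0;
}

With these inputs the program prints tupsize=56, inner_rel_bytes=560000, and bucket_bytes=131072; since 560000 + 131072 fits comfortably in 16MB, nbatch stays at 1. (The real code first deducts the skew-table share from hash_table_bytes; the conclusion is the same here.) These values match what the debugger shows below.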
III. Tracing and Analysis
The test script is as follows:
testdb=# set enable_nestloop=false;
SET
testdb=# set enable_mergejoin=false;
SET
testdb=# explain verbose select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je
testdb(# from t_grxx gr inner join t_jfxx jf
testdb(# on gr.dwbh = dw.dwbh
testdb(# and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort (cost=14828.83..15078.46 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Sort Key: dw.dwbh
-> Hash Join (cost=3176.00..6537.55 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Hash Cond: ((gr.grbh)::text = (jf.grbh)::text)
-> Hash Join (cost=289.00..2277.61 rows=99850 width=32)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm
Inner Unique: true
Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
-> Seq Scan on public.t_grxx gr (cost=0.00..1726.00 rows=100000 width=16)
Output: gr.dwbh, gr.grbh, gr.xm, gr.xb, gr.nl
-> Hash (cost=164.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Seq Scan on public.t_dwxx dw (cost=0.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Hash (cost=1637.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
-> Seq Scan on public.t_jfxx jf (cost=0.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
(20 rows)
Start gdb, set a breakpoint, and stop in ExecHashTableCreate:
(gdb) b ExecHashTableCreate
Breakpoint 1 at 0x6fc75d: file nodeHash.c, line 449.
(gdb) c
Continuing.
Breakpoint 1, ExecHashTableCreate (state=0x1e3cbc8, hashOperators=0x1e59890, keepNulls=false) at nodeHash.c:449
449 node = (Hash *) state->ps.plan;
Fetch the relevant information:
449 node = (Hash *) state->ps.plan;
(gdb) n
450 outerNode = outerPlan(node);
(gdb)
457 rows = node->plan.parallel_aware ? node->rows_total : outerNode->plan_rows;
(gdb)
462 state->parallel_state != NULL ?
(gdb)
459 ExecChooseHashTableSize(rows, outerNode->plan_width,
(gdb)
Inspect the Hash node. Its outer node is a SeqScan, and the inner relation (the one the hash table is built from) is estimated at 10000 rows:
(gdb) p *node
$1 = {plan = {type = T_Hash, startup_cost = 164, total_cost = 164, plan_rows = 10000, plan_width = 20,
parallel_aware = false, parallel_safe = true, plan_node_id = 4, targetlist = 0x1e4bf90, qual = 0x0,
lefttree = 0x1e493e8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, skewTable = 16977,
skewColumn = 1, skewInherit = false, rows_total = 0}
(gdb) p *outerNode
$2 = {type = T_SeqScan, startup_cost = 0, total_cost = 164, plan_rows = 10000, plan_width = 20, parallel_aware = false,
parallel_safe = true, plan_node_id = 5, targetlist = 0x1e492b0, qual = 0x0, lefttree = 0x0, righttree = 0x0,
initPlan = 0x0, extParam = 0x0, allParam = 0x0}
(gdb) p rows
$3 = 10000
(gdb)
Step into ExecChooseHashTableSize:
(gdb) step
ExecChooseHashTableSize (ntuples=10000, tupwidth=20, useskew=true, try_combined_work_mem=false, parallel_workers=0,
space_allowed=0x7ffdcf148540, numbuckets=0x7ffdcf14853c, numbatches=0x7ffdcf148538, num_skew_mcvs=0x7ffdcf148534)
at nodeHash.c:677
677 int nbatch = 1;
ExecChooseHashTableSize: compute the tuple size (56 bytes), the inner relation size (about 560KB), and the hash table space (16MB):
(gdb) n
682 if (ntuples <= 0.0)
(gdb)
690 tupsize = HJTUPLE_OVERHEAD +
(gdb)
693 inner_rel_bytes = ntuples * tupsize;
(gdb)
698 hash_table_bytes = work_mem * 1024L;
(gdb)
705 if (try_combined_work_mem)
(gdb) p tupsize
$4 = 56
(gdb) p inner_rel_bytes
$5 = 560000
(gdb) p hash_table_bytes
$6 = 16777216
ExecChooseHashTableSize: skew optimization is used; its space is carved out of the hash table budget:
(gdb) n
708 *space_allowed = hash_table_bytes;
(gdb)
724 if (useskew)
(gdb)
726 skew_table_bytes = hash_table_bytes * SKEW_WORK_MEM_PERCENT / 100;
(gdb) p useskew
$8 = true
(gdb) p hash_table_bytes
$9 = 16441672
(gdb) p skew_table_bytes
$10 = 335544
(gdb) p num_skew_mcvs
$11 = (int *) 0x7ffdcf148534
(gdb) p *num_skew_mcvs
$12 = 2396
(gdb)
ExecChooseHashTableSize: compute the maximum number of pointers (2097152):
(gdb) n
756 max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
(gdb)
758 mppow2 = 1L << my_log2(max_pointers);
(gdb) n
759 if (max_pointers != mppow2)
(gdb) p max_pointers
$13 = 2097152
(gdb) p mppow2
$15 = 2097152
ExecChooseHashTableSize: compute the number of hash buckets:
(gdb) n
764 max_pointers = Min(max_pointers, INT_MAX / 2);
(gdb)
766 dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
(gdb)
767 dbuckets = Min(dbuckets, max_pointers);
(gdb)
768 nbuckets = (int) dbuckets;
(gdb)
770 nbuckets = Max(nbuckets, 1024);
(gdb)
772 nbuckets = 1 << my_log2(nbuckets);
(gdb)
778 bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
(gdb) n
779 if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
(gdb)
834 Assert(nbuckets > 0);
(gdb) p dbuckets
$16 = 10000
(gdb) p nbuckets
$17 = 16384
(gdb) p bucket_bytes
$18 = 131072
ExecChooseHashTableSize: only one batch is needed; store the results and return:
835 Assert(nbatch > 0);
(gdb)
837 *numbuckets = nbuckets;
(gdb)
838 *numbatches = nbatch;
(gdb)
839 }
(gdb)
(gdb)
ExecHashTableCreate (state=0x1e3cbc8, hashOperators=0x1e59890, keepNulls=false) at nodeHash.c:468
468 log2_nbuckets = my_log2(nbuckets);
Initialize the hash table:
468 log2_nbuckets = my_log2(nbuckets);
(gdb) p nbuckets
$19 = 16384
(gdb) n
469 Assert(nbuckets == (1 << log2_nbuckets));
(gdb)
478 hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
(gdb)
479 hashtable->nbuckets = nbuckets;
...
Allocate the memory contexts:
...
(gdb)
522 hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
(gdb)
526 hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
(gdb)
532 oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
(gdb)
Switch the memory context and initialize the hash functions:
(gdb)
532 oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
(gdb) n
538 nkeys = list_length(hashOperators);
(gdb)
540 (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
(gdb) p nkeys
$20 = 1
(gdb) n
539 hashtable->outer_hashfunctions =
(gdb)
542 (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
(gdb)
541 hashtable->inner_hashfunctions =
(gdb)
543 hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
(gdb)
544 i = 0;
Initialize the hash operators:
(gdb) n
545 foreach(ho, hashOperators)
(gdb)
547 Oid hashop = lfirst_oid(ho);
(gdb)
551 if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
(gdb)
554 fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
(gdb)
555 fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
(gdb)
556 hashtable->hashStrict[i] = op_strict(hashop);
(gdb)
557 i++;
(gdb)
545 foreach(ho, hashOperators)
(gdb) p *hashtable->hashStrict
$21 = true
(gdb) n
560 if (nbatch > 1 && hashtable->parallel_state == NULL)
Allocate memory for the hash bucket array:
(gdb) n
575 MemoryContextSwitchTo(oldcxt);
(gdb)
577 if (hashtable->parallel_state)
(gdb)
631 MemoryContextSwitchTo(hashtable->batchCxt);
(gdb)
634 palloc0(nbuckets * sizeof(HashJoinTuple));
(gdb)
633 hashtable->buckets.unshared = (HashJoinTuple *)
(gdb) p nbuckets
$23 = 16384
Construction complete; return the hash table:
(gdb) n
641 if (nbatch > 1)
(gdb)
644 MemoryContextSwitchTo(oldcxt);
(gdb)
647 return hashtable;
(gdb)
648 }
(gdb)
ExecHashJoinImpl (pstate=0x1e3c048, parallel=false) at nodeHashjoin.c:282
282 node->hj_HashTable = hashtable;
(gdb)
DONE!
IV. References
Hash Joins: Past, Present and Future/PGCon 2017
A Look at How Postgres Executes a Tiny Join - Part 1
A Look at How Postgres Executes a Tiny Join - Part 2
Assignment 2 Symmetric Hash Join
Source: "ITPUB Blog", http://blog.itpub.net/6906/viewspace-2374799/. Please credit the source when reposting.