PostgreSQL 原始碼解讀(95)- 查詢語句#78(ExecHashJoin函式#4-HJ_SCAN_BUCKET階段)
本節是ExecHashJoin函式介紹的第四部分,主要介紹ExecHashJoin在HJ_SCAN_BUCKET階段所依賴的其他函式的實現邏輯,其中最主要的函式是ExecScanHashBucket。
一、資料結構
JoinState
Hash/NestLoop/Merge Join的基類
/* ----------------
 * JoinState information
 *
 * Superclass for state nodes of join plans.
 * The article presents it as the common base of the Hash, NestLoop and
 * Merge join state nodes; HashJoinState embeds it as its "js" member.
 * ----------------
 */
typedef struct JoinState
{
PlanState ps; /* base PlanState */
JoinType jointype; /* type of join */
bool single_match; /* True if we should skip to next outer tuple
* after finding one inner match */
ExprState *joinqual; /* JOIN quals (in addition to ps.qual) */
} JoinState;
HashJoinState
Hash Join執行期狀態結構體
/* these structs are defined in executor/hashjoin.h: */
typedef struct HashJoinTupleData *HashJoinTuple;
typedef struct HashJoinTableData *HashJoinTable;
/*
 * HashJoinState - run-time state of a hash join.
 *
 * ExecScanHashBucket() reads hashclauses, hj_HashTable, hj_CurTuple,
 * hj_CurHashValue, hj_CurBucketNo, hj_CurSkewBucketNo and hj_HashTupleSlot,
 * and writes hj_CurTuple when it finds a match.
 */
typedef struct HashJoinState
{
JoinState js; /* base JoinState; its first field is NodeTag */
ExprState *hashclauses; /* hash join condition; evaluated for each bucket entry whose hash value matches */
List *hj_OuterHashKeys; /* outer-side hash keys; list of ExprState nodes */
List *hj_InnerHashKeys; /* inner-side hash keys; list of ExprState nodes */
List *hj_HashOperators; /* list of operator OIDs */
HashJoinTable hj_HashTable; /* the hash table */
uint32 hj_CurHashValue; /* hash value that bucket entries are compared against */
int hj_CurBucketNo; /* index of the regular bucket to scan */
int hj_CurSkewBucketNo; /* index of the skew bucket to scan, or INVALID_SKEW_BUCKET_NO to use the regular bucket */
HashJoinTuple hj_CurTuple; /* tuple last returned from the current bucket, or NULL to start scanning a new bucket */
TupleTableSlot *hj_OuterTupleSlot; /* slot for the outer relation tuple */
TupleTableSlot *hj_HashTupleSlot; /* slot that receives the hash table tuple being tested */
TupleTableSlot *hj_NullOuterTupleSlot; /* dummy outer slot used for outer joins (per the article's annotation) */
TupleTableSlot *hj_NullInnerTupleSlot; /* dummy inner slot used for outer joins (per the article's annotation) */
TupleTableSlot *hj_FirstOuterTupleSlot; /* NOTE(review): purpose not shown in this excerpt */
int hj_JoinState; /* presumably the current join phase (e.g. HJ_SCAN_BUCKET) - TODO confirm */
bool hj_MatchedOuter; /* presumably whether the current outer tuple has found a match - TODO confirm */
bool hj_OuterNotEmpty; /* presumably true once the outer relation is known to be non-empty - TODO confirm */
} HashJoinState;
HashJoinTable
Hash表資料結構
/*
 * HashJoinTableData - control block of a hash join's hash table.
 *
 * Holds the bucket array, skew-optimization state, batching state with the
 * per-batch temp files, hash function lookup data, memory accounting, the
 * memory contexts, and the state used by Parallel Hash.
 */
typedef struct HashJoinTableData
{
int nbuckets; /* # buckets in the in-memory hash table */
int log2_nbuckets; /* its log2 (nbuckets must be a power of 2) */
int nbuckets_original; /* # buckets when starting the first hash */
int nbuckets_optimal; /* optimal # buckets (per batch) */
int log2_nbuckets_optimal; /* log2(nbuckets_optimal) */
/* buckets[i] is head of list of tuples in i'th in-memory bucket */
union
{
/* unshared array is per-batch storage, as are all the tuples */
struct HashJoinTupleData **unshared;
/* shared array is per-query DSA area, as are all the tuples */
dsa_pointer_atomic *shared;
} buckets;
bool keepNulls; /* true to store unmatchable NULL tuples */
bool skewEnabled; /* are we using skew optimization? */
HashSkewBucket **skewBucket; /* hashtable of skew buckets */
int skewBucketLen; /* size of skewBucket array (a power of 2!) */
int nSkewBuckets; /* number of active skew buckets */
int *skewBucketNums; /* array indexes of active skew buckets */
int nbatch; /* number of batches */
int curbatch; /* current batch #; 0 during 1st pass */
int nbatch_original; /* nbatch when we started inner scan */
int nbatch_outstart; /* nbatch when we started outer scan */
bool growEnabled; /* flag to shut off nbatch increases */
double totalTuples; /* # tuples obtained from inner plan */
double partialTuples; /* # tuples obtained from inner plan by me */
double skewTuples; /* # tuples inserted into the skew hashtable (presumably; the original comment said "skew tuples") */
/*
* These arrays are allocated for the life of the hash join, but only if
* nbatch > 1. A file is opened only when we first write a tuple into it
* (otherwise its pointer remains NULL). Note that the zero'th array
* elements never get used, since we will process rather than dump out any
* tuples of batch zero.
*/
BufFile **innerBatchFile; /* buffered virtual temp file per batch */
BufFile **outerBatchFile; /* buffered virtual temp file per batch */
/*
* Info about the datatype-specific hash functions for the datatypes being
* hashed. These are arrays of the same length as the number of hash join
* clauses (hash keys).
*/
FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */
FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */
bool *hashStrict; /* is each hash join operator strict? */
Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */
Size spacePeak; /* peak space used */
Size spaceUsedSkew; /* skew hash table's current space usage */
Size spaceAllowedSkew; /* upper limit for skew hashtable */
MemoryContext hashCxt; /* context for whole-hash-join storage */
MemoryContext batchCxt; /* context for this-batch-only storage */
/* used for dense allocation of tuples (into linked chunks) */
HashMemoryChunk chunks; /* one list for the whole batch */
/* Shared and private state for Parallel Hash. */
HashMemoryChunk current_chunk; /* this backend's current chunk */
dsa_area *area; /* DSA area to allocate memory from */
ParallelHashJoinState *parallel_state; /* parallel execution state (per the article's annotation) */
ParallelHashJoinBatchAccessor *batches; /* parallel batch accessors (per the article's annotation) */
dsa_pointer current_chunk_shared; /* DSA pointer for the current chunk - presumably the shared counterpart of current_chunk; TODO confirm */
} HashJoinTableData;
typedef struct HashJoinTableData *HashJoinTable;
HashJoinTupleData
Hash連線元組資料
/* ----------------------------------------------------------------
* hash-join hash table structures
*
* Each active hashjoin has a HashJoinTable control block, which is
* palloc'd in the executor's per-query context. All other storage needed
* for the hashjoin is kept in private memory contexts, two for each hashjoin.
* This makes it easy and fast to release the storage when we don't need it
* anymore. (Exception: data associated with the temp files lives in the
* per-query context too, since we always call buffile.c in that context.)
* 每個活動的hashjoin都有一個HashJoinTable控制塊,它透過palloc分配在執行器的per-query(查詢級)上下文中。
* hashjoin所需的所有其他儲存都儲存在私有記憶體上下文中,每個hashjoin有兩個。
* 這樣在不再需要這些儲存時,可以簡單而快速地釋放它們。
* (例外:與臨時檔案相關的資料也位於per-query上下文中,因為我們總是在該上下文中呼叫buffile.c。)
*
* The hashtable contexts are made children of the per-query context, ensuring
* that they will be discarded at end of statement even if the join is
* aborted early by an error. (Likewise, any temporary files we make will
* be cleaned up by the virtual file manager in event of an error.)
* hashtable上下文是每個查詢上下文的子上下文,確保在語句結束時丟棄它們,即使連線因錯誤而提前中止。
* (同樣,如果出現錯誤,虛擬檔案管理器將清理建立的任何臨時檔案。)
*
* Storage that should live through the entire join is allocated from the
* "hashCxt", while storage that is only wanted for the current batch is
* allocated in the "batchCxt". By resetting the batchCxt at the end of
* each batch, we free all the per-batch storage reliably and without tedium.
* 需要在整個連線期間存活的儲存從“hashCxt”分配,而只在當前批次需要的儲存則在“batchCxt”中分配。
* 透過在每個批次結束時重置batchCxt,可以可靠且毫不繁瑣地釋放該批次的所有儲存。
*
* During first scan of inner relation, we get its tuples from executor.
* If nbatch > 1 then tuples that don't belong in first batch get saved
* into inner-batch temp files. The same statements apply for the
* first scan of the outer relation, except we write tuples to outer-batch
* temp files. After finishing the first scan, we do the following for
* each remaining batch:
* 1. Read tuples from inner batch file, load into hash buckets.
* 2. Read tuples from outer batch file, match to hash buckets and output.
* 在第一次掃描inner relation時,從執行器獲取其元組。
* 如果nbatch > 1,那麼不屬於第一個批次的元組會被儲存到inner批次臨時檔案中。
* 同樣的說法也適用於outer relation的第一次掃描,只是元組被寫入outer批次臨時檔案。
* 完成第一次掃描後,我們對每批剩餘的元組做如下處理:
* 1.從內部批處理檔案讀取元組,載入到雜湊桶中。
* 2.從外部批處理檔案讀取元組,匹配雜湊桶和輸出。
*
* It is possible to increase nbatch on the fly if the in-memory hash table
* gets too big. The hash-value-to-batch computation is arranged so that this
* can only cause a tuple to go into a later batch than previously thought,
* never into an earlier batch. When we increase nbatch, we rescan the hash
* table and dump out any tuples that are now of a later batch to the correct
* inner batch file. Subsequently, while reading either inner or outer batch
* files, we might find tuples that no longer belong to the current batch;
* if so, we just dump them out to the correct batch file.
* 如果記憶體中的雜湊表太大,可以動態增加nbatch。
* 雜湊值到批處理的計算是這樣安排的:
* 這隻會導致元組進入比以前認為的更晚的批處理,而不會進入更早的批處理。
* 當增加nbatch時,重新掃描雜湊表,並將現在屬於後面批處理的任何元組轉儲到正確的內部批處理檔案。
* 隨後,在讀取內部或外部批處理檔案時,可能會發現不再屬於當前批處理的元組;
* 如果是這樣,只需將它們轉儲到正確的批處理檔案即可。
* ----------------------------------------------------------------
*/
/* these are in nodes/execnodes.h: */
/* typedef struct HashJoinTupleData *HashJoinTuple; */
/* typedef struct HashJoinTableData *HashJoinTable; */
/*
 * HashJoinTupleData - header placed in front of each tuple stored in the
 * hash table.  It links the tuples of one bucket into a chain and caches
 * the tuple's hash code.
 */
typedef struct HashJoinTupleData
{
/* link to next tuple in same bucket */
union
{
struct HashJoinTupleData *unshared;
dsa_pointer shared;
} next;
uint32 hashvalue; /* tuple's hash code */
/* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
} HashJoinTupleData;
/* Size of the HashJoinTupleData header, rounded up with MAXALIGN. */
#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
/*
 * Given a pointer to a HashJoinTupleData header, yield the MinimalTuple
 * located HJTUPLE_OVERHEAD bytes past the start of that header.
 */
#define HJTUPLE_MINTUPLE(hjtup) \
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
二、原始碼解讀
ExecScanHashBucket
掃描當前outer relation元組對應的hash桶,在其中尋找與該元組匹配的inner relation元組。
/*----------------------------------------------------------------------------------------------------
HJ_SCAN_BUCKET 階段
----------------------------------------------------------------------------------------------------*/
/*
 * ExecScanHashBucket
 *		Scan a hash bucket for inner tuples matching the current outer tuple.
 *
 * The current outer tuple must already be stored in
 * econtext->ecxt_outertuple.
 *
 * hjstate->hj_CurTuple says where to resume.  When it is non-NULL it is the
 * tuple returned by the previous call and the scan continues with its
 * successor in the chain.  When it is NULL a fresh scan begins at the head
 * of the skew bucket numbered hj_CurSkewBucketNo, or, if that is
 * INVALID_SKEW_BUCKET_NO, at the head of regular bucket hj_CurBucketNo.
 *
 * Returns true on a match; the inner tuple is then recorded in
 * hjstate->hj_CurTuple and in econtext->ecxt_innertuple, the latter through
 * hjstate->hj_HashTupleSlot.  Returns false once the chain is exhausted, in
 * which case hj_CurTuple is not modified here.
 */
bool
ExecScanHashBucket(HashJoinState *hjstate,
                   ExprContext *econtext)
{
    ExprState  *joinClauses = hjstate->hashclauses;
    HashJoinTable table = hjstate->hj_HashTable;
    HashJoinTuple candidate = hjstate->hj_CurTuple;
    uint32      wantedHash = hjstate->hj_CurHashValue;

    /* Pick the starting point: resume after the last hit, or start a chain. */
    if (candidate == NULL)
    {
        if (hjstate->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
            candidate = table->buckets.unshared[hjstate->hj_CurBucketNo];
        else
            candidate = table->skewBucket[hjstate->hj_CurSkewBucketNo]->tuples;
    }
    else
    {
        candidate = candidate->next.unshared;
    }

    for (; candidate != NULL; candidate = candidate->next.unshared)
    {
        /* Cheap filter first: skip entries whose hash code differs. */
        if (candidate->hashvalue != wantedHash)
            continue;

        /*
         * Expose the hash table's tuple through the executor slot so that
         * the qual evaluation can see it; the slot must not pfree it.
         */
        econtext->ecxt_innertuple =
            ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(candidate),
                                  hjstate->hj_HashTupleSlot,
                                  false);

        /* Equal hash codes are not enough; the join clauses must hold. */
        if (!ExecQualAndReset(joinClauses, econtext))
            continue;

        /* Remember the hit so the next call resumes right after it. */
        hjstate->hj_CurTuple = candidate;
        return true;
    }

    /* Chain exhausted without a match. */
    return false;
}
/*
 * ExecStoreMinimalTuple
 *		Store a minimal tuple into a slot of the TTSOpsMinimalTuple type and
 *		return that slot.
 *
 * A slot of any other type is rejected with an ERROR.  When the target slot
 * is not guaranteed to be of the TTSOpsMinimalTuple type, use the more
 * expensive ExecForceStoreMinimalTuple() instead.
 *
 * shouldFree is passed through unchanged to tts_minimal_store_tuple().
 */
TupleTableSlot *
ExecStoreMinimalTuple(MinimalTuple mtup,
                      TupleTableSlot *slot,
                      bool shouldFree)
{
    bool        slotIsMinimal;

    /* Sanity checks on the arguments. */
    Assert(mtup != NULL);
    Assert(slot != NULL);
    Assert(slot->tts_tupleDescriptor != NULL);

    /* Only a minimal-tuple slot may receive a minimal tuple here. */
    slotIsMinimal = TTS_IS_MINIMALTUPLE(slot);
    if (unlikely(!slotIsMinimal))
    {
        elog(ERROR, "trying to store a minimal tuple into wrong type of slot");
    }

    tts_minimal_store_tuple(slot, mtup, shouldFree);

    return slot;
}
/*
 * tts_minimal_store_tuple
 *		Make a minimal-tuple slot refer to the given minimal tuple.
 *
 * The slot is cleared first.  Afterwards the slot's mintuple field points
 * at mtup, and its embedded minhdr header describes the same memory shifted
 * back by MINIMAL_TUPLE_OFFSET bytes, with the length enlarged by the same
 * amount.  TTS_FLAG_SHOULDFREE is set in tts_flags only when shouldFree is
 * true.
 */
static void
tts_minimal_store_tuple(TupleTableSlot *slot, MinimalTuple mtup, bool shouldFree)
{
    MinimalTupleTableSlot *minslot = (MinimalTupleTableSlot *) slot;

    /* Drop the previous contents; the slot must come back empty and unowned. */
    tts_minimal_clear(slot);
    Assert(!TTS_SHOULDFREE(slot));
    Assert(TTS_EMPTY(slot));

    /* The slot is no longer empty; reset tts_nvalid and the offset. */
    slot->tts_nvalid = 0;
    slot->tts_flags &= ~TTS_FLAG_EMPTY;

    minslot->mintuple = mtup;
    minslot->off = 0;

    /* The slot's tuple pointer is expected to refer to its own minhdr. */
    Assert(minslot->tuple == &minslot->minhdr);
    minslot->minhdr.t_data = (HeapTupleHeader) ((char *) mtup - MINIMAL_TUPLE_OFFSET);
    minslot->minhdr.t_len = mtup->t_len + MINIMAL_TUPLE_OFFSET;
    /* no need to set t_self or t_tableOid since we won't allow access */

    if (!shouldFree)
    {
        Assert(!TTS_SHOULDFREE(slot));
    }
    else
    {
        slot->tts_flags |= TTS_FLAG_SHOULDFREE;
    }
}
/*
 * ExecQualAndReset
 *		Evaluate a qual with ExecQual(), then reset the expression context's
 *		per-tuple memory context, and hand back the qual's result.
 */
#ifndef FRONTEND
static inline bool
ExecQualAndReset(ExprState *state, ExprContext *econtext)
{
    const bool  qualPassed = ExecQual(state, econtext);

    /*
     * ResetExprContext is written out by hand here to avoid an ordering
     * issue in this file.
     */
    MemoryContextReset(econtext->ecxt_per_tuple_memory);

    return qualPassed;
}
#endif
/*
 * HeapTupleHeaderSetMatch
 *		Set the HEAP_TUPLE_HAS_MATCH bit in the t_infomask2 field of the
 *		tuple header that tup points to.
 */
#define HeapTupleHeaderSetMatch(tup) \
( \
(tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \
)
三、跟蹤分析
測試指令碼如下
testdb=# set enable_nestloop=false;
SET
testdb=# set enable_mergejoin=false;
SET
testdb=# explain verbose select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je
testdb(# from t_grxx gr inner join t_jfxx jf
testdb(# on gr.dwbh = dw.dwbh
testdb(# and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort (cost=14828.83..15078.46 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Sort Key: dw.dwbh
-> Hash Join (cost=3176.00..6537.55 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Hash Cond: ((gr.grbh)::text = (jf.grbh)::text)
-> Hash Join (cost=289.00..2277.61 rows=99850 width=32)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm
Inner Unique: true
Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
-> Seq Scan on public.t_grxx gr (cost=0.00..1726.00 rows=100000 width=16)
Output: gr.dwbh, gr.grbh, gr.xm, gr.xb, gr.nl
-> Hash (cost=164.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Seq Scan on public.t_dwxx dw (cost=0.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Hash (cost=1637.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
-> Seq Scan on public.t_jfxx jf (cost=0.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
(20 rows)
啟動gdb,設定斷點
(gdb) b ExecScanHashBucket
Breakpoint 1 at 0x6ff25b: file nodeHash.c, line 1910.
(gdb) c
Continuing.
Breakpoint 1, ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1910
1910 ExprState *hjclauses = hjstate->hashclauses;
設定相關變數
1910 ExprState *hjclauses = hjstate->hashclauses;
(gdb) n
1911 HashJoinTable hashtable = hjstate->hj_HashTable;
(gdb)
1912 HashJoinTuple hashTuple = hjstate->hj_CurTuple;
(gdb)
1913 uint32 hashvalue = hjstate->hj_CurHashValue;
(gdb)
1922 if (hashTuple != NULL)
hash join連線條件
(gdb) p *hjclauses
$1 = {tag = {type = T_ExprState}, flags = 7 '\a', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x2bc4bc8,
evalfunc = 0x6d1a6e <ExecInterpExprStillValid>, expr = 0x2bb60c0, evalfunc_private = 0x6cf625 <ExecInterpExpr>,
steps_len = 7, steps_alloc = 16, parent = 0x2bb8738, ext_params = 0x0, innermost_caseval = 0x0, innermost_casenull = 0x0,
innermost_domainval = 0x0, innermost_domainnull = 0x0}
hash表
(gdb) p hashtable
$2 = (HashJoinTable) 0x2bc9de8
(gdb) p *hashtable
$3 = {nbuckets = 16384, log2_nbuckets = 14, nbuckets_original = 16384, nbuckets_optimal = 16384,
log2_nbuckets_optimal = 14, buckets = {unshared = 0x7f0fc1345050, shared = 0x7f0fc1345050}, keepNulls = false,
skewEnabled = false, skewBucket = 0x0, skewBucketLen = 0, nSkewBuckets = 0, skewBucketNums = 0x0, nbatch = 1,
curbatch = 0, nbatch_original = 1, nbatch_outstart = 1, growEnabled = true, totalTuples = 10000, partialTuples = 10000,
skewTuples = 0, innerBatchFile = 0x0, outerBatchFile = 0x0, outer_hashfunctions = 0x2bdc228,
inner_hashfunctions = 0x2bdc280, hashStrict = 0x2bdc2d8, spaceUsed = 677754, spaceAllowed = 16777216, spacePeak = 677754,
spaceUsedSkew = 0, spaceAllowedSkew = 335544, hashCxt = 0x2bdc110, batchCxt = 0x2bde120, chunks = 0x2c708f0,
current_chunk = 0x0, area = 0x0, parallel_state = 0x0, batches = 0x0, current_chunk_shared = 0}
hash桶中的元組&hash值
(gdb) p *hashTuple
Cannot access memory at address 0x0
(gdb) p hashvalue
$4 = 2324234220
(gdb)
從常規hash桶中獲取hash元組
(gdb) n
1924 else if (hjstate->hj_CurSkewBucketNo != INVALID_SKEW_BUCKET_NO)
(gdb) p hjstate->hj_CurSkewBucketNo
$5 = -1
(gdb) n
1927 hashTuple = hashtable->buckets.unshared[hjstate->hj_CurBucketNo];
(gdb)
1929 while (hashTuple != NULL)
(gdb) p hjstate->hj_CurBucketNo
$7 = 16364
(gdb) p *hashTuple
$6 = {next = {unshared = 0x0, shared = 0}, hashvalue = 1822113772}
判斷hash值是否一致
(gdb) n
1931 if (hashTuple->hashvalue == hashvalue)
(gdb) p hashTuple->hashvalue
$8 = 1822113772
(gdb) p hashvalue
$9 = 2324234220
(gdb)
不一致,繼續下一個元組
(gdb) n
1948 hashTuple = hashTuple->next.unshared;
(gdb)
1929 while (hashTuple != NULL)
下一個元組為NULL,返回F,說明沒有匹配的元組
(gdb) p *hashTuple
Cannot access memory at address 0x0
(gdb) n
1954 return false;
在ExecStoreMinimalTuple上設定斷點(這時候Hash值是一致的)
(gdb) b ExecStoreMinimalTuple
Breakpoint 2 at 0x6e8cbf: file execTuples.c, line 427.
(gdb) c
Continuing.
Breakpoint 1, ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1910
1910 ExprState *hjclauses = hjstate->hashclauses;
(gdb) del 1
(gdb) c
Continuing.
Breakpoint 2, ExecStoreMinimalTuple (mtup=0x2be81b0, slot=0x2bb9c18, shouldFree=false) at execTuples.c:427
427 Assert(mtup != NULL);
(gdb) finish
Run till exit from #0 ExecStoreMinimalTuple (mtup=0x2be81b0, slot=0x2bb9c18, shouldFree=false) at execTuples.c:427
0x00000000006ff335 in ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1936
1936 inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
Value returned is $10 = (TupleTableSlot *) 0x2bb9c18
(gdb) n
1939 econtext->ecxt_innertuple = inntuple;
匹配成功,返回T
(gdb) n
1941 if (ExecQualAndReset(hjclauses, econtext))
(gdb)
1943 hjstate->hj_CurTuple = hashTuple;
(gdb)
1944 return true;
(gdb)
1955 }
(gdb)
DONE!
HJ_SCAN_BUCKET階段實現的邏輯是掃描Hash桶,在inner relation中尋找與當前outer relation元組匹配的元組;如匹配,則把匹配的元組儲存在hjstate->hj_CurTuple和econtext->ecxt_innertuple中,並返回T。
四、參考資料
Hash Joins: Past, Present and Future/PGCon 2017
A Look at How Postgres Executes a Tiny Join - Part 1
A Look at How Postgres Executes a Tiny Join - Part 2
Assignment 2 Symmetric Hash Join
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/6906/viewspace-2374795/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- PostgreSQL 原始碼解讀(90)- 查詢語句#75(ExecHashJoin函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(91)- 查詢語句#76(ExecHashJoin函式#2)SQL原始碼函式
- PostgreSQL 原始碼解讀(93)- 查詢語句#77(ExecHashJoin函式#3)SQL原始碼函式
- PostgreSQL 原始碼解讀(97)- 查詢語句#79(ExecHashJoin函式#5-H...SQL原始碼函式
- PostgreSQL 原始碼解讀(78)- 查詢語句#63(create_plan函式#2-cr...SQL原始碼函式
- PostgreSQL 原始碼解讀(83)- 查詢語句#68(PortalStart函式)SQL原始碼函式
- PostgreSQL 原始碼解讀(88)- 查詢語句#73(SeqNext函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(89)- 查詢語句#74(SeqNext函式#2)SQL原始碼函式
- PostgreSQL 原始碼解讀(46)- 查詢語句#31(query_planner函式#7)SQL原始碼函式
- PostgreSQL 原始碼解讀(47)- 查詢語句#32(query_planner函式#8)SQL原始碼函式
- PostgreSQL 原始碼解讀(48)- 查詢語句#33(query_planner函式#9)SQL原始碼函式
- PostgreSQL 原始碼解讀(41)- 查詢語句#26(query_planner函式#4)SQL原始碼函式
- PostgreSQL 原始碼解讀(40)- 查詢語句#25(query_planner函式#3)SQL原始碼函式
- PostgreSQL 原始碼解讀(43)- 查詢語句#28(query_planner函式#5)SQL原始碼函式
- PostgreSQL 原始碼解讀(45)- 查詢語句#30(query_planner函式#6)SQL原始碼函式
- PostgreSQL 原始碼解讀(38)- 查詢語句#23(query_planner函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(39)- 查詢語句#24(query_planner函式#2)SQL原始碼函式
- PostgreSQL 原始碼解讀(73)- 查詢語句#58(grouping_planner函式...SQL原始碼函式
- PostgreSQL 原始碼解讀(82)- 查詢語句#67(PortalXXX系列函式)SQL原始碼函式
- PostgreSQL 原始碼解讀(24)- 查詢語句#9(查詢重寫)SQL原始碼
- PostgreSQL 原始碼解讀(79)- 查詢語句#64(create_plan函式#3-Se...SQL原始碼函式
- PostgreSQL 原始碼解讀(80)- 查詢語句#65(create_plan函式#4-Jo...SQL原始碼函式
- PostgreSQL 原始碼解讀(65)- 查詢語句#50(make_one_rel函式#15-...SQL原始碼函式
- PostgreSQL 原始碼解讀(62)- 查詢語句#47(make_one_rel函式#12-...SQL原始碼函式
- PostgreSQL 原始碼解讀(63)- 查詢語句#48(make_one_rel函式#13-...SQL原始碼函式
- PostgreSQL 原始碼解讀(64)- 查詢語句#49(make_one_rel函式#14-...SQL原始碼函式
- PostgreSQL 原始碼解讀(60)- 查詢語句#45(make_one_rel函式#10-...SQL原始碼函式
- PostgreSQL 原始碼解讀(61)- 查詢語句#46(make_one_rel函式#11-...SQL原始碼函式
- PostgreSQL 原始碼解讀(69)- 查詢語句#54(make_one_rel函式#19-...SQL原始碼函式
- PostgreSQL 原始碼解讀(70)- 查詢語句#55(make_one_rel函式#20-...SQL原始碼函式
- PostgreSQL 原始碼解讀(66)- 查詢語句#51(make_one_rel函式#16-...SQL原始碼函式
- PostgreSQL 原始碼解讀(67)- 查詢語句#52(make_one_rel函式#17-...SQL原始碼函式
- PostgreSQL 原始碼解讀(68)- 查詢語句#53(make_one_rel函式#18-...SQL原始碼函式
- PostgreSQL 原始碼解讀(71)- 查詢語句#56(make_one_rel函式#21-...SQL原始碼函式
- PostgreSQL 原始碼解讀(72)- 查詢語句#57(make_one_rel函式#22-...SQL原始碼函式
- PostgreSQL 原始碼解讀(20)- 查詢語句#5(查詢樹Query詳解)SQL原始碼
- PostgreSQL 原始碼解讀(18)- 查詢語句#3(SQL Parse)SQL原始碼
- PostgreSQL 原始碼解讀(19)- 查詢語句#4(ParseTree詳解)SQL原始碼