PostgreSQL 原始碼解讀(53)- 查詢語句#38(make_one_rel函式#3-順...
本節繼續介紹make_one_rel函式中的set_base_rel_pathlists函式及其子函式。set_base_rel_pathlists函式的目的是為每一個base rel找出所有可用的訪問路徑(包括順序掃描和所有可用的索引),每一個可用的路徑都會新增到pathlist連結串列中。這一小節主要介紹常規(區別於並行)順序掃描部分。
make_one_rel原始碼:
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
//...
/*
* Compute size estimates and consider_parallel flags for each base rel,
* then generate access paths.
*/
set_base_rel_sizes(root);//估算Relation的Size並且設定consider_parallel標記
set_base_rel_pathlists(root);//生成Relation的掃描(訪問)路徑
/*
* Generate access paths for the entire join tree.
* 透過動態規劃或遺傳演算法生成連線路徑
*/
rel = make_rel_from_joinlist(root, joinlist);
/*
* The result should join all and only the query's base rels.
*/
Assert(bms_equal(rel->relids, root->all_baserels));
//返回最上層的RelOptInfo
return rel;
}
一、資料結構
RelOptInfo
typedef struct RelOptInfo
{
NodeTag type;//節點標識
RelOptKind reloptkind;//RelOpt型別
/* all relations included in this RelOptInfo */
Relids relids; /*Relids(rtindex)集合 set of base relids (rangetable indexes) */
/* size estimates generated by planner */
double rows; /*結果元組的估算數量 estimated number of result tuples */
/* per-relation planner control flags */
bool consider_startup; /*是否考慮啟動成本?是,需要保留啟動成本低的路徑 keep cheap-startup-cost paths? */
bool consider_param_startup; /*是否考慮引數化?的路徑 ditto, for parameterized paths? */
bool consider_parallel; /*是否考慮並行處理路徑 consider parallel paths? */
/* default result targetlist for Paths scanning this relation */
struct PathTarget *reltarget; /*掃描該Relation時預設的結果 list of Vars/Exprs, cost, width */
/* materialization information */
List *pathlist; /*訪問路徑連結串列 Path structures */
List *ppilist; /*路徑連結串列中使用引數化路徑進行 ParamPathInfos used in pathlist */
List *partial_pathlist; /* partial Paths */
struct Path *cheapest_startup_path;//代價最低的啟動路徑
struct Path *cheapest_total_path;//代價最低的整體路徑
struct Path *cheapest_unique_path;//代價最低的獲取唯一值的路徑
List *cheapest_parameterized_paths;//代價最低的引數化路徑連結串列
/* parameterization information needed for both base rels and join rels */
/* (see also lateral_vars and lateral_referencers) */
Relids direct_lateral_relids; /*使用lateral語法,需依賴的Relids rels directly laterally referenced */
Relids lateral_relids; /* minimum parameterization of rel */
/* information about a base rel (not set for join rels!) */
//reloptkind=RELOPT_BASEREL時使用的資料結構
Index relid; /* Relation ID */
Oid reltablespace; /* 表空間 containing tablespace */
RTEKind rtekind; /* 基表?子查詢?還是函式等等?RELATION, SUBQUERY, FUNCTION, etc */
AttrNumber min_attr; /* 最小的屬性編號 smallest attrno of rel (often <0) */
AttrNumber max_attr; /* 最大的屬性編號 largest attrno of rel */
Relids *attr_needed; /* 陣列 array indexed [min_attr .. max_attr] */
int32 *attr_widths; /* 屬性寬度 array indexed [min_attr .. max_attr] */
List *lateral_vars; /* 關係依賴的Vars/PHVs LATERAL Vars and PHVs referenced by rel */
Relids lateral_referencers; /*依賴該關係的Relids rels that reference me laterally */
List *indexlist; /* 該關係的IndexOptInfo連結串列 list of IndexOptInfo */
List *statlist; /* 統計資訊連結串列 list of StatisticExtInfo */
BlockNumber pages; /* 塊數 size estimates derived from pg_class */
double tuples; /* 元組數 */
double allvisfrac; /* ? */
PlannerInfo *subroot; /* 如為子查詢,儲存子查詢的root if subquery */
List *subplan_params; /* 如為子查詢,儲存子查詢的引數 if subquery */
int rel_parallel_workers; /* 並行執行,需要多少個workers? wanted number of parallel workers */
/* Information about foreign tables and foreign joins */
//FDW相關資訊
Oid serverid; /* identifies server for the table or join */
Oid userid; /* identifies user to check access as */
bool useridiscurrent; /* join is only valid for current user */
/* use "struct FdwRoutine" to avoid including fdwapi.h here */
struct FdwRoutine *fdwroutine;
void *fdw_private;
/* cache space for remembering if we have proven this relation unique */
//已知的,可保證唯一元組返回的Relids連結串列
List *unique_for_rels; /* known unique for these other relid
* set(s) */
List *non_unique_for_rels; /* 已知的,返回的資料不唯一的Relids連結串列 known not unique for these set(s) */
/* used by various scans and joins: */
List *baserestrictinfo; /* 如為基本關係,則儲存約束條件 RestrictInfo structures (if base rel) */
QualCost baserestrictcost; /* 解析約束表示式的成本? cost of evaluating the above */
Index baserestrict_min_security; /* 最低安全等級 min security_level found in
* baserestrictinfo */
List *joininfo; /* 連線語句的約束條件資訊 RestrictInfo structures for join clauses
* involving this rel */
bool has_eclass_joins; /* 是否存在等價類連線? True意味著joininfo並不完整,,T means joininfo is incomplete */
/* used by partitionwise joins: */
//是否嘗試partitionwise連線,這是PG 11的一個新特性.
bool consider_partitionwise_join; /* consider partitionwise
* join paths? (if
* partitioned rel) */
Relids top_parent_relids; /* Relids of topmost parents (if "other"
* rel) */
/* used for partitioned relations */
//分割槽表使用
PartitionScheme part_scheme; /* 分割槽的schema Partitioning scheme. */
int nparts; /* 分割槽數 number of partitions */
struct PartitionBoundInfoData *boundinfo; /* 分割槽邊界資訊 Partition bounds */
List *partition_qual; /* 分割槽約束 partition constraint */
struct RelOptInfo **part_rels; /* 分割槽的RelOptInfo陣列 Array of RelOptInfos of partitions,
* stored in the same order of bounds */
List **partexprs; /* 非空分割槽鍵表示式 Non-nullable partition key expressions. */
List **nullable_partexprs; /* 可為空的分割槽鍵表示式 Nullable partition key expressions. */
List *partitioned_child_rels; /* RT Indexes連結串列 List of RT indexes. */
} RelOptInfo;
ParamPathInfo
/*
* ParamPathInfo
*
* All parameterized paths for a given relation with given required outer rels
* link to a single ParamPathInfo, which stores common information such as
* the estimated rowcount for this parameterization. We do this partly to
* avoid recalculations, but mostly to ensure that the estimated rowcount
* is in fact the same for every such path.
*
* Note: ppi_clauses is only used in ParamPathInfos for base relation paths;
* in join cases it's NIL because the set of relevant clauses varies depending
* on how the join is formed. The relevant clauses will appear in each
* parameterized join path's joinrestrictinfo list, instead.
*/
typedef struct ParamPathInfo
{
NodeTag type;//節點型別
Relids ppi_req_outer; /* 訪問路徑需要使用的Relids引數,rels supplying parameters used by path */
double ppi_rows; /* 估算的結果元組數,estimated number of result tuples */
List *ppi_clauses; /* 外部rels提供的可用連線條件連結串列,join clauses available from outer rels */
} ParamPathInfo;
Cost相關
注意:實際使用的引數值透過系統配置檔案定義,而不是這裡的常量定義!
/*
* The cost estimate produced by cost_qual_eval() includes both a one-time
* (startup) cost, and a per-tuple cost.
*/
typedef struct QualCost
{
Cost startup; /* 啟動成本,one-time cost */
Cost per_tuple; /* 每個元組的成本,per-evaluation cost */
} QualCost;
typedef double Cost; /* execution cost (in page-access units) */
/* defaults for costsize.c's Cost parameters */
/* NB: cost-estimation code should use the variables, not these constants! */
/* 注意:實際值透過系統配置檔案定義,而不是這裡的常量定義! */
/* If you change these, update backend/utils/misc/postgresql.sample.conf */
#define DEFAULT_SEQ_PAGE_COST 1.0 //順序掃描page的成本
#define DEFAULT_RANDOM_PAGE_COST 4.0 //隨機掃描page的成本
#define DEFAULT_CPU_TUPLE_COST 0.01 //處理一個元組的CPU成本
#define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 //處理一個索引元組的CPU成本
#define DEFAULT_CPU_OPERATOR_COST 0.0025 //執行一次操作或函式的CPU成本
#define DEFAULT_PARALLEL_TUPLE_COST 0.1 //並行執行,從一個worker傳輸一個元組到另一個worker的成本
#define DEFAULT_PARALLEL_SETUP_COST 1000.0 //構建並行執行環境的成本
#define DEFAULT_EFFECTIVE_CACHE_SIZE 524288 /*先前已有介紹, measured in pages */
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
Cost disable_cost = 1.0e10;//1後面10個0,透過設定一個巨大的成本,讓最佳化器自動放棄此路徑
int max_parallel_workers_per_gather = 2;//每次gather使用的worker數
二、原始碼解讀
set_base_rel_pathlists函式遍歷RelOptInfo陣列,為每一個Rel構造訪問路徑.
//--------------------------------------------------------
/*
* set_base_rel_pathlists
* Finds all paths available for scanning each base-relation entry.
* Sequential scan and any available indices are considered.
* Each useful path is attached to its relation's 'pathlist' field.
*
* 為每一個base rels找出所有可用的訪問路徑(包括順序掃描和所有可用的索引)
* 每一個可用的路徑都會新增到pathlist連結串列中
*
*/
static void
set_base_rel_pathlists(PlannerInfo *root)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍歷RelOptInfo陣列
{
RelOptInfo *rel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;
Assert(rel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;
set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
}
}
//-------------------------------------------------------- set_rel_pathlist
/*
* set_rel_pathlist
* Build access paths for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (IS_DUMMY_REL(rel))
{
/* We already proved the relation empty, so nothing more to do */
}
else if (rte->inh)//inherit
{
/* It's an "append relation", process accordingly */
set_append_rel_pathlist(root, rel, rti, rte);
}
else//常規
{
switch (rel->rtekind)
{
case RTE_RELATION://資料表
if (rte->relkind == RELKIND_FOREIGN_TABLE)//FDW
{
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
else if (rte->tablesample != NULL)//取樣表
{
/* Sampled relation */
set_tablesample_rel_pathlist(root, rel, rte);
}
else//常規資料表
{
/* Plain relation */
set_plain_rel_pathlist(root, rel, rte);
}
break;
case RTE_SUBQUERY://子查詢
/* Subquery --- 已在set_rel_size處理,fully handled during set_rel_size */
/* 函式:set_subquery_pathlist */
break;
case RTE_FUNCTION:
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_TABLEFUNC:
/* Table Function */
set_tablefunc_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list */
set_values_pathlist(root, rel, rte);
break;
case RTE_CTE:
/* CTE reference --- fully handled during set_rel_size */
break;
case RTE_NAMEDTUPLESTORE:
/* tuplestore reference --- fully handled during set_rel_size */
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
break;
}
}
/*
* If this is a baserel, we should normally consider gathering any partial
* paths we may have created for it.
*
* However, if this is an inheritance child, skip it. Otherwise, we could
* end up with a very large number of gather nodes, each trying to grab
* its own pool of workers. Instead, we'll consider gathering partial
* paths for the parent appendrel.
*
* Also, if this is the topmost scan/join rel (that is, the only baserel),
* we postpone this until the final scan/join targelist is available (see
* grouping_planner).
*/
if (rel->reloptkind == RELOPT_BASEREL &&
bms_membership(root->all_baserels) != BMS_SINGLETON)
generate_gather_paths(root, rel, false);
/*
* Allow a plugin to editorialize on the set of Paths for this base
* relation. It could add new paths (such as CustomPaths) by calling
* add_path(), or delete or modify paths added by the core code.
*/
if (set_rel_pathlist_hook)//鉤子函式
(*set_rel_pathlist_hook) (root, rel, rti, rte);
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);//找出代價最低的訪問路徑
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
//-------------------------------------------------------- set_plain_rel_pathlist
/*
* set_plain_rel_pathlist
* Build access paths for a plain relation (no subquery, no inheritance)
*/
static void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Relids required_outer;
/*
* We don't support pushing join clauses into the quals of a seqscan, but
* it could still have required parameterization due to LATERAL refs in
* its tlist.
*/
required_outer = rel->lateral_relids;//需依賴的上層Relids
/* 順序掃描,Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel, required_outer, 0));
/* 如合適,嘗試並行順序掃描,If appropriate, consider parallel sequential scan */
if (rel->consider_parallel && required_outer == NULL)
create_plain_partial_paths(root, rel);
/* 索引掃描,Consider index scans */
create_index_paths(root, rel);
/* TID掃描,Consider TID scans */
create_tidscan_paths(root, rel);
}
//-------------------------------------------------------- create_seqscan_path
/*
* create_seqscan_path
* Creates a path corresponding to a sequential scan, returning the
* pathnode.
*/
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
Relids required_outer, int parallel_workers)
{
Path *pathnode = makeNode(Path);//順序掃描路徑
pathnode->pathtype = T_SeqScan;//順序掃描
pathnode->parent = rel;//RelOptInfo
pathnode->pathtarget = rel->reltarget;//投影列
pathnode->param_info = get_baserel_parampathinfo(root, rel,
required_outer);//獲取引數化路徑資訊ParamPathInfo
pathnode->parallel_aware = parallel_workers > 0 ? true : false;//並行
pathnode->parallel_safe = rel->consider_parallel;//
pathnode->parallel_workers = parallel_workers;//並行數
pathnode->pathkeys = NIL; /* 順序掃描,不執行排序,seqscan has unordered result */
cost_seqscan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
//-------------------------------------------- get_baserel_parampathinfo
/*
* get_baserel_parampathinfo
* Get the ParamPathInfo for a parameterized path for a base relation,
* constructing one if we don't have one already.
*
* 獲取base rel的引數化路徑,儲存在結構體ParamPathInfo中.如果沒有,那麼構造一個.
*
* This centralizes estimating the rowcounts for parameterized paths.
* We need to cache those to be sure we use the same rowcount for all paths
* of the same parameterization for a given rel. This is also a convenient
* place to determine which movable join clauses the parameterized path will
* be responsible for evaluating.
*
* 統一/集中估計引數化路徑的行數。透過快取這些資料,對於相同的引數,可以快速給出相應的行數。
*/
ParamPathInfo *
get_baserel_parampathinfo(PlannerInfo *root, RelOptInfo *baserel,
Relids required_outer)
{
ParamPathInfo *ppi;//ppi變數
Relids joinrelids;//參與連線的relids
List *pclauses;//條件連結串列
double rows;//行數
ListCell *lc;//臨時變數
/* Unparameterized paths have no ParamPathInfo */
if (bms_is_empty(required_outer))
return NULL;
Assert(!bms_overlap(baserel->relids, required_outer));
/* If we already have a PPI for this parameterization, just return it */
if ((ppi = find_param_path_info(baserel, required_outer)))//已有快取?
return ppi;//直接返回
/*
* Identify all joinclauses that are movable to this base rel given this
* parameterization.
* 在給定引數條件下,識別所有可移動到此基礎關係的連線子句。
*/
joinrelids = bms_union(baserel->relids, required_outer);//合併relids
pclauses = NIL;
foreach(lc, baserel->joininfo)//遍歷連線條件
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);//連線條件
if (join_clause_is_movable_into(rinfo,
baserel->relids,
joinrelids))
pclauses = lappend(pclauses, rinfo);//如可以移動,新增到連結串列中
}
/*
* Add in joinclauses generated by EquivalenceClasses, too. (These
* necessarily satisfy join_clause_is_movable_into.)
* 透過等價類產生的連線條件一併合併到連結串列中
*/
pclauses = list_concat(pclauses,
generate_join_implied_equalities(root,
joinrelids,
required_outer,
baserel));
/* Estimate the number of rows returned by the parameterized scan */
rows = get_parameterized_baserel_size(root, baserel, pclauses);//獲取估算行數
/* And now we can build the ParamPathInfo */
ppi = makeNode(ParamPathInfo);//構造PPI返回節點
ppi->ppi_req_outer = required_outer;
ppi->ppi_rows = rows;
ppi->ppi_clauses = pclauses;
baserel->ppilist = lappend(baserel->ppilist, ppi);
return ppi;
}
//--------------------------------- get_parameterized_baserel_size
/*
* get_parameterized_baserel_size
* Make a size estimate for a parameterized scan of a base relation.
* 估算引數化掃描基礎關係的大小
*
* 'param_clauses' lists the additional join clauses to be used.
* param_clauses是使用的連線條件連結串列
*
* set_baserel_size_estimates must have been applied already.
* 呼叫此函式前,要求已呼叫set_baserel_size_estimates函式
*/
double
get_parameterized_baserel_size(PlannerInfo *root, RelOptInfo *rel,
List *param_clauses)
{
List *allclauses;
double nrows;
/*
* Estimate the number of rows returned by the parameterized scan, knowing
* that it will apply all the extra join clauses as well as the rel's own
* restriction clauses. Note that we force the clauses to be treated as
* non-join clauses during selectivity estimation.
*/
allclauses = list_concat(list_copy(param_clauses),
rel->baserestrictinfo);//合併條件
nrows = rel->tuples *
clauselist_selectivity(root,
allclauses,
rel->relid, /* do not use 0! */
JOIN_INNER,
NULL);//獲取行數
nrows = clamp_row_est(nrows);
/* For safety, make sure result is not more than the base estimate */
if (nrows > rel->rows)
nrows = rel->rows;
return nrows;//返回
}
//-------------------------------------------- cost_seqscan
/*
* cost_seqscan
* Determines and returns the cost of scanning a relation sequentially.
* 計算順序掃描Rel的成本並返回
*
* 'baserel' is the relation to be scanned
* baserel:即將被掃描的Rel
*
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
* param_info:ppi結構體
*/
void
cost_seqscan(Path *path, PlannerInfo *root,
RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;//啟動成本
Cost cpu_run_cost;//CPU成本
Cost disk_run_cost;//IO成本
double spc_seq_page_cost;//
QualCost qpqual_cost;//表示式成本
Cost cpu_per_tuple;//每個元組的CPU成本
/* Should only be applied to base relations */
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
/* Mark the path with the correct row estimate */
if (param_info)//存在PPI
path->rows = param_info->ppi_rows;//行數
else
path->rows = baserel->rows;//直接取基礎關係行數
if (!enable_seqscan)
startup_cost += disable_cost;//不允許順序掃描,disable_cost=1.0e10,即1後面10個0,這樣的路徑無需考慮
/* fetch estimated page cost for tablespace containing table */
get_tablespace_page_costs(baserel->reltablespace,
NULL,
&spc_seq_page_cost);//獲取順序訪問表空間page的成本
/*
* disk costs
*/
disk_run_cost = spc_seq_page_cost * baserel->pages;//IO成本
/* CPU costs */
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);//CPU成本
startup_cost += qpqual_cost.startup;//啟動成本
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;//處理每個元組的成本
cpu_run_cost = cpu_per_tuple * baserel->tuples;//CPU執行過程中的成本
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;//加上獲取最終投影列的成本
cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;//加上獲取最終投影列的成本
/* Adjust costing for parallelism, if used. */
if (path->parallel_workers > 0)//並行執行
{
double parallel_divisor = get_parallel_divisor(path);//拆分多少份
/* The CPU cost is divided among all the workers. */
cpu_run_cost /= parallel_divisor;//每一份的成本
/*
* It may be possible to amortize some of the I/O cost, but probably
* not very much, because most operating systems already do aggressive
* prefetching. For now, we assume that the disk run cost can't be
* amortized at all.
*/
/*
* In the case of a parallel plan, the row count needs to represent
* the number of tuples processed per worker.
*/
path->rows = clamp_row_est(path->rows / parallel_divisor);//每一份的行數
}
path->startup_cost = startup_cost;//賦值
path->total_cost = startup_cost + cpu_run_cost + disk_run_cost;//總成本=啟動 + 執行期的CPU和IO成本
}
//-------------------------------- get_tablespace_page_costs
/*
* get_tablespace_page_costs
* Return random and/or sequential page costs for a given tablespace.
* 返回給定表空間的隨機/順序讀取page的成本
*
* This value is not locked by the transaction, so this value may
* be changed while a SELECT that has used these values for planning
* is still executing.
*/
void
get_tablespace_page_costs(Oid spcid,//表空間Oid
double *spc_random_page_cost,
double *spc_seq_page_cost)
{
TableSpaceCacheEntry *spc = get_tablespace(spcid);
Assert(spc != NULL);
if (spc_random_page_cost)//隨機讀取
{
if (!spc->opts || spc->opts->random_page_cost < 0)
*spc_random_page_cost = random_page_cost;
else
*spc_random_page_cost = spc->opts->random_page_cost;
}
if (spc_seq_page_cost)//順序讀取
{
if (!spc->opts || spc->opts->seq_page_cost < 0)
*spc_seq_page_cost = seq_page_cost;
else
*spc_seq_page_cost = spc->opts->seq_page_cost;
}
}
//-------------------------------- get_restriction_qual_cost
/*
* get_restriction_qual_cost
* Compute evaluation costs of a baserel's restriction quals, plus any
* movable join quals that have been pushed down to the scan.
* Results are returned into *qpqual_cost.
* 計算base rel限制條件的估算成本,包括被下推到限制條件的連線條件
*
* This is a convenience subroutine that works for seqscans and other cases
* where all the given quals will be evaluated the hard way. It's not useful
* for cost_index(), for example, where the index machinery takes care of
* some of the quals. We assume baserestrictcost was previously set by
* set_baserel_size_estimates().
*/
static void
get_restriction_qual_cost(PlannerInfo *root, RelOptInfo *baserel,
ParamPathInfo *param_info,
QualCost *qpqual_cost)
{
if (param_info)//引數化資訊
{
/* Include costs of pushed-down clauses */
cost_qual_eval(qpqual_cost, param_info->ppi_clauses, root);//評估成本
qpqual_cost->startup += baserel->baserestrictcost.startup;
qpqual_cost->per_tuple += baserel->baserestrictcost.per_tuple;
}
else
*qpqual_cost = baserel->baserestrictcost;//沒有引數化資訊,直接返回
}
//------------------- cost_qual_eval
/*
* cost_qual_eval
* Estimate the CPU costs of evaluating a WHERE clause.
* The input can be either an implicitly-ANDed list of boolean
* expressions, or a list of RestrictInfo nodes. (The latter is
* preferred since it allows caching of the results.)
* The result includes both a one-time (startup) component,
* and a per-evaluation component.
*/
void
cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
{
cost_qual_eval_context context;
ListCell *l;
context.root = root;
context.total.startup = 0;
context.total.per_tuple = 0;
/* We don't charge any cost for the implicit ANDing at top level ... */
foreach(l, quals)//遍歷連結串列
{
Node *qual = (Node *) lfirst(l);
cost_qual_eval_walker(qual, &context);//遍歷表示式
}
*cost = context.total;//返回總成本
}
//------------ cost_qual_eval_walker
static bool
cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
{
if (node == NULL)
return false;
/*
* RestrictInfo nodes contain an eval_cost field reserved for this
* routine's use, so that it's not necessary to evaluate the qual clause's
* cost more than once. If the clause's cost hasn't been computed yet,
* the field's startup value will contain -1.
*/
if (IsA(node, RestrictInfo))//約束條件
{
RestrictInfo *rinfo = (RestrictInfo *) node;
if (rinfo->eval_cost.startup < 0)//未計算成本,初始值為-1
{
cost_qual_eval_context locContext;
locContext.root = context->root;
locContext.total.startup = 0;
locContext.total.per_tuple = 0;
/*
* For an OR clause, recurse into the marked-up tree so that we
* set the eval_cost for contained RestrictInfos too.
*/
if (rinfo->orclause)
cost_qual_eval_walker((Node *) rinfo->orclause, &locContext);//遞迴OR條件
else
cost_qual_eval_walker((Node *) rinfo->clause, &locContext);//遞迴
/*
* If the RestrictInfo is marked pseudoconstant, it will be tested
* only once, so treat its cost as all startup cost.
*/
if (rinfo->pseudoconstant)//pseudoconstant標誌為T
{
/* count one execution during startup */
locContext.total.startup += locContext.total.per_tuple;
locContext.total.per_tuple = 0;
}
rinfo->eval_cost = locContext.total;
}
context->total.startup += rinfo->eval_cost.startup;
context->total.per_tuple += rinfo->eval_cost.per_tuple;
/* do NOT recurse into children */
return false;
}
/*
* For each operator or function node in the given tree, we charge the
* estimated execution cost given by pg_proc.procost (remember to multiply
* this by cpu_operator_cost).
*
* Vars and Consts are charged zero, and so are boolean operators (AND,
* OR, NOT). Simplistic, but a lot better than no model at all.
*
* Should we try to account for the possibility of short-circuit
* evaluation of AND/OR? Probably *not*, because that would make the
* results depend on the clause ordering, and we are not in any position
* to expect that the current ordering of the clauses is the one that's
* going to end up being used. The above per-RestrictInfo caching would
* not mix well with trying to re-order clauses anyway.
*
* Another issue that is entirely ignored here is that if a set-returning
* function is below top level in the tree, the functions/operators above
* it will need to be evaluated multiple times. In practical use, such
* cases arise so seldom as to not be worth the added complexity needed;
* moreover, since our rowcount estimates for functions tend to be pretty
* phony, the results would also be pretty phony.
*/
if (IsA(node, FuncExpr))//函式表示式
{
context->total.per_tuple +=
get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;//呼叫get_func_cost函式
}
else if (IsA(node, OpExpr) ||
IsA(node, DistinctExpr) ||
IsA(node, NullIfExpr))//運算子/Distinct/NullIf判斷,呼叫get_func_cost
{
/* rely on struct equivalence to treat these all alike */
set_opfuncid((OpExpr *) node);
context->total.per_tuple +=
get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
}
else if (IsA(node, ScalarArrayOpExpr))//陣列
{
/*
* Estimate that the operator will be applied to about half of the
* array elements before the answer is determined.
*/
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
Node *arraynode = (Node *) lsecond(saop->args);
set_sa_opfuncid(saop);
context->total.per_tuple += get_func_cost(saop->opfuncid) *
cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
}
else if (IsA(node, Aggref) ||
IsA(node, WindowFunc))//聚合函式或者視窗函式
{
/*
* Aggref and WindowFunc nodes are (and should be) treated like Vars,
* ie, zero execution cost in the current model, because they behave
* essentially like Vars at execution. We disregard the costs of
* their input expressions for the same reason. The actual execution
* costs of the aggregate/window functions and their arguments have to
* be factored into plan-node-specific costing of the Agg or WindowAgg
* plan node.
*/
return false; /* 0成本,不再遞迴到子節點中,don't recurse into children */
}
else if (IsA(node, CoerceViaIO))//CoerceViaIO
{
CoerceViaIO *iocoerce = (CoerceViaIO *) node;
Oid iofunc;
Oid typioparam;
bool typisvarlena;
/* check the result type's input function */
getTypeInputInfo(iocoerce->resulttype,
&iofunc, &typioparam);
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
/* check the input type's output function */
getTypeOutputInfo(exprType((Node *) iocoerce->arg),
&iofunc, &typisvarlena);
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
}
else if (IsA(node, ArrayCoerceExpr))//ArrayCoerceExpr
{
ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
QualCost perelemcost;
cost_qual_eval_node(&perelemcost, (Node *) acoerce->elemexpr,
context->root);
context->total.startup += perelemcost.startup;
if (perelemcost.per_tuple > 0)
context->total.per_tuple += perelemcost.per_tuple *
estimate_array_length((Node *) acoerce->arg);
}
else if (IsA(node, RowCompareExpr))//RowCompareExpr
{
/* Conservatively assume we will check all the columns */
RowCompareExpr *rcexpr = (RowCompareExpr *) node;
ListCell *lc;
foreach(lc, rcexpr->opnos)
{
Oid opid = lfirst_oid(lc);
context->total.per_tuple += get_func_cost(get_opcode(opid)) *
cpu_operator_cost;
}
}
else if (IsA(node, MinMaxExpr) ||
IsA(node, SQLValueFunction) ||
IsA(node, XmlExpr) ||
IsA(node, CoerceToDomain) ||
IsA(node, NextValueExpr))//最小最大值/SQLValueFunction/XML表示式/CoerceToDomain/NextValueExpr
{
/* Treat all these as having cost 1 */
context->total.per_tuple += cpu_operator_cost;
}
else if (IsA(node, CurrentOfExpr))//CurrentOfExpr
{
/* Report high cost to prevent selection of anything but TID scan */
context->total.startup += disable_cost;//不考慮順序掃描,使用TID掃描
}
else if (IsA(node, SubLink))
{
/* This routine should not be applied to un-planned expressions */
elog(ERROR, "cannot handle unplanned sub-select");//子連結,報錯
}
else if (IsA(node, SubPlan))//子計劃
{
/*
* A subplan node in an expression typically indicates that the
* subplan will be executed on each evaluation, so charge accordingly.
* (Sub-selects that can be executed as InitPlans have already been
* removed from the expression.)
*/
SubPlan *subplan = (SubPlan *) node;
context->total.startup += subplan->startup_cost;//直接相加
context->total.per_tuple += subplan->per_call_cost;
/*
* We don't want to recurse into the testexpr, because it was already
* counted in the SubPlan node's costs. So we're done.
*/
return false;
}
else if (IsA(node, AlternativeSubPlan))//AlternativeSubPlan
{
/*
* Arbitrarily use the first alternative plan for costing. (We should
* certainly only include one alternative, and we don't yet have
* enough information to know which one the executor is most likely to
* use.)
*/
AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
return cost_qual_eval_walker((Node *) linitial(asplan->subplans),
context);
}
else if (IsA(node, PlaceHolderVar))//PHV,成本為0
{
/*
* A PlaceHolderVar should be given cost zero when considering general
* expression evaluation costs. The expense of doing the contained
* expression is charged as part of the tlist eval costs of the scan
* or join where the PHV is first computed (see set_rel_width and
* add_placeholders_to_joinrel). If we charged it again here, we'd be
* double-counting the cost for each level of plan that the PHV
* bubbles up through. Hence, return without recursing into the
* phexpr.
*/
return false;
}
/* recurse into children */
return expression_tree_walker(node, cost_qual_eval_walker,
(void *) context);//遞迴到子節點中
}
//------- get_func_cost
/*
* get_func_cost
* Given procedure id, return the function's procost field.
*/
float4
get_func_cost(Oid funcid)
{
HeapTuple tp;
float4 result;
tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));//獲取函式Oid
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for function %u", funcid);
//查詢資料字典:select proname,procost from pg_proc where procost > 1;
result = ((Form_pg_proc) GETSTRUCT(tp))->procost;//直接獲取函式的procost
ReleaseSysCache(tp);
return result;
}
三、跟蹤分析
啟動gdb:
(gdb) b set_base_rel_pathlists
Breakpoint 1 at 0x73bfb5: file allpaths.c, line 296.
(gdb) c
Continuing.
Breakpoint 1, set_base_rel_pathlists (root=0x2fd9418) at allpaths.c:296
296 for (rti = 1; rti < root->simple_rel_array_size; rti++)
進入set_plain_rel_pathlist:
(gdb)
452 set_plain_rel_pathlist(root, rel, rte);
(gdb) step
set_plain_rel_pathlist (root=0x2fd9418, rel=0x2f98278, rte=0x2eaa5d8) at allpaths.c:704
704 required_outer = rel->lateral_relids;
進入create_seqscan_path:
(gdb) step
create_seqscan_path (root=0x2fd9418, rel=0x2f98278, required_outer=0x0, parallel_workers=0) at pathnode.c:957
957 Path *pathnode = makeNode(Path);
(gdb)
969 cost_seqscan(pathnode, root, rel, pathnode->param_info);
(gdb) p *pathnode
$2 = {type = T_Path, pathtype = T_SeqScan, parent = 0x2f98278, pathtarget = 0x2f98488, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 0, startup_cost = 0, total_cost = 0,
pathkeys = 0x0}
進入cost_seqscan:
...
(gdb)
230 path->rows = baserel->rows;
#rows的獲得上一節已作介紹
(gdb) p baserel->rows
$4 = 10926
...
#表空間順序掃描的成本
(gdb) p spc_seq_page_cost
$5 = 1
#IO成本
(gdb) p disk_run_cost
$6 = 726
進入get_restriction_qual_cost:
(gdb) step
get_restriction_qual_cost (root=0x2fd9418, baserel=0x2f98278, param_info=0x0, qpqual_cost=0x7ffe12ca4620) at costsize.c:3999
3999 if (param_info)
#沒有引數化資訊,直接使用baserel->baserestrictcost
(gdb) n
4008 *qpqual_cost = baserel->baserestrictcost;
(gdb) p baserel->baserestrictcost
$7 = {startup = 0, per_tuple = 0.0050000000000000001}
回到cost_seqscan
(gdb)
cost_seqscan (path=0x2f98990, root=0x2fd9418, baserel=0x2f98278, param_info=0x0) at costsize.c:248
248 startup_cost += qpqual_cost.startup;
...
執行cost_seqscan,最終的path:
(gdb) p *path
$8 = {type = T_Path, pathtype = T_SeqScan, parent = 0x2f98278, pathtarget = 0x2f98488, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10926, startup_cost = 0, total_cost = 2226,
pathkeys = 0x0}
(gdb) p cpu_run_cost
$9 = 1500
(gdb) p disk_run_cost
$10 = 726
回到上層函式:
(gdb) n
create_seqscan_path (root=0x2fd9418, rel=0x2f98278, required_outer=0x0, parallel_workers=0) at pathnode.c:971
971 return pathnode;
(gdb)
972 }
(gdb)
set_plain_rel_pathlist (root=0x2fd9418, rel=0x2f98278, rte=0x2eaa5d8) at allpaths.c:710
710 if (rel->consider_parallel && required_outer == NULL)
繼續執行構建索引掃描路徑/TID掃描路徑函式:
714 create_index_paths(root, rel);
(gdb)
717 create_tidscan_paths(root, rel);
(gdb) n
718 }
索引掃描路徑的結果,rows = 10926, startup_cost = 324.40899999999999,total_cost = 1214.299
(gdb) p *rel->pathlist
$14 = {type = T_List, length = 1, head = 0x2fe8d10, tail = 0x2fe8d10}
(gdb) p *(Path *)rel->pathlist->head->data.ptr_value
$15 = {type = T_BitmapHeapPath, pathtype = T_BitmapHeapScan, parent = 0x2f98278, pathtarget = 0x2f98488, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10926, startup_cost = 324.40899999999999,
total_cost = 1214.299, pathkeys = 0x0}
結束呼叫
(gdb)
set_base_rel_pathlists (root=0x2fd9418) at allpaths.c:296
296 for (rti = 1; rti < root->simple_rel_array_size; rti++)
(gdb)
312 }
(gdb)
make_one_rel (root=0x2fd9418, joinlist=0x2f985d8) at allpaths.c:185
185 rel = make_rel_from_joinlist(root, joinlist);
#DONE!
相應的SQL執行計劃,cost=324.41..1214.30請參照索引掃描路徑結果(這部分原始碼下一節分析):
testdb=# explain analyze verbose select t1.* from t_dwxx t1 where dwbh > '10000' and dwbh < '20000';
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
---
Bitmap Heap Scan on public.t_dwxx t1 (cost=324.41..1214.30 rows=10926 width=23) (actual time=3.196..4.959 rows=11111 loops=
1)
Output: dwmc, dwbh, dwdz
Recheck Cond: (((t1.dwbh)::text > '10000'::text) AND ((t1.dwbh)::text < '20000'::text))
Heap Blocks: exact=85
-> Bitmap Index Scan on t_dwxx_pkey (cost=0.00..321.68 rows=10926 width=0) (actual time=3.159..3.159 rows=11111 loops=1)
Index Cond: (((t1.dwbh)::text > '10000'::text) AND ((t1.dwbh)::text < '20000'::text))
Planning Time: 0.315 ms
Execution Time: 5.673 ms
(8 rows)
四、參考資料
allpaths.c
cost.h
costsize.c
PG Document:Query Planning
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/6906/viewspace-2374855/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- PostgreSQL 原始碼解讀(68)- 查詢語句#53(make_one_rel函式#18-...SQL原始碼函式
- PostgreSQL 原始碼解讀(38)- 查詢語句#23(query_planner函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(65)- 查詢語句#50(make_one_rel函式#15-...SQL原始碼函式
- PostgreSQL 原始碼解讀(62)- 查詢語句#47(make_one_rel函式#12-...SQL原始碼函式
- PostgreSQL 原始碼解讀(63)- 查詢語句#48(make_one_rel函式#13-...SQL原始碼函式
- PostgreSQL 原始碼解讀(64)- 查詢語句#49(make_one_rel函式#14-...SQL原始碼函式
- PostgreSQL 原始碼解讀(60)- 查詢語句#45(make_one_rel函式#10-...SQL原始碼函式
- PostgreSQL 原始碼解讀(61)- 查詢語句#46(make_one_rel函式#11-...SQL原始碼函式
- PostgreSQL 原始碼解讀(69)- 查詢語句#54(make_one_rel函式#19-...SQL原始碼函式
- PostgreSQL 原始碼解讀(70)- 查詢語句#55(make_one_rel函式#20-...SQL原始碼函式
- PostgreSQL 原始碼解讀(66)- 查詢語句#51(make_one_rel函式#16-...SQL原始碼函式
- PostgreSQL 原始碼解讀(67)- 查詢語句#52(make_one_rel函式#17-...SQL原始碼函式
- PostgreSQL 原始碼解讀(71)- 查詢語句#56(make_one_rel函式#21-...SQL原始碼函式
- PostgreSQL 原始碼解讀(72)- 查詢語句#57(make_one_rel函式#22-...SQL原始碼函式
- PostgreSQL 原始碼解讀(52)- 查詢語句#37(make_one_rel函式#2-估...SQL原始碼函式
- PostgreSQL 原始碼解讀(54)- 查詢語句#39(make_one_rel函式#4-生...SQL原始碼函式
- PostgreSQL 原始碼解讀(55)- 查詢語句#40(make_one_rel函式#5-索...SQL原始碼函式
- PostgreSQL 原始碼解讀(56)- 查詢語句#41(make_one_rel函式#6-索...SQL原始碼函式
- PostgreSQL 原始碼解讀(57)- 查詢語句#42(make_one_rel函式#7-索...SQL原始碼函式
- PostgreSQL 原始碼解讀(58)- 查詢語句#43(make_one_rel函式#8-B...SQL原始碼函式
- PostgreSQL 原始碼解讀(59)- 查詢語句#44(make_one_rel函式#9-B...SQL原始碼函式
- PostgreSQL 原始碼解讀(49)- 查詢語句#34(make_one_rel函式#1-概覽)SQL原始碼函式
- PostgreSQL 原始碼解讀(83)- 查詢語句#68(PortalStart函式)SQL原始碼函式
- PostgreSQL 原始碼解讀(90)- 查詢語句#75(ExecHashJoin函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(91)- 查詢語句#76(ExecHashJoin函式#2)SQL原始碼函式
- PostgreSQL 原始碼解讀(93)- 查詢語句#77(ExecHashJoin函式#3)SQL原始碼函式
- PostgreSQL 原始碼解讀(52)- 查詢語句#37(make_one_rel函式#2-估算關係大小)SQL原始碼函式
- PostgreSQL 原始碼解讀(95)- 查詢語句#78(ExecHashJoin函式#4-H...SQL原始碼函式
- PostgreSQL 原始碼解讀(97)- 查詢語句#79(ExecHashJoin函式#5-H...SQL原始碼函式
- PostgreSQL 原始碼解讀(88)- 查詢語句#73(SeqNext函式#1)SQL原始碼函式
- PostgreSQL 原始碼解讀(89)- 查詢語句#74(SeqNext函式#2)SQL原始碼函式
- PostgreSQL 原始碼解讀(46)- 查詢語句#31(query_planner函式#7)SQL原始碼函式
- PostgreSQL 原始碼解讀(47)- 查詢語句#32(query_planner函式#8)SQL原始碼函式
- PostgreSQL 原始碼解讀(48)- 查詢語句#33(query_planner函式#9)SQL原始碼函式
- PostgreSQL 原始碼解讀(41)- 查詢語句#26(query_planner函式#4)SQL原始碼函式
- PostgreSQL 原始碼解讀(40)- 查詢語句#25(query_planner函式#3)SQL原始碼函式
- PostgreSQL 原始碼解讀(43)- 查詢語句#28(query_planner函式#5)SQL原始碼函式
- PostgreSQL 原始碼解讀(45)- 查詢語句#30(query_planner函式#6)SQL原始碼函式