本文分享自華為雲社群《【防過載檢查項】》,作者: 譡裡個檔。
1. GUC引數檢查
目的:針對不同版本建議設定不同的引數值,當前先檢查出來,後續diagnosis會給出建議值
-- GUC parameter check (overload protection).
-- Reads a fixed list of GUCs from the nodes 'cn_5001' and 'dn_6001_6002'
-- through pgxc_parallel_query and returns one row per node and parameter:
--   version       second word inside the parentheses of version()
--   sdn_per_node  true when at least one host's DN count differs from 1
--   setting       current value, pretty-printed for time and size units
--   diagnosis     suggested value when the setting is outside the thresholds
--                 below; NULL when a checked parameter is within range and ''
--                 for parameters that are listed without a rule
SELECT
    split_part((substring(version() from '\((.*)\)')), ' ', 2) AS version,
    (EXISTS (
        SELECT 1
        FROM (
            SELECT count(DISTINCT node_name) AS dn_cnt
            FROM pgxc_node
            WHERE node_type = 'D'
              AND node_host <> 'localhost'
            GROUP BY node_host
        ) AS dn_per_host
        WHERE dn_cnt <> 1
    )) AS sdn_per_node,
    node_name,
    name AS guc_name,
    CASE
        WHEN unit = 'ms' THEN setting::bigint/1000 || ' s'
        WHEN unit = 's' THEN setting || ' s'
        -- pg_settings reports memory units as 'kB' / '8kB'; compare
        -- case-insensitively so both spellings are pretty-printed, and cast
        -- the text value explicitly before multiplying
        WHEN upper(unit) = 'KB' THEN pg_size_pretty(setting::bigint * 1024)
        WHEN upper(unit) = '8KB' THEN pg_size_pretty(setting::bigint * 1024 * 8)
        ELSE setting
    END AS setting,
    unit,
    CASE
        WHEN name = 'default_distribution_mode' THEN
            CASE WHEN setting <> 'roundrobin' THEN 'roundrobin' END
        WHEN name = 'autovacuum' THEN
            CASE WHEN setting <> 'on' THEN 'on' END
        WHEN name = 'autovacuum_max_workers' THEN
            CASE WHEN setting::int > 6 THEN '6' END
        -- NOTE(review): this branch repeats the previous condition and can
        -- never be reached; it was possibly meant for
        -- 'autovacuum_max_workers_hstore' -- confirm before changing
        WHEN name = 'autovacuum_max_workers' THEN
            CASE WHEN setting::int > 3 THEN '3' END
        -- 600 corresponds to the '<= 10min' advice when the value is in seconds
        WHEN name = 'session_timeout' THEN
            CASE WHEN setting::int > 600 OR setting::int = 0 THEN '<= 10min' END
        -- value is in milliseconds: 24h = 24 * 60 * 60 * 1000 ms
        -- (the previous bound 24 * 60 * 1000 was only 24 minutes)
        WHEN name = 'statement_timeout' THEN
            CASE WHEN setting::int > 24 * 60 * 60 * 1000 OR setting::int = 0 THEN '<= 24h' END
        -- the three size limits below are compared in kB
        WHEN name = 'sql_use_spacelimit' THEN
            CASE WHEN setting::int > 300*1024*1024 OR setting IN ('0', '-1') THEN '<= 300GB' END
        WHEN name = 'temp_file_limit' THEN
            CASE WHEN setting::int > 100*1024*1024 OR setting IN ('0', '-1') THEN '<= 100GB' END
        WHEN name = 'udf_memory_limit' THEN
            CASE WHEN setting::int > 1*1024*1024 THEN '<= 1GB' END
        WHEN name = 'query_dop' THEN
            CASE WHEN setting::int = 0 THEN ' -4' END
        WHEN name = 'max_streams_per_query' THEN
            CASE WHEN setting::int = -1 THEN '50' END
        WHEN name = 'max_connections' THEN
            CASE WHEN node_name = 'dn_6001_6002' AND setting::int < 5000 THEN '>=5000' END
        ELSE ''
    END AS diagnosis
FROM pgxc_parallel_query('all', '
    SELECT pgxc_node_str() AS node_name, name, setting, unit
    FROM pg_settings
    WHERE pgxc_node_str() IN (''cn_5001'',''dn_6001_6002'')
      AND name IN (
          ''max_streams_per_query'',''query_dop'',
          ''sql_use_spacelimit'',''temp_file_limit'',''default_distribution_mode'',
          ''autovacuum_mode'',''autovacuum'',''autovacuum_max_workers_hstore'',''autovacuum_max_workers'',
          ''session_timeout'',''statement_timeout'',''ddl_lock_timeout'',''idle_in_transaction_timeout'',
          ''max_connections'',''min_pool_size'',''max_pool_size'',
          ''max_stream_pool'',''max_active_statements'',''max_prepared_transactions'',
          ''cstore_buffers'',''shared_buffers'',
          ''max_process_memory'',
          ''udf_memory_limit'',
          ''max_process_memory_balanced'',
          ''bbox_dump_count'',
          ''enable_bbox_dump'')
') AS (node_name name, name text, setting text, unit text)
ORDER BY node_name, name;
2. 大表檢查
目的:識別大表,建議客戶整改,避免磁碟過載
8.1.3版本使用如下SQL
-- Large-table diagnosis (version 8.1.3).
-- Lists every ordinary table outside pg_catalog/dbms_om/cstore whose total
-- size exceeds 50GB and tags it with the first matching diagnostic:
-- 'skew table', 'uncompressed column table', 'large replicattion table',
-- 'dirty table', otherwise 'normal large table'.
-- All *size columns of the result are in MB.
SELECT
    CASE
        -- skew above 10% of the per-DN average and above 50 * 1024 MB
        WHEN (t1.skewsize > t1.avgsize * 0.10 AND t1.skewsize > 50 * 1024) THEN 'skew table'
        WHEN (c.reloptions::text LIKE '%orientation=column%'
              AND c.reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        -- replicated table holding more than 10 * 1024 MB per DN
        WHEN (x.pclocatortype = 'R' AND t1.avgsize > 10 * 1024) THEN 'large replicattion table'
        -- more than 100000 dead tuples and a dead-tuple share above 40%;
        -- the numeric cast keeps the ratio fractional and NULLIF guards a
        -- zero denominator
        WHEN (pg_stat_get_dead_tuples(c.oid) > 100000
              AND pg_stat_get_dead_tuples(c.oid)::numeric
                  / NULLIF(pg_stat_get_dead_tuples(c.oid) + pg_stat_get_live_tuples(c.oid), 0) > 0.4) THEN 'dirty table'
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,                      -- schema of the table
    t1.tablename,                       -- table name
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype
        WHEN 'H' THEN 'Hash'
        WHEN 'N' THEN 'Round Robin'
        WHEN 'R' THEN 'Replicate'
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE
        WHEN c.reloptions::text LIKE '%orientation=column%' THEN 'column'
        WHEN c.reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                           -- number of DNs holding the table
    t1.totalsize AS "totalsize(MB)",    -- total table size
    t1.avgsize AS "avgsize(MB)",        -- average size per DN
    t1.skewsize AS "skewsize(MB)",      -- largest size difference between DNs
    t1.skewdn,                          -- DN holding the most data
    t1.maxratio,                        -- largest DN size / average size
    t1.minratio,                        -- smallest DN size / average size
    t1.skewratio                        -- largest size difference / average size
FROM (
    -- pre-processing: per-table size statistics used to detect skew
    SELECT
        schemaname, tablename, skewdn, dnnum, totalsize, avgsize, skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname, tablename, skewdn,
            count(1) AS dnnum,
            sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,
            max(dnsize) AS maxsize,
            min(dnsize) AS minsize,
            (max(dnsize) - min(dnsize)) AS skewsize
        FROM (
            -- one row per table and DN; skewdn is the DN with the most data
            SELECT
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize,   -- converted to MB
                first_value(nodename) OVER (
                    PARTITION BY schemaname, tablename
                    ORDER BY dnsize DESC, nodename
                ) AS skewdn
            FROM (
                SELECT schemaname, tablename, (rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    -- tables whose total size exceeds 50GB
                    SELECT schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                      AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50.0 * 1024 * 1024 * 1024
                ) AS big_tables
            ) AS dn_rows
        ) AS dn_sizes
        GROUP BY schemaname, tablename, skewdn
    ) AS table_stats
) t1
-- resolve the table through its schema so that same-named tables in other
-- schemas are not matched
INNER JOIN pg_namespace n ON n.nspname = t1.schemaname
INNER JOIN pg_class c ON c.relname = t1.tablename AND c.relnamespace = n.oid
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
-- keep tables without reloptions; a bare NOT LIKE would drop NULL rows
WHERE (c.reloptions IS NULL OR c.reloptions::text NOT LIKE '%internal_mask%')
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;
8.2.1和8.2.0版本使用如下SQL
-- Large-table diagnosis (versions 8.2.0 and 8.2.1).
-- Lists every ordinary table outside pg_catalog/dbms_om/cstore whose total
-- size exceeds 50GB and tags it with the first matching diagnostic:
-- 'skew table', 'uncompressed column table', 'large replicattion table',
-- 'dirty table', 'small cu table', otherwise 'normal large table'.
-- All *size columns of the result are in MB.
SELECT
    CASE
        -- skew above 10% of the per-DN average and above 50 * 1024 MB
        WHEN (t1.skewsize > t1.avgsize * 0.10 AND t1.skewsize > 50 * 1024) THEN 'skew table'
        WHEN (c.reloptions::text LIKE '%orientation=column%'
              AND c.reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        -- replicated table holding more than 10 * 1024 MB per DN
        WHEN (x.pclocatortype = 'R' AND t1.avgsize > 10 * 1024) THEN 'large replicattion table'
        -- more than 100000 dead tuples and a dead-tuple share above 40%;
        -- the numeric cast keeps the ratio fractional and NULLIF guards a
        -- zero denominator
        WHEN (pg_stat_get_dead_tuples(c.oid) > 100000
              AND pg_stat_get_dead_tuples(c.oid)::numeric
                  / NULLIF(pg_stat_get_dead_tuples(c.oid) + pg_stat_get_live_tuples(c.oid), 0) > 0.4) THEN 'dirty table'
        -- column table where more than half of the CUs are empty or small
        WHEN (c.reloptions::text LIKE '%orientation=column%') THEN
            CASE
                WHEN (SELECT total_cu_count > 0
                             AND (zero_size_cu_count + small_cu_count)::numeric
                                 / NULLIF(total_cu_count, 0) > 0.5
                      FROM get_col_cu_info(t1.schemaname, t1.tablename)) THEN 'small cu table'
                ELSE 'normal large table'
            END
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,                      -- schema of the table
    t1.tablename,                       -- table name
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype
        WHEN 'H' THEN 'Hash'
        WHEN 'N' THEN 'Round Robin'
        WHEN 'R' THEN 'Replicate'
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE
        WHEN c.reloptions::text LIKE '%orientation=column%' THEN 'column'
        WHEN c.reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                           -- number of DNs holding the table
    t1.totalsize AS "totalsize(MB)",    -- total table size
    t1.avgsize AS "avgsize(MB)",        -- average size per DN
    t1.skewsize AS "skewsize(MB)",      -- largest size difference between DNs
    t1.skewdn,                          -- DN holding the most data
    t1.maxratio,                        -- largest DN size / average size
    t1.minratio,                        -- smallest DN size / average size
    t1.skewratio                        -- largest size difference / average size
FROM (
    -- pre-processing: per-table size statistics used to detect skew
    SELECT
        schemaname, tablename, skewdn, dnnum, totalsize, avgsize, skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname, tablename, skewdn,
            count(1) AS dnnum,
            sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,
            max(dnsize) AS maxsize,
            min(dnsize) AS minsize,
            (max(dnsize) - min(dnsize)) AS skewsize
        FROM (
            -- one row per table and DN; skewdn is the DN with the most data
            SELECT
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize,   -- converted to MB
                first_value(nodename) OVER (
                    PARTITION BY schemaname, tablename
                    ORDER BY dnsize DESC, nodename
                ) AS skewdn
            FROM (
                SELECT schemaname, tablename, (rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    -- tables whose total size exceeds 50GB
                    SELECT schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                      AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50 * 1024 * 1024 * 1024.0
                ) AS big_tables
            ) AS dn_rows
        ) AS dn_sizes
        GROUP BY schemaname, tablename, skewdn
    ) AS table_stats
) t1
-- resolve the table through its schema so that same-named tables in other
-- schemas are not matched
INNER JOIN pg_namespace n ON n.nspname = t1.schemaname
INNER JOIN pg_class c ON c.relname = t1.tablename AND c.relnamespace = n.oid
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
-- keep tables without reloptions; a bare NOT LIKE would drop NULL rows
WHERE (c.reloptions IS NULL OR c.reloptions::text NOT LIKE '%internal_mask%')
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;
8.3.0版本使用如下SQL
-- Large-table diagnosis (version 8.3.0).
-- Lists every ordinary table outside pg_catalog/dbms_om/cstore whose total
-- size exceeds 50GB and tags it with the first matching diagnostic:
-- 'skew table', 'uncompressed column table', 'large replicattion table',
-- 'dirty table', 'small cu table', otherwise 'normal large table'.
-- All *size columns of the result are in MB.
SELECT
    CASE
        -- skew above 10% of the per-DN average and above 50 * 1024 MB
        WHEN (t1.skewsize > t1.avgsize * 0.10 AND t1.skewsize > 50 * 1024) THEN 'skew table'
        WHEN (c.reloptions::text LIKE '%orientation=column%'
              AND c.reloptions::text LIKE '%compression=no%') THEN 'uncompressed column table'
        -- replicated table holding more than 10 * 1024 MB per DN
        WHEN (x.pclocatortype = 'R' AND t1.avgsize > 10 * 1024) THEN 'large replicattion table'
        -- more than 100000 dead tuples and a dead-tuple share above 40%;
        -- the numeric cast keeps the ratio fractional and NULLIF guards a
        -- zero denominator
        WHEN (pg_stat_get_dead_tuples(c.oid) > 100000
              AND pg_stat_get_dead_tuples(c.oid)::numeric
                  / NULLIF(pg_stat_get_dead_tuples(c.oid) + pg_stat_get_live_tuples(c.oid), 0) > 0.4) THEN 'dirty table'
        -- column table where more than half of the CUs are empty or small
        WHEN (c.reloptions::text LIKE '%orientation=column%') THEN
            CASE
                WHEN (SELECT total_cu_count > 0
                             AND (zero_cu_count + small_cu_count)::numeric
                                 / NULLIF(total_cu_count, 0) > 0.5
                      FROM pgxc_get_small_cu_info(c.oid)) THEN 'small cu table'
                ELSE 'normal large table'
            END
        ELSE 'normal large table'
    END AS diagnostic,
    t1.schemaname,                      -- schema of the table
    t1.tablename,                       -- table name
    a.rolname AS tableowner,
    x.pgroup AS nodegroup,
    CASE x.pclocatortype
        WHEN 'H' THEN 'Hash'
        WHEN 'N' THEN 'Round Robin'
        WHEN 'R' THEN 'Replicate'
    END AS locatortype,
    CASE WHEN c.parttype = 'p' THEN true ELSE false END AS ispartitioned,
    CASE
        WHEN c.reloptions::text LIKE '%orientation=column%' THEN 'column'
        WHEN c.reloptions::text LIKE '%orientation=row%' THEN 'row'
    END AS orientation,
    t1.dnnum,                           -- number of DNs holding the table
    t1.totalsize AS "totalsize(MB)",    -- total table size
    t1.avgsize AS "avgsize(MB)",        -- average size per DN
    t1.skewsize AS "skewsize(MB)",      -- largest size difference between DNs
    t1.skewdn,                          -- DN holding the most data
    t1.maxratio,                        -- largest DN size / average size
    t1.minratio,                        -- smallest DN size / average size
    t1.skewratio                        -- largest size difference / average size
FROM (
    -- pre-processing: per-table size statistics used to detect skew
    SELECT
        schemaname, tablename, skewdn, dnnum, totalsize, avgsize, skewsize,
        (maxsize/avgsize)::numeric(20,2) AS maxratio,
        (minsize/avgsize)::numeric(20,2) AS minratio,
        (skewsize/avgsize)::numeric(20,2) AS skewratio
    FROM (
        SELECT
            schemaname, tablename, skewdn,
            count(1) AS dnnum,
            sum(dnsize) AS totalsize,
            avg(dnsize) AS avgsize,
            max(dnsize) AS maxsize,
            min(dnsize) AS minsize,
            (max(dnsize) - min(dnsize)) AS skewsize
        FROM (
            -- one row per table and DN; skewdn is the DN with the most data
            SELECT
                schemaname,
                tablename,
                nodename,
                (dnsize/1024/1024)::bigint AS dnsize,   -- converted to MB
                first_value(nodename) OVER (
                    PARTITION BY schemaname, tablename
                    ORDER BY dnsize DESC, nodename
                ) AS skewdn
            FROM (
                SELECT schemaname, tablename, (rd).nodename, ((rd).dnsize + 1) AS dnsize
                FROM (
                    -- tables whose total size exceeds 50GB
                    SELECT schemaname, tablename, gs_table_distribution(schemaname, tablename) AS rd
                    FROM gs_table_distribution()
                    WHERE schemaname NOT IN ('pg_catalog', 'dbms_om', 'cstore')
                      AND relkind = 'r'
                    GROUP BY schemaname, tablename
                    HAVING sum(dnsize) > 50.0 * 1024 * 1024 * 1024
                ) AS big_tables
            ) AS dn_rows
        ) AS dn_sizes
        GROUP BY schemaname, tablename, skewdn
    ) AS table_stats
) t1
-- resolve the table through its schema so that same-named tables in other
-- schemas are not matched
INNER JOIN pg_namespace n ON n.nspname = t1.schemaname
INNER JOIN pg_class c ON c.relname = t1.tablename AND c.relnamespace = n.oid
LEFT JOIN pg_authid a ON a.oid = c.relowner
LEFT JOIN pgxc_class x ON x.pcrelid = c.oid
-- keep tables without reloptions; a bare NOT LIKE would drop NULL rows
WHERE (c.reloptions IS NULL OR c.reloptions::text NOT LIKE '%internal_mask%')
ORDER BY totalsize DESC, diagnostic, skewsize DESC
;
針對不同的診斷結果使用如下診斷措施
類別 |
建議手段 |
skew table |
根據業務選擇關聯時常用、並且資料不傾斜的列作為新的分佈列;如果找不到合適的分佈列,可以把表修改為RoundRobin分佈: ALTER TABLE ctmes_tgmesmbi.mbi_hlottsthis DISTRIBUTE BY ROUNDROBIN;
|
uncompressed column table |
列存表的壓縮效果非常好,一般推薦使用壓縮,至少使用low級別壓縮: ALTER TABLE customer_address SET (compression=low); VACUUM FULL customer_address;
|
dirty table |
說明檢測到表的碎片率比較高,需要透過VACUUM FULL整理表: VACUUM FULL customer_address;
|
small cu table |
說明表中的小CU比較多,需要透過VACUUM FULL整理表: VACUUM FULL customer_address;
|
large replicattion table |
複製表在每個DN上都有一份全量資料,當表磁碟空間佔用過大時,需要把表修改為HASH分佈表。一般複製表都是維表,存在主鍵,可以直接把分佈列修改為主鍵: ALTER TABLE ctmes_tgmesmbi.mbi_hlottsthis DISTRIBUTE BY HASH(id); |
normal large table |
3. 冗餘索引診斷
目的:識別冗餘索引,建議客戶刪除。可以降低磁碟空間,並降低大規模資料匯入的時候的xlog規模
-- Redundant index diagnosis.
-- Compares every pair of predicate-free indexes on the same table and
-- reports the pairs that fall under one of three policies (optpolicy):
--   duplicate    both indexes have the same key columns in the same order
--                and the same index expressions; the index named in
--                opt_index can be dropped directly
--   redundancy   the key columns of one index are exactly the leading
--                columns of the other; the index named in opt_index can be
--                dropped directly
--   optimizable  both indexes cover the same columns in a different order;
--                manual analysis is needed to decide whether to optimize
WITH idx_info AS (
    -- predicate-free indexes (oid >= 16384) on ordinary tables
    -- (oid >= 16384) that have a pgxc_class entry and are not flagged
    -- with internal_mask
    SELECT
        quote_ident(ns.nspname) || '.' || quote_ident(tbl.relname) AS tablename,
        dist.pgroup AS nodegroup,
        ix.indrelid AS indrelid,
        ix.indexrelid AS indexrelid,
        ix.indisunique,
        ix.indisprimary,
        ix.indnatts,
        ix.indkey,
        ix.indexprs
    FROM pg_index ix
    INNER JOIN pg_class tbl ON tbl.oid = ix.indrelid
    INNER JOIN pg_class idx ON idx.oid = ix.indexrelid
    LEFT JOIN pg_namespace ns ON ns.oid = tbl.relnamespace
    INNER JOIN pgxc_class dist ON dist.pcrelid = tbl.oid
    WHERE tbl.relkind = 'r'
      AND tbl.oid >= 16384
      AND (tbl.reloptions IS NULL OR tbl.reloptions::text NOT LIKE '%internal_mask%')
      AND idx.relkind = 'i'
      AND idx.oid >= 16384
      AND ix.indpred IS NULL
),
idx_pair AS (
    -- each unordered pair of indexes on one table (the first member has the
    -- larger oid), classified by policy; swap marks pairs whose second
    -- member is primary/unique while the first is not
    SELECT
        cur.tablename,
        cur.nodegroup,
        cur.indrelid,
        cur.indexrelid AS baseidx,
        cur.indisunique AS base_unique,
        cur.indisprimary AS base_primary,
        other.indexrelid AS optidx,
        other.indisunique AS opt_unique,
        other.indisprimary AS opt_primary,
        CASE
            WHEN other.indisprimary > cur.indisprimary
                 OR other.indisunique > cur.indisunique THEN true
            ELSE false
        END AS swap,
        CASE
            WHEN cur.indkey = other.indkey
                 AND coalesce(pg_get_expr(cur.indexprs, cur.indrelid), 'NULL')
                     = coalesce(pg_get_expr(other.indexprs, other.indrelid), 'NULL')
                THEN 'duplicate'::text
            -- expression indexes are only ever reported as duplicates
            WHEN other.indexprs IS NOT NULL OR cur.indexprs IS NOT NULL
                THEN NULL::text
            -- one key list is a leading prefix of the other
            WHEN strpos(cur.indkey::text, other.indkey::text||' ') = 1
                 OR strpos(other.indkey::text, cur.indkey::text||' ') = 1
                THEN 'redundancy'::text
            -- same column set, different order
            WHEN cur.indkey @> other.indkey AND other.indkey @> cur.indkey
                THEN 'optimizable'::text
            ELSE NULL
        END AS optpolicy
    FROM idx_info cur
    INNER JOIN pg_index other
        ON (cur.indrelid = other.indrelid AND cur.indexrelid > other.indexrelid)
    WHERE other.indpred IS NULL
),
ordered_pair AS (
    -- keep classified pairs only and put the index to keep (primary/unique
    -- preferred) on the base side
    SELECT
        p.tablename,
        p.indrelid,
        p.nodegroup,
        CASE WHEN p.swap THEN p.optidx ELSE p.baseidx END AS base_idx,
        CASE WHEN p.swap THEN p.opt_primary ELSE p.base_primary END AS base_primary,
        CASE WHEN p.swap THEN p.opt_unique ELSE p.base_unique END AS base_unique,
        CASE WHEN p.swap THEN p.baseidx ELSE p.optidx END AS opt_idx,
        CASE WHEN p.swap THEN p.base_primary ELSE p.opt_primary END AS opt_primary,
        CASE WHEN p.swap THEN p.base_unique ELSE p.opt_unique END AS opt_unique,
        p.optpolicy
    FROM idx_pair p
    WHERE p.optpolicy IS NOT NULL
)
SELECT
    tablename,
    nodegroup,
    base_idx::regclass::text AS base_index,
    base_primary,
    base_unique,
    substring(pg_get_indexdef(base_idx) from 'USING .+\)') AS base_idxdef,
    opt_idx::regclass::text AS opt_index,
    opt_primary,
    opt_unique,
    substring(pg_get_indexdef(opt_idx) from 'USING .+\)') AS opt_idxdef,
    optpolicy,
    pg_get_tabledef(indrelid)
FROM ordered_pair
ORDER BY tablename, nodegroup, base_index
;
點選關注,第一時間瞭解華為雲新鮮技術~