[20190703]12c Hybrid histogram.txt
[20190703]12c Hybrid histogram.txt
--//個人對直方圖瞭解很少,以前2種直方圖型別對於目前的許多應用來講已經足夠,或者講遇到的問題很少.
--//抽一點點時間,簡單探究12c HYBRID histogram.
--//以前已經探究過Top Frequency histogram,連結
--//http://blog.itpub.net/267265/viewspace-2140257/=>[20170603]12c Top Frequency histogram.txt
1.環境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0 12.2.0.1.0 Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production 0
2.測試例子建立:
create table t1 (owner varchar2(30));
create table t2 (owner varchar2(30));
$ cat aa.txt
APEX_040200 3405
ORDSYS 3157
MDSYS 1819
PUBLIC 1047
XDB 985
SYS 942
SYSTEM 641
CTXSYS 405
WMSYS 387
DVSYS 352
SH 309
ORDDATA 292
LBACSYS 209
OE 142
SCOTT 96
GSMADMIN_INTERNAL 77
IX 58
DBSNMP 55
PM 44
HR 35
OLAPSYS 25
OJVMSYS 23
DVF 19
FLOWS_FILES 13
AUDSYS 12
ORDPLUGINS 10
OUTLN 10
BI 8
ORACLE_OCM 8
SI_INFORMTN_SCHEM 8
APPQOSSYS 5
TEST 2
--//注這個是上次測試owner的資料分佈,還是以這個為藍本探究。
awk "{print \"insert into t1 select '\"$1\"'from dual connect by level \74=\",$2,\";\"}" aa.txt
awk "{print \"insert into t1 select '\"$1\"'from dual connect by level \x3c=\",$2,\";\"}" aa.txt
--//說明:windows下awk真變態,外層使用雙引號,內部無法解析雙引號。"<"不知道如何轉換,使用\74表示(實際上八進位制).
--//在vim下透過ga命令確定. 輸入< ,在該字元上打入ga,在提示行出現:<<> 60, 十六進位制 3c, 八進位制 074
--//執行如下:
D:\> gawk "{print \"insert into t1 select '\"$1\"'from dual connect by level \x3c=\",$2,\";\"}" aa.txt |sqlplus scott/btbtms@test01p
SCOTT@test01p> insert into t2 select * from t1;
14600 rows created.
SCOTT@test01p> delete t1 where owner='SYS' and rownum<=1;
1 row deleted.
SCOTT@test01p> commit ;
Commit complete.
--//前面的連結如果分析buckert=10的情況下,t1表owner欄位建立的直方圖是HYBRID,t2建立的直方圖是TOP-FREQUENCY.
with a as (select distinct owner,count(*) over(partition by owner) n1 ,count(*) over () n2 from t1 order by 2 desc ),
b as (select owner,n1,n2,sum(n1) over (order by n1 desc) n3 from a order by n1 desc)
select rownum,owner,n1,n2,n3,round(n3/n2,5) x1,round(1-1/rownum,5) x2 from b;
ROWNUM OWNER N1 N2 N3 X1 X2
---------- -------------------- ---------- ---------- ---------- ---------- ----------
1 APEX_040200 3405 14599 3405 .23324 0
2 ORDSYS 3157 14599 6562 .44948 .5
3 MDSYS 1819 14599 8381 .57408 .66667
4 PUBLIC 1047 14599 9428 .6458 .75
5 XDB 985 14599 10413 .71327 .8
6 SYS 941 14599 11354 .77772 .83333
7 SYSTEM 641 14599 11995 .82163 .85714
8 CTXSYS 405 14599 12400 .84937 .875
9 WMSYS 387 14599 12787 .87588 .88889
10 DVSYS 352 14599 13139 .89999 .9
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 SH 309 14599 13448 .92116 .90909
12 ORDDATA 292 14599 13740 .94116 .91667
13 LBACSYS 209 14599 13949 .95548 .92308
14 OE 142 14599 14091 .9652 .92857
15 SCOTT 96 14599 14187 .97178 .93333
16 GSMADMIN_INTERNAL 77 14599 14264 .97705 .9375
17 IX 58 14599 14322 .98103 .94118
18 DBSNMP 55 14599 14377 .98479 .94444
19 PM 44 14599 14421 .98781 .94737
20 HR 35 14599 14456 .9902 .95
21 OLAPSYS 25 14599 14481 .99192 .95238
22 OJVMSYS 23 14599 14504 .99349 .95455
23 DVF 19 14599 14523 .99479 .95652
24 FLOWS_FILES 13 14599 14536 .99568 .95833
25 AUDSYS 12 14599 14548 .99651 .96
26 ORDPLUGINS 10 14599 14568 .99788 .96154
27 OUTLN 10 14599 14568 .99788 .96296
28 BI 8 14599 14592 .99952 .96429
29 ORACLE_OCM 8 14599 14592 .99952 .96552
30 SI_INFORMTN_SCHEM 8 14599 14592 .99952 .96667
31 APPQOSSYS 5 14599 14597 .99986 .96774
32 TEST 2 14599 14599 1 .96875
32 rows selected.
with a as (select distinct owner,count(*) over(partition by owner) n1 ,count(*) over () n2 from t2 order by 2 desc ),
b as (select owner,n1,n2,sum(n1) over (order by n1 desc) n3 from a order by n1 desc)
select rownum,owner,n1,n2,n3,round(n3/n2,5) x1,round(1-1/rownum,5) x2 from b;
ROWNUM OWNER N1 N2 N3 X1 X2
---------- -------------------- ---------- ---------- ---------- ---------- ----------
1 APEX_040200 3405 14600 3405 .23322 0
2 ORDSYS 3157 14600 6562 .44945 .5
3 MDSYS 1819 14600 8381 .57404 .66667
4 PUBLIC 1047 14600 9428 .64575 .75
5 XDB 985 14600 10413 .71322 .8
6 SYS 942 14600 11355 .77774 .83333
7 SYSTEM 641 14600 11996 .82164 .85714
8 CTXSYS 405 14600 12401 .84938 .875
9 WMSYS 387 14600 12788 .87589 .88889
10 DVSYS 352 14600 13140 .9 .9
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 SH 309 14600 13449 .92116 .90909
12 ORDDATA 292 14600 13741 .94116 .91667
13 LBACSYS 209 14600 13950 .95548 .92308
14 OE 142 14600 14092 .96521 .92857
15 SCOTT 96 14600 14188 .97178 .93333
16 GSMADMIN_INTERNAL 77 14600 14265 .97705 .9375
17 IX 58 14600 14323 .98103 .94118
18 DBSNMP 55 14600 14378 .98479 .94444
19 PM 44 14600 14422 .98781 .94737
20 HR 35 14600 14457 .99021 .95
21 OLAPSYS 25 14600 14482 .99192 .95238
22 OJVMSYS 23 14600 14505 .99349 .95455
23 DVF 19 14600 14524 .99479 .95652
24 FLOWS_FILES 13 14600 14537 .99568 .95833
25 AUDSYS 12 14600 14549 .99651 .96
26 ORDPLUGINS 10 14600 14569 .99788 .96154
27 OUTLN 10 14600 14569 .99788 .96296
28 BI 8 14600 14593 .99952 .96429
29 ORACLE_OCM 8 14600 14593 .99952 .96552
30 SI_INFORMTN_SCHEM 8 14600 14593 .99952 .96667
31 APPQOSSYS 5 14600 14598 .99986 .96774
32 TEST 2 14600 14600 1 .96875
32 rows selected.
--//注意看下劃線,可以分析buckert=10的情況下,t1表owner欄位建立的直方圖是HYBRID,t2建立的直方圖是TOP-FREQUENCY.
--// a1.sql
exec dbms_stats.gather_table_stats(ownname=>user,tabname=>'&1',method_opt=>'for columns owner size &2');
select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER';
SCOTT@test01p> @ a1 T1 10
PL/SQL procedure successfully completed.
old 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER'
new 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='T1' and column_name ='OWNER'
COLUMN_NAME NUM_DISTINCT DENSITY HISTOGRAM SAMPLE_SIZE
-------------------- ------------ ---------- --------------- -----------
OWNER 32 .018378 HYBRID 14599
--//DENSITY=.018378 如何計算呢?我不知道...
SCOTT@test01p> @ a1 T2 10
PL/SQL procedure successfully completed.
old 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER'
new 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='T2' and column_name ='OWNER'
COLUMN_NAME NUM_DISTINCT DENSITY HISTOGRAM SAMPLE_SIZE
-------------------- ------------ ---------- --------------- -----------
OWNER 32 .000034247 TOP-FREQUENCY 14600
--//DENSITY=1/2/14600 = .00003424657534246575
--//可以發現T1與T2在owner欄位上一個建立的是HYBRID,一個是TOP-FREQUENCY。
3.對比:
SCOTT@test01p> column ENDPOINT_ACTUAL_VALUE_RAW noprint
SCOTT@test01p> select * from user_tab_histograms where table_name ='T1' and column_name ='OWNER';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3405 3.3913E+35 APEX_040200 3405 SHARED
T1 OWNER 3890 3.5442E+35 DBSNMP 55 SHARED
T1 OWNER 4386 3.7551E+35 HR 35 SHARED
T1 OWNER 6472 4.0119E+35 MDSYS 1819 SHARED
T1 OWNER 6962 4.1186E+35 ORDDATA 292 SHARED
T1 OWNER 10129 4.1186E+35 ORDSYS 3157 SHARED
T1 OWNER 11230 4.1711E+35 PUBLIC 1047 SHARED
T1 OWNER 12584 4.3277E+35 SYS 941 SHARED
T1 OWNER 13225 4.3277E+35 SYSTEM 641 SHARED
T1 OWNER 14599 4.5831E+35 XDB 985 SHARED
10 rows selected.
SCOTT@test01p> select * from user_tab_histograms where table_name ='T2' and column_name ='OWNER';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T2 OWNER 3405 3.3913E+35 APEX_040200 0 SHARED
T2 OWNER 3810 3.4959E+35 CTXSYS 0 SHARED
T2 OWNER 4162 3.5483E+35 DVSYS 0 SHARED
T2 OWNER 5981 4.0119E+35 MDSYS 0 SHARED
T2 OWNER 9138 4.1186E+35 ORDSYS 0 SHARED
T2 OWNER 10185 4.1711E+35 PUBLIC 0 SHARED
T2 OWNER 11127 4.3277E+35 SYS 0 SHARED
T2 OWNER 11768 4.3277E+35 SYSTEM 0 SHARED
T2 OWNER 12155 4.5330E+35 WMSYS 0 SHARED
T2 OWNER 13140 4.5831E+35 XDB 0 SHARED
10 rows selected.
--//你可以發現HYBRID與TOP-FREQUENCY直方圖的一點不同之處,對於HYBRID histogram欄位ENDPOINT_REPEAT_COUNT記錄
--//ENDPOINT_ACTUAL_VALUE出現的頻度。
select a.* from user_tab_histograms a where a.table_name ='T1' and a.column_name ='OWNER'
and not exists ( select 1 from user_tab_histograms where table_name ='T2' and column_name ='OWNER' and
ENDPOINT_ACTUAL_VALUE=a.ENDPOINT_ACTUAL_VALUE);
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3890 3.5442E+35 DBSNMP 55 SHARED
T1 OWNER 4386 3.7551E+35 HR 35 SHARED
T1 OWNER 6962 4.1186E+35 ORDDATA 292 SHARED
--//這3個ENDPOINT_ACTUAL_VALUE並不是流行值。
4.測試:
SCOTT@test01p> alter session set statistics_level = all;
Session altered.
SCOTT@test01p> select count(*) from t1 where owner='HR';
COUNT(*)
--------
35
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID axgs6vcm4mvs3, child number 0
-------------------------------------
select count(*) from t1 where owner='HR'
Plan hash value: 3724264953
---------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 9 (100)| | 1 |00:00:00.01 | 31 |
| 1 | SORT AGGREGATE | | 1 | 1 | 8 | | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 35 | 280 | 9 (0)| 00:00:01 | 35 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"='HR')
--//出現在直方圖內的值估計很準確。
SCOTT@test01p> select count(*) from t1 where owner='CTXSYS';
COUNT(*)
----------
405
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 8f6jd9fzfqqz4, child number 0
-------------------------------------
select count(*) from t1 where owner='CTXSYS'
Plan hash value: 3724264953
---------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 9 (100)| | 1 |00:00:00.01 | 31 |
| 1 | SORT AGGREGATE | | 1 | 1 | 8 | | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 214 | 1712 | 9 (0)| 00:00:01 | 405 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"='CTXSYS')
D:\tools\sqllaji>cat 10053x.sql
execute dbms_sqldiag.dump_trace(p_sql_id=>'&1',p_child_number=>&2,p_component=>'Compiler',p_file_id=>'&&1');
SCOTT@test01p> @ 10053x 8f6jd9fzfqqz4 0
PL/SQL procedure successfully completed.
***************************************
SINGLE TABLE ACCESS PATH
Single Table Cardinality Estimation for T1[T1]
SPD: Return code in qosdDSDirSetup: NOCTX, estType = TABLE
kkecdn: Single Table Predicate:"T1"."OWNER"='CTXSYS'
Column (#1):
NewDensity:0.014687, OldDensity:0.018378 BktCnt:14599.000000, PopBktCnt:8381.000000, PopValCnt:3, NDV:32
Column (#1): OWNER(VARCHAR2)
AvgLen: 8 NDV: 32 Nulls: 0 Density: 0.014687
Histogram: Hybrid #Bkts: 10 UncompBkts: 14599 EndPtVals: 10 ActualVal: yes
Using density: 0.014687 of col #1 as selectivity of pred having unreasonably low value
Table: T1 Alias: T1
Card: Original: 14599.000000 Rounded: 214 Computed: 214.413793 Non Adjusted: 214.413793
Scan IO Cost (Disk) = 9.000000
Scan CPU Cost (Disk) = 2389250.320000
Cost of predicates:
io = NOCOST, cpu = 50.000000, sel = 0.014687 flag = 2048 ("T1"."OWNER"='CTXSYS')
Total Scan IO Cost = 9.000000 (scan (Disk))
+ 0.000000 (io filter eval) (= 0.000000 (per row) * 14599.000000 (#rows))
= 9.000000
Total Scan CPU Cost = 2389250.320000 (scan (Disk))
+ 729950.000000 (cpu filter eval) (= 50.000000 (per row) * 14599.000000 (#rows))
= 3119200.320000
Access Path: TableScan
Cost: 9.138291 Resp: 9.138291 Degree: 0
Cost_io: 9.000000 Cost_cpu: 3119200
Resp_io: 9.000000 Resp_cpu: 3119200
Best:: AccessPath: TableScan
Cost: 9.138291 Degree: 1 Resp: 9.138291 Card: 214.413793 Bytes: 0.000000
check parallelism for statement[<unnamed>]
kkfdPaPrm.1:curInst:4, curpxEnabled=1, curCPUCount=1
kkfdPaPrm.2:sessInst:4, sesspxEnabled=1, sesCPUCount=1
kkfdPaForcePrm: dop:1 ()
use dictionary DOP(1) on table
kkfdPaPrm:- The table : 27639
kkfdPaPrm:DOP = 1 (computed from hint/dictionary/autodop)
kkfdiPaPrm: dop:1 serial(?) flags: 1
***************************************
--//使用 NewDensity:0.014687
BktCnt:14599.000000, PopBktCnt:8381.000000, PopValCnt:3, NDV:32
--//非流行值的數量: 14599-8381 = 6218
--//非流行值的桶數量: 32-3=29
--//非流行值的數量/非流行值的桶數量 6218/29 = 214.41379310344827586206,四捨五入214,正好符合執行計劃的推斷.
--//NewDensity的計算 =6218/14599/29 = .01468688219079719678,,非常接近.
--//問題是hybrid histogram 如何確定PopValCnt:3.
--//實際上確定PopValCnt就是指 endpoint_repeat_count - sample_size/num_buckets的bucket數量。
--//對於本例子:
SCOTT@test01p> column ENDPOINT_ACTUAL_VALUE_RAW noprint
SCOTT@test01p> select * from user_tab_histograms where table_name ='T1' and column_name ='OWNER' and endpoint_repeat_count - 14599/10>0;
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3405 3.3913E+35 APEX_040200 3405 SHARED
T1 OWNER 6472 4.0119E+35 MDSYS 1819 SHARED
T1 OWNER 10129 4.1186E+35 ORDSYS 3157 SHARED
--//PopValCnt=3
--//實際上感覺Hybrid histogram很複雜,大家可以參考連結
--//我自己還有1個疑問就是DENSITY=.018378如何計算的。
--//如果使用expland plan for 檢視繫結變數的執行計劃:
SCOTT@test01p> explain plan for Select count(*) from t1 where owner=:v_owner;
Explained.
SCOTT@test01p> @ dp
PLAN_TABLE_OUTPUT
---------------------------
Plan hash value: 3724264953
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 8 | 9 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 8 | | |
|* 2 | TABLE ACCESS FULL| T1 | 456 | 3648 | 9 (0)| 00:00:01 |
---------------------------------------------------------------------------
--//這裡的rows不是透過 14599*.018378 = 268.300422計算得來的,而是14599/32 = 456.21875得來的,這樣檢視中記錄的值毫無意義。
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
FULL(@"SEL$1" "T1"@"SEL$1")
OUTLINE_LEAF(@"SEL$1")
ALL_ROWS
DB_VERSION('12.2.0.1')
OPTIMIZER_FEATURES_ENABLE('12.2.0.1')
IGNORE_OPTIM_EMBEDDED_HINTS
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"=:V_OWNER)
Column Projection Information (identified by operation id):
-----------------------------------------------------------
1 - (#keys=0) COUNT(*)[22]
2 - (rowset=1019)
40 rows selected.
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2649905/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- hybrid筆記筆記
- Hybrid 混合App開發APP
- Hybrid治理(UIWebView&WKWebView)UIWebView
- Access、Trunk、Hybrid含義及Hybrid埠型別的常見配置應用型別
- Hybrid APP 開發(六):JSSDKAPPJS
- Identity Server 4 - Hybrid Flow - ClaimsIDEServerAI
- 從JSCore瞭解Hybrid開發JS
- Cordova+Vue快速搭建Hybrid AppVueAPP
- ENSP Demo3 VLAN Trunk & Hybrid
- Hybrid App從概念到實戰APP
- Centaur: A Framework for Hybrid CPU-FPGA DatabasesFrameworkFPGADatabase
- 探究Hybrid-APP技術原理APP
- Hybrid前端jsbridge設計原理分析前端JS
- hybrid混合編譯開發 更新編譯
- Hybrid App技術解析 — 實戰篇APP
- Hybrid App技術解析 -- 原理篇APP
- Hybrid App技術解析 — 原理篇APP
- Hybrid App技術解析 -- 實戰篇APP
- 實驗8.Vlan Hybrid實驗
- iOS開發基礎117-HybridiOS
- 小程式會讓Hybrid App崛起嗎APP
- 常見Hybrid App框架優劣對比APP框架
- Anti-alias的前世今生(三):Hybrid AA
- Oracle Hybrid Columnar Compression(HCC) 混合列壓縮Oracle
- 12C打psu
- Oracle 12c系列(十) | 12c中的Recovering Tables and Table PartitionsOracle
- Identity Server 4 - Hybrid Flow - 保護API資源IDEServerAPI
- hybrid write barrier 為什麼能消除 stack rescanning?
- Web App、Hybrid App、Native App 橫向對比WebAPP
- Native App及Hybrid App優缺點介紹!APP
- 【OCM】Oracle 12C OCMU 12c OCM升級考試大綱Oracle
- Oracle 12c Automatic ReoptimizationOracle
- Oracle 12C安裝Oracle
- 12C 線上MOVE
- 12c pdb基本操作
- Hybrid小程式混合開發之路 – 資料互動
- 前端架構之移動端混合架構(hybrid)前端架構
- 讓你的Hybrid App聽懂你的話(Android篇)APPAndroid