[20190703]12c Hybrid histogram.txt
[20190703]12c Hybrid histogram.txt
--//個人對直方圖瞭解很少,以前2種直方圖型別對於目前的許多應用來講已經足夠,或者講遇到的問題很少.
--//抽一點點時間,簡單探究12c HYBRID histogram.
--//以前已經探究過Top Frequency histogram,連結
--//http://blog.itpub.net/267265/viewspace-2140257/=>[20170603]12c Top Frequency histogram.txt
1.環境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0 12.2.0.1.0 Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production 0
2.測試例子建立:
create table t1 (owner varchar2(30));
create table t2 (owner varchar2(30));
$ cat aa.txt
APEX_040200 3405
ORDSYS 3157
MDSYS 1819
PUBLIC 1047
XDB 985
SYS 942
SYSTEM 641
CTXSYS 405
WMSYS 387
DVSYS 352
SH 309
ORDDATA 292
LBACSYS 209
OE 142
SCOTT 96
GSMADMIN_INTERNAL 77
IX 58
DBSNMP 55
PM 44
HR 35
OLAPSYS 25
OJVMSYS 23
DVF 19
FLOWS_FILES 13
AUDSYS 12
ORDPLUGINS 10
OUTLN 10
BI 8
ORACLE_OCM 8
SI_INFORMTN_SCHEM 8
APPQOSSYS 5
TEST 2
--//注這個是上次測試owner的資料分佈,還是以這個為藍本探究。
awk "{print \"insert into t1 select '\"$1\"'from dual connect by level \74=\",$2,\";\"}" aa.txt
awk "{print \"insert into t1 select '\"$1\"'from dual connect by level \x3c=\",$2,\";\"}" aa.txt
--//說明:windows下awk真變態,外層使用雙引號,內部無法解析雙引號。"<"不知道如何轉換,使用\74表示(實際上八進位制).
--//在vim下透過ga命令確定. 輸入< ,在該字元上打入ga,在提示行出現:<<> 60, 十六進位制 3c, 八進位制 074
--//執行如下:
D:\> gawk "{print \"insert into t1 select '\"$1\"'from dual connect by level \x3c=\",$2,\";\"}" aa.txt |sqlplus scott/btbtms@test01p
SCOTT@test01p> insert into t2 select * from t1;
14600 rows created.
SCOTT@test01p> delete t1 where owner='SYS' and rownum<=1;
1 row deleted.
SCOTT@test01p> commit ;
Commit complete.
--//前面的連結如果分析buckert=10的情況下,t1表owner欄位建立的直方圖是HYBRID,t2建立的直方圖是TOP-FREQUENCY.
with a as (select distinct owner,count(*) over(partition by owner) n1 ,count(*) over () n2 from t1 order by 2 desc ),
b as (select owner,n1,n2,sum(n1) over (order by n1 desc) n3 from a order by n1 desc)
select rownum,owner,n1,n2,n3,round(n3/n2,5) x1,round(1-1/rownum,5) x2 from b;
ROWNUM OWNER N1 N2 N3 X1 X2
---------- -------------------- ---------- ---------- ---------- ---------- ----------
1 APEX_040200 3405 14599 3405 .23324 0
2 ORDSYS 3157 14599 6562 .44948 .5
3 MDSYS 1819 14599 8381 .57408 .66667
4 PUBLIC 1047 14599 9428 .6458 .75
5 XDB 985 14599 10413 .71327 .8
6 SYS 941 14599 11354 .77772 .83333
7 SYSTEM 641 14599 11995 .82163 .85714
8 CTXSYS 405 14599 12400 .84937 .875
9 WMSYS 387 14599 12787 .87588 .88889
10 DVSYS 352 14599 13139 .89999 .9
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 SH 309 14599 13448 .92116 .90909
12 ORDDATA 292 14599 13740 .94116 .91667
13 LBACSYS 209 14599 13949 .95548 .92308
14 OE 142 14599 14091 .9652 .92857
15 SCOTT 96 14599 14187 .97178 .93333
16 GSMADMIN_INTERNAL 77 14599 14264 .97705 .9375
17 IX 58 14599 14322 .98103 .94118
18 DBSNMP 55 14599 14377 .98479 .94444
19 PM 44 14599 14421 .98781 .94737
20 HR 35 14599 14456 .9902 .95
21 OLAPSYS 25 14599 14481 .99192 .95238
22 OJVMSYS 23 14599 14504 .99349 .95455
23 DVF 19 14599 14523 .99479 .95652
24 FLOWS_FILES 13 14599 14536 .99568 .95833
25 AUDSYS 12 14599 14548 .99651 .96
26 ORDPLUGINS 10 14599 14568 .99788 .96154
27 OUTLN 10 14599 14568 .99788 .96296
28 BI 8 14599 14592 .99952 .96429
29 ORACLE_OCM 8 14599 14592 .99952 .96552
30 SI_INFORMTN_SCHEM 8 14599 14592 .99952 .96667
31 APPQOSSYS 5 14599 14597 .99986 .96774
32 TEST 2 14599 14599 1 .96875
32 rows selected.
with a as (select distinct owner,count(*) over(partition by owner) n1 ,count(*) over () n2 from t2 order by 2 desc ),
b as (select owner,n1,n2,sum(n1) over (order by n1 desc) n3 from a order by n1 desc)
select rownum,owner,n1,n2,n3,round(n3/n2,5) x1,round(1-1/rownum,5) x2 from b;
ROWNUM OWNER N1 N2 N3 X1 X2
---------- -------------------- ---------- ---------- ---------- ---------- ----------
1 APEX_040200 3405 14600 3405 .23322 0
2 ORDSYS 3157 14600 6562 .44945 .5
3 MDSYS 1819 14600 8381 .57404 .66667
4 PUBLIC 1047 14600 9428 .64575 .75
5 XDB 985 14600 10413 .71322 .8
6 SYS 942 14600 11355 .77774 .83333
7 SYSTEM 641 14600 11996 .82164 .85714
8 CTXSYS 405 14600 12401 .84938 .875
9 WMSYS 387 14600 12788 .87589 .88889
10 DVSYS 352 14600 13140 .9 .9
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
11 SH 309 14600 13449 .92116 .90909
12 ORDDATA 292 14600 13741 .94116 .91667
13 LBACSYS 209 14600 13950 .95548 .92308
14 OE 142 14600 14092 .96521 .92857
15 SCOTT 96 14600 14188 .97178 .93333
16 GSMADMIN_INTERNAL 77 14600 14265 .97705 .9375
17 IX 58 14600 14323 .98103 .94118
18 DBSNMP 55 14600 14378 .98479 .94444
19 PM 44 14600 14422 .98781 .94737
20 HR 35 14600 14457 .99021 .95
21 OLAPSYS 25 14600 14482 .99192 .95238
22 OJVMSYS 23 14600 14505 .99349 .95455
23 DVF 19 14600 14524 .99479 .95652
24 FLOWS_FILES 13 14600 14537 .99568 .95833
25 AUDSYS 12 14600 14549 .99651 .96
26 ORDPLUGINS 10 14600 14569 .99788 .96154
27 OUTLN 10 14600 14569 .99788 .96296
28 BI 8 14600 14593 .99952 .96429
29 ORACLE_OCM 8 14600 14593 .99952 .96552
30 SI_INFORMTN_SCHEM 8 14600 14593 .99952 .96667
31 APPQOSSYS 5 14600 14598 .99986 .96774
32 TEST 2 14600 14600 1 .96875
32 rows selected.
--//注意看下劃線,可以分析buckert=10的情況下,t1表owner欄位建立的直方圖是HYBRID,t2建立的直方圖是TOP-FREQUENCY.
--// a1.sql
exec dbms_stats.gather_table_stats(ownname=>user,tabname=>'&1',method_opt=>'for columns owner size &2');
select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER';
SCOTT@test01p> @ a1 T1 10
PL/SQL procedure successfully completed.
old 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER'
new 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='T1' and column_name ='OWNER'
COLUMN_NAME NUM_DISTINCT DENSITY HISTOGRAM SAMPLE_SIZE
-------------------- ------------ ---------- --------------- -----------
OWNER 32 .018378 HYBRID 14599
--//DENSITY=.018378 如何計算呢?我不知道...
SCOTT@test01p> @ a1 T2 10
PL/SQL procedure successfully completed.
old 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='&1' and column_name ='OWNER'
new 1: select column_name,num_distinct,density,histogram,SAMPLE_SIZE from user_tab_col_statistics where table_name ='T2' and column_name ='OWNER'
COLUMN_NAME NUM_DISTINCT DENSITY HISTOGRAM SAMPLE_SIZE
-------------------- ------------ ---------- --------------- -----------
OWNER 32 .000034247 TOP-FREQUENCY 14600
--//DENSITY=1/2/14600 = .00003424657534246575
--//可以發現T1與T2在owner欄位上一個建立的是HYBRID,一個是TOP-FREQUENCY。
3.對比:
SCOTT@test01p> column ENDPOINT_ACTUAL_VALUE_RAW noprint
SCOTT@test01p> select * from user_tab_histograms where table_name ='T1' and column_name ='OWNER';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3405 3.3913E+35 APEX_040200 3405 SHARED
T1 OWNER 3890 3.5442E+35 DBSNMP 55 SHARED
T1 OWNER 4386 3.7551E+35 HR 35 SHARED
T1 OWNER 6472 4.0119E+35 MDSYS 1819 SHARED
T1 OWNER 6962 4.1186E+35 ORDDATA 292 SHARED
T1 OWNER 10129 4.1186E+35 ORDSYS 3157 SHARED
T1 OWNER 11230 4.1711E+35 PUBLIC 1047 SHARED
T1 OWNER 12584 4.3277E+35 SYS 941 SHARED
T1 OWNER 13225 4.3277E+35 SYSTEM 641 SHARED
T1 OWNER 14599 4.5831E+35 XDB 985 SHARED
10 rows selected.
SCOTT@test01p> select * from user_tab_histograms where table_name ='T2' and column_name ='OWNER';
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T2 OWNER 3405 3.3913E+35 APEX_040200 0 SHARED
T2 OWNER 3810 3.4959E+35 CTXSYS 0 SHARED
T2 OWNER 4162 3.5483E+35 DVSYS 0 SHARED
T2 OWNER 5981 4.0119E+35 MDSYS 0 SHARED
T2 OWNER 9138 4.1186E+35 ORDSYS 0 SHARED
T2 OWNER 10185 4.1711E+35 PUBLIC 0 SHARED
T2 OWNER 11127 4.3277E+35 SYS 0 SHARED
T2 OWNER 11768 4.3277E+35 SYSTEM 0 SHARED
T2 OWNER 12155 4.5330E+35 WMSYS 0 SHARED
T2 OWNER 13140 4.5831E+35 XDB 0 SHARED
10 rows selected.
--//你可以發現HYBRID與TOP-FREQUENCY直方圖的一點不同之處,對於HYBRID histogram欄位ENDPOINT_REPEAT_COUNT記錄
--//ENDPOINT_ACTUAL_VALUE出現的頻度。
select a.* from user_tab_histograms a where a.table_name ='T1' and a.column_name ='OWNER'
and not exists ( select 1 from user_tab_histograms where table_name ='T2' and column_name ='OWNER' and
ENDPOINT_ACTUAL_VALUE=a.ENDPOINT_ACTUAL_VALUE);
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3890 3.5442E+35 DBSNMP 55 SHARED
T1 OWNER 4386 3.7551E+35 HR 35 SHARED
T1 OWNER 6962 4.1186E+35 ORDDATA 292 SHARED
--//這3個ENDPOINT_ACTUAL_VALUE並不是流行值。
4.測試:
SCOTT@test01p> alter session set statistics_level = all;
Session altered.
SCOTT@test01p> select count(*) from t1 where owner='HR';
COUNT(*)
--------
35
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID axgs6vcm4mvs3, child number 0
-------------------------------------
select count(*) from t1 where owner='HR'
Plan hash value: 3724264953
---------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 9 (100)| | 1 |00:00:00.01 | 31 |
| 1 | SORT AGGREGATE | | 1 | 1 | 8 | | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 35 | 280 | 9 (0)| 00:00:01 | 35 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"='HR')
--//出現在直方圖內的值估計很準確。
SCOTT@test01p> select count(*) from t1 where owner='CTXSYS';
COUNT(*)
----------
405
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 8f6jd9fzfqqz4, child number 0
-------------------------------------
select count(*) from t1 where owner='CTXSYS'
Plan hash value: 3724264953
---------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 9 (100)| | 1 |00:00:00.01 | 31 |
| 1 | SORT AGGREGATE | | 1 | 1 | 8 | | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 214 | 1712 | 9 (0)| 00:00:01 | 405 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"='CTXSYS')
D:\tools\sqllaji>cat 10053x.sql
execute dbms_sqldiag.dump_trace(p_sql_id=>'&1',p_child_number=>&2,p_component=>'Compiler',p_file_id=>'&&1');
SCOTT@test01p> @ 10053x 8f6jd9fzfqqz4 0
PL/SQL procedure successfully completed.
***************************************
SINGLE TABLE ACCESS PATH
Single Table Cardinality Estimation for T1[T1]
SPD: Return code in qosdDSDirSetup: NOCTX, estType = TABLE
kkecdn: Single Table Predicate:"T1"."OWNER"='CTXSYS'
Column (#1):
NewDensity:0.014687, OldDensity:0.018378 BktCnt:14599.000000, PopBktCnt:8381.000000, PopValCnt:3, NDV:32
Column (#1): OWNER(VARCHAR2)
AvgLen: 8 NDV: 32 Nulls: 0 Density: 0.014687
Histogram: Hybrid #Bkts: 10 UncompBkts: 14599 EndPtVals: 10 ActualVal: yes
Using density: 0.014687 of col #1 as selectivity of pred having unreasonably low value
Table: T1 Alias: T1
Card: Original: 14599.000000 Rounded: 214 Computed: 214.413793 Non Adjusted: 214.413793
Scan IO Cost (Disk) = 9.000000
Scan CPU Cost (Disk) = 2389250.320000
Cost of predicates:
io = NOCOST, cpu = 50.000000, sel = 0.014687 flag = 2048 ("T1"."OWNER"='CTXSYS')
Total Scan IO Cost = 9.000000 (scan (Disk))
+ 0.000000 (io filter eval) (= 0.000000 (per row) * 14599.000000 (#rows))
= 9.000000
Total Scan CPU Cost = 2389250.320000 (scan (Disk))
+ 729950.000000 (cpu filter eval) (= 50.000000 (per row) * 14599.000000 (#rows))
= 3119200.320000
Access Path: TableScan
Cost: 9.138291 Resp: 9.138291 Degree: 0
Cost_io: 9.000000 Cost_cpu: 3119200
Resp_io: 9.000000 Resp_cpu: 3119200
Best:: AccessPath: TableScan
Cost: 9.138291 Degree: 1 Resp: 9.138291 Card: 214.413793 Bytes: 0.000000
check parallelism for statement[<unnamed>]
kkfdPaPrm.1:curInst:4, curpxEnabled=1, curCPUCount=1
kkfdPaPrm.2:sessInst:4, sesspxEnabled=1, sesCPUCount=1
kkfdPaForcePrm: dop:1 ()
use dictionary DOP(1) on table
kkfdPaPrm:- The table : 27639
kkfdPaPrm:DOP = 1 (computed from hint/dictionary/autodop)
kkfdiPaPrm: dop:1 serial(?) flags: 1
***************************************
--//使用 NewDensity:0.014687
BktCnt:14599.000000, PopBktCnt:8381.000000, PopValCnt:3, NDV:32
--//非流行值的數量: 14599-8381 = 6218
--//非流行值的桶數量: 32-3=29
--//非流行值的數量/非流行值的桶數量 6218/29 = 214.41379310344827586206,四捨五入214,正好符合執行計劃的推斷.
--//NewDensity的計算 =6218/14599/29 = .01468688219079719678,,非常接近.
--//問題是hybrid histogram 如何確定PopValCnt:3.
--//實際上確定PopValCnt就是指 endpoint_repeat_count - sample_size/num_buckets的bucket數量。
--//對於本例子:
SCOTT@test01p> column ENDPOINT_ACTUAL_VALUE_RAW noprint
SCOTT@test01p> select * from user_tab_histograms where table_name ='T1' and column_name ='OWNER' and endpoint_repeat_count - 14599/10>0;
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE ENDPOINT_ACTUAL_VALUE ENDPOINT_REPEAT_COUNT SCOPE
---------- ----------- --------------- -------------- --------------------- --------------------- ------
T1 OWNER 3405 3.3913E+35 APEX_040200 3405 SHARED
T1 OWNER 6472 4.0119E+35 MDSYS 1819 SHARED
T1 OWNER 10129 4.1186E+35 ORDSYS 3157 SHARED
--//PopValCnt=3
--//實際上感覺Hybrid histogram很複雜,大家可以參考連結
--//我自己還有1個疑問就是DENSITY=.018378如何計算的。
--//如果使用expland plan for 檢視繫結變數的執行計劃:
SCOTT@test01p> explain plan for Select count(*) from t1 where owner=:v_owner;
Explained.
SCOTT@test01p> @ dp
PLAN_TABLE_OUTPUT
---------------------------
Plan hash value: 3724264953
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 8 | 9 (0)| 00:00:01 |
| 1 | SORT AGGREGATE | | 1 | 8 | | |
|* 2 | TABLE ACCESS FULL| T1 | 456 | 3648 | 9 (0)| 00:00:01 |
---------------------------------------------------------------------------
--//這裡的rows不是透過 14599*.018378 = 268.300422計算得來的,而是14599/32 = 456.21875得來的,這樣檢視中記錄的值毫無意義。
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$1
2 - SEL$1 / T1@SEL$1
Outline Data
-------------
/*+
BEGIN_OUTLINE_DATA
FULL(@"SEL$1" "T1"@"SEL$1")
OUTLINE_LEAF(@"SEL$1")
ALL_ROWS
DB_VERSION('12.2.0.1')
OPTIMIZER_FEATURES_ENABLE('12.2.0.1')
IGNORE_OPTIM_EMBEDDED_HINTS
END_OUTLINE_DATA
*/
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("OWNER"=:V_OWNER)
Column Projection Information (identified by operation id):
-----------------------------------------------------------
1 - (#keys=0) COUNT(*)[22]
2 - (rowset=1019)
40 rows selected.
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2649905/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- [20170603]12c Top Frequency histogram.txtHistogram
- Oracle 12c新特性 - Hybrid histogram 3OracleHistogram
- Oracle 12c新特性 - Hybrid histogram 2OracleHistogram
- Oracle 12c新特性 - Hybrid histogram 1OracleHistogram
- hybrid筆記筆記
- Hybrid linuxLinux
- Hybrid治理(UIWebView&WKWebView)UIWebView
- Hybrid 混合App開發APP
- Access、Trunk、Hybrid含義及Hybrid埠型別的常見配置應用型別
- Django+Hybrid 初體驗Django
- Hybrid App開發實戰APP
- Hybrid APP 開發(六):JSSDKAPPJS
- Hybrid App從概念到實戰APP
- 探究Hybrid-APP技術原理APP
- hybrid混合編譯開發 更新編譯
- Cordova+Vue快速搭建Hybrid AppVueAPP
- Centaur: A Framework for Hybrid CPU-FPGA DatabasesFrameworkFPGADatabase
- Identity Server 4 - Hybrid Flow - ClaimsIDEServerAI
- AndroidStudio 建立 Hybrid App工程AndroidAPP
- ENSP Demo3 VLAN Trunk & Hybrid
- 從JSCore瞭解Hybrid開發JS
- Hybrid App技術解析 — 實戰篇APP
- Hybrid App技術解析 -- 原理篇APP
- Hybrid App技術解析 -- 實戰篇APP
- Hybrid App技術解析 — 原理篇APP
- 小程式會讓Hybrid App崛起嗎APP
- 別闖進Hybrid App的誤區APP
- Multi-Device Hybrid Apps (Preview)devAPPView
- 實驗8.Vlan Hybrid實驗
- 常見Hybrid App框架優劣對比APP框架
- Hybrid前端jsbridge設計原理分析前端JS
- 如何打造一個高效能 Hybrid appAPP
- Android Hybrid App四大坑AndroidAPP
- Oracle Database Compression 3 - Hybrid Columnar CompressionOracleDatabase
- iOS開發基礎117-HybridiOS
- 【12c Partitioning】Oracle 12c Partitioning特性Oracle
- Oracle Hybrid Columnar Compression(HCC) 混合列壓縮Oracle
- Oracle 12cOracle