12條語句學會oracle cbo計算(十四)

selectshen發表於2015-05-28
12條語句學會oracle cbo計算(十四)
工作中,你可能會遇到統計資料正確,但就是不走你想要的執行計劃的情況,最後一般透過hint或sql_profile解決.一條sql語句的效能主要依賴於好的物理結構,準確的系統統計資料,準確的物件統計資料,合理的查詢最佳化器引數,合理的系統引數.這些因素也就是cost計算基本引數.因此產生的sql最佳化技術有很多,大概包含:改變訪問結構,修改sql語句,加hint,改變執行環境,sql profile,stored outlines,sql plan baseline.其實無外乎就是透過調整sql適應環境或者透過調整環境適應sql.如果清楚Cost值的演算法,就可以透過演算法的對比,找到問題的真正原因,更有針對性的去解決問題.
全文主要參考Jonathan Lewis的<>和黃瑋(fuyuncat)的<>,特別黃瑋(fuyuncat)的這本,是非常值得去學習的.

準備用14篇來描述完,前2篇是統計資料,演算法公式說明,後12篇用12條語句分別去套用說明.
本篇例子的特徵是雙表,雜湊關聯,並行,和上一篇差別是雙表並行雜湊關聯

--產生測試資料
drop table scott.t_test1 purge;
drop table scott.t_test2 purge;
create table scott.t_test1 as select * from dba_objects;
insert into scott.t_test1  select * from scott.t_test1;
insert into scott.t_test1  select * from scott.t_test1;
insert into scott.t_test1  select * from scott.t_test1;
create table scott.t_test2 as select * from dba_objects;
insert into scott.t_test2  select * from scott.t_test2;
insert into scott.t_test2  select * from scott.t_test2;
insert into scott.t_test2  select * from scott.t_test2;
begin
dbms_stats.gather_table_stats('scott','t_test1');
end;
begin
dbms_stats.gather_table_stats('scott','t_test2');
end;

--產生語句的執行計劃
--這裡我是在pl/sql developer,是因為不用象10053那麼麻煩就可以產生想要的幾個值用以對比.
explain plan for select /*+use_hash(a,b) parallel(a,2) parallel(b,2)*/* from
scott.t_test1 a,scott.t_test2 b where a.object_id=b.object_id;

SELECT lpad(' ', 2 * (LEVEL - 1)) || operation operation,
       options,
       object_name,
       cardinality,
       bytes,
       io_cost,
       cpu_cost,
       cost,
       time
  FROM plan_table
 START WITH id = 0
CONNECT BY PRIOR id = parent_id;
/*
OPERATION    OPTIONS    OBJECT_NAME    CARDINALITY    BYTES    IO_COST    CPU_COST    COST    TIME
SELECT STATEMENT            5455051    1069189996    6640    754199663    6660    80
  PX COORDINATOR                                
    PX SEND    QC (RANDOM)    :TQ10002    5455051    1069189996    6640    754199663    6660    80
      HASH JOIN    BUFFERED        5455051    1069189996    6640    754199663    6660    80
        PX RECEIVE            690672    67685856    1524    164993867    1529    19
          PX SEND    HASH    :TQ10000    690672    67685856    1524    164993867    1529    19
            PX BLOCK    ITERATOR        690672    67685856    1524    164993867    1529    19
              TABLE ACCESS    FULL    T_TEST1    690672    67685856    1524    164993867    1529    19
        PX RECEIVE            690680    67686640    1524    164995778    1529    19
          PX SEND    HASH    :TQ10001    690680    67686640    1524    164995778    1529    19
            PX BLOCK    ITERATOR        690680    67686640    1524    164995778    1529    19
              TABLE ACCESS    FULL    T_TEST2    690680    67686640    1524    164995778    1529    19
*/

--查詢表的統計資料
select rpad(table_name, 10, ' ') table_name,
       rpad(num_rows, 10, ' ') num_rows,
       rpad(blocks, 10, ' ') blocks,
       avg_row_len
  from dba_tables
 where owner = 'SCOTT'
   and table_name in ('T_TEST1','T_TEST2');
/*
TABLE_NAME    NUM_ROWS    BLOCKS    AVG_ROW_LEN
T_TEST2       690680        10125         98
T_TEST1       690672        10125         98
*/

--查詢列的統計資料
select rpad(table_name, 12, ' ') table_name,
       rpad(column_name, 12, ' ') column_name,
       rpad(num_distinct, 8, ' ') num_distinct,
       rpad(utl_raw.cast_to_number(low_value), 15, ' ') low_value,
       rpad(utl_raw.cast_to_number(high_value), 10, ' ') high_value,
       rpad(nullable, 8, ' ') nullable,
       rpad(num_nulls, 8, ' ') num_nulls,
       rpad(avg_col_len, 6, ' ') avg_col_len,
       rpad(density, 20, ' ') density,
       histogram
  from dba_tab_columns
 where owner = 'SCOTT'
   and table_name in ('T_TEST1','T_TEST2')
   and column_name = 'OBJECT_ID';
/*
TABLE_NAME    COLUMN_NAME    NUM_DISTINCT    LOW_VALUE    HIGH_VALUE    NULLABLE    NUM_NULLS    AVG_COL_LEN    DENSITY    HISTOGRAM
T_TEST1         OBJECT_ID       87448       2                  87783         Y           0           5         .0000114353673039978    NONE
T_TEST2         OBJECT_ID       87448       2                  87784         Y           0           5         .0000114353673039978    NONE
*/

--查詢各列平均長度之和
select table_name,sum(avg_col_len),count(1)
from dba_tab_col_statistics
where owner='SCOTT' and table_name in ( 'T_TEST1','T_TEST2')    
group by table_name;
/*
TABLE_NAME    SUM(AVG_COL_LEN)    COUNT(1)
T_TEST2    100    15
T_TEST1    100    15
*/

--查詢最佳化器引數
select rpad(name,40,' ') name,rpad(value,20,' ') value,isdefault
  from (select nam.ksppinm name,
               val.KSPPSTVL value,
               --nam.ksppdesc description,
               val.ksppstdf isdefault
          from sys.x$ksppi nam, sys.x$ksppcv val
         where nam.inst_id = val.inst_id
           and nam.indx = val.indx)
 where name in
       ('_db_file_optimizer_read_count', 'db_file_multiblock_read_count',
        '_optimizer_block_size', '_table_scan_cost_plus_one',
        '_optimizer_ceil_cost', '_optimizer_cost_model',
        '_optimizer_cache_stats', '_smm_auto_min_io_size',
        '_smm_auto_max_io_size', '_smm_min_size', '_smm_max_size',
        '_smm_px_max_size', 'sort_area_retained_size', 'sort_area_size',
        'workarea_size_policy','_optimizer_percent_parallel');
/*
NAME    VALUE    ISDEFAULT
db_file_multiblock_read_count               116                     TRUE
_db_file_optimizer_read_count               8                       TRUE
sort_area_size                              65536                   TRUE
sort_area_retained_size                     0                       TRUE
_optimizer_cost_model                       CHOOSE                  TRUE
_optimizer_cache_stats                      FALSE                   TRUE
_table_scan_cost_plus_one                   TRUE                    TRUE
workarea_size_policy                        AUTO                    TRUE
_smm_auto_min_io_size                       56                      TRUE
_smm_auto_max_io_size                       248                     TRUE
_smm_min_size                               286                     TRUE
_smm_max_size                               57344                   TRUE
_smm_px_max_size                            143360                  TRUE
_optimizer_percent_parallel                 101                     TRUE
_optimizer_block_size                       8192                    TRUE
_optimizer_ceil_cost                        TRUE                    TRUE
*/

--查詢系統統計資料
select rpad(pname, '20', ' ') pname,
       rpad(pval1, '20', ' ') pval1,
       rpad(pval2, '20', ' ') pval2
  from SYS.AUX_STATS$
 where sname = 'SYSSTATS_MAIN';
/*
PNAME    PVAL1    PVAL2
CPUSPEED                    
CPUSPEEDNW              3074.07407407407        
IOSEEKTIM               10                      
IOTFRSPEED              4096                    
MAXTHR                      
MBRC                        
MREADTIM                    
SLAVETHR                    
SREADTIM                                                
*/

--需要應用第二篇中的公式:
(1)NDV=dba_tab_co1umns.num_distinct
(2)DENS=dba_tab_co1umns.DENSITY
(3)ALLROWS=dba_tab1es.NUM_ROWS
(6)COLNB=dba_tab_co1umns.NULLABLE
(11)MBRC=最佳化器系統引數_db_fi1e_optimizer_read_count
(12)SAMINIO=最佳化器系統引數_smm_auto_min_io_size*1024
(13)SAMAXIO=最佳化器系統引數_smm_auto_max_io_size*1024
(14)OPTBLKSIZE=最佳化器系統引數_optimizer_b1ock_size
(15)SMPXMAX=最佳化器系統引數_smm_px_max_size*1024
(21)CPUSPEED=系統統計資料CPUSPEEDNW
(22)IOTFRSPEED=系統統計資料IOTFRSPEED
(23)IOSEEKTIM=系統統計資料IOSEEKTIM
(24)SREADTIM = IOSEEKTIM + OPTBLKSIZ/IOTFRSPEED
(25)MREADTIM = IOSEEKTIM + MBRC * OPTBLKSIZ/IOTFRSPEED
(26)SAMSIZE=最佳化器系統引數_smm_max_size*1024=58720256
(27)SASIZE=最佳化器系統引數_smm_min_size*1024=292864
(69)JOINSEL= LEASTEST(COLSEL[Outer], COLSEL[Inner])
(70)CARD[Join] = CARD[Outer]*CARD[Inner]*JOINSEL
(72)IOCOST = (#BLKS/MBRC)*(IOSEEKTIM + MBRC*OPTBLKSIZE/IOTFRSPEED)/(IOSEEKTIM+OPTBLKSIZE/IOTFRSPEED)
(73)CPUCOST = #CPUCYCLES /(CPUSPEED*SREADTIM)/1000
(99)BULDFRAGS = TRUNC(HSIZE[Outer]/OPTBLKSIZE+ 1)
(100)PROBEFRAGS = TRUNC(HSIZE[Inner]/OPTBLKSIZE+ 1)
(101)HASHBLKS = BULDFRAGS + PROBEFRAGS
(102)IOCOST[Hash] = CEIL(2*HASHBLKS/(MBRC-1)*(MREADTIM/SREADTIM+MBRC/TRUNC(SAMAXIO/OPTBLKSIZE) - (1+MREADTIM/SREADTIM/TRUNC(SAMAXIO/OPTBLKSIZE))))
(103)IOCOST[HJ]= IOCOST[Outer] + IOCOST[Inner] + IOCOST[Hash]
(106)SACL[Outer]= ACL[OuterColl]+...ACL[OuterColn]
(107)SACL[Inner]= ACL[InnerColl]+...ACL[InnerColn]
(108)HSIZE[Outer]= CARD[Outer]*(LEAST(ARL[Outer], SACL[Outer])+12)
(109)HSIZE[Inner]= CARD[Inner]*(LEAST(ARL[Outer], SACL[Inner])+12)
(110)#PTNS = GREATEST( DOP[Outer], DOP[Inner])
(111)MAXHASHAREA= GREATEST(160,SMINIO*4/OPTBLKSIZE,CEIL(SMPXMAX*2/#PTNS/OPTBLKSIZE),SMMAX/(160* 1024))
(112)HASHAREA= CEIL(GREATEST(SMINIO*4/OPTBLKSIZE,MAXHASHAERA*0.2))
(113)當HSIZE[Outer]>HASHAREA*OPTBLKSIZE*#PTNS,說明hash需要寫入磁碟,否則IOCOST[Hash]=0
(114)IOCOST[Outer](Paralle1)= ROUND(IOCOST[Outer](Seria1)*10/9/DOP[Outer])
(115)IOCOST[Inner](Paralle1) = ROUND(IOCOST[Inner](Seria1)*10/9/DOP[Inner])
(116)IOCOST[Hash] = CEIL(2*HASHBLKS/(MBRC-1 )*(MREADTIM/SREADTIM+MBRC/TRUNC(SAMINIO/OPTBLKSIZE)-(1 + MREADTIM/SREADTIM/TRUNC(SAMINIO/OPTBLKSIZE))))
(117)IOCOST[HJ]= IOCOST[Outer](Parallel)+IOCOST[Inner](Parallel)+IOCOST[Hash]

--套用上面的公式及資料進行計算
表名:T_TEST1
(3)ALLROWS=dba_tab1es.NUM_ROWS=690672
列名:OBJECT_ID
(1)NDV=dba_tab_co1umns.num_distinct=87448
(2)DENS=dba_tab_co1umns.DENSITY=.0000114353673039978
(6)COLNB=dba_tab_co1umns.NULLABLE=Y

表名:T_TEST2
(3)ALLROWS=dba_tab1es.NUM_ROWS=690680
列名:OBJECT_ID
(1)NDV=dba_tab_co1umns.num_distinct=87448
(2)DENS=dba_tab_co1umns.DENSITY=.0000114353673039978
(6)COLNB=dba_tab_co1umns.NULLABLE=Y

(11)MBRC=最佳化器系統引數_db_fi1e_optimizer_read_count=8
(12)SAMINIO=最佳化器系統引數_smm_auto_min_io_size*1024=56*1024=57344
(13)SAMAXIO=最佳化器系統引數_smm_auto_max_io_size*1024=248*1024=253952
(14)OPTBLKSIZE=最佳化器系統引數_optimizer_b1ock_size=8192
(15)SMPXMAX=最佳化器系統引數_smm_px_max_size=143360*1024=146800640
(21)CPUSPEED=系統統計資料CPUSPEEDNW=3074.07407407407
(22)IOTFRSPEED=系統統計資料IOTFRSPEED=4096
(23)IOSEEKTIM=系統統計資料IOSEEKTIM=10
(24)SREADTIM = IOSEEKTIM + OPTBLKSIZ/IOTFRSPEED=10+8192/4096=12
(25)MREADTIM = IOSEEKTIM + MBRC * OPTBLKSIZ/IOTFRSPEED=10+8*8192/4096=26
(26)SAMSIZE=最佳化器系統引數_smm_max_size*1024=58720256
(27)SASIZE=最佳化器系統引數_smm_min_size*1024=292864

--TABLE ACCESS    FULL
因為兩個全表掃描都沒有過濾條件,所以
ROWS[Outer]=ALLROWS=690672
ROWS[Inner]=ALLROWS=690680
(72)IOCOST = (#BLKS/MBRC)*(IOSEEKTIM + MBRC*OPTBLKSIZE/IOTFRSPEED)/(IOSEEKTIM+OPTBLKSIZE/IOTFRSPEED)
IOCOST[Outer] = (#BLKS/MBRC)*(IOSEEKTIM + MBRC*OPTBLKSIZE/IOTFRSPEED)/(IOSEEKTIM+OPTBLKSIZE/IOTFRSPEED)
              = (10125/8)*(10+8*8192/4096)/(10+8192/4096)
              = 2742.1875
由於_optimizer_ceil_cost=true,_table_scan_cost_plus_one=true,所以微調為:
IOCOST[Outer]=ceil(2742.1875)+1=2744
IOCOST[Inner] = (#BLKS/MBRC)*(IOSEEKTIM + MBRC*OPTBLKSIZE/IOTFRSPEED)/(IOSEEKTIM+OPTBLKSIZE/IOTFRSPEED)
              = (10125/8)*(10+8*8192/4096)/(10+8192/4096)
              = 2742.1875
由於_optimizer_ceil_cost=true,_table_scan_cost_plus_one=true,所以微調為:
IOCOST[Inner]=ceil(2742.1875)+1=2744
(114)IOCOST[Outer](Paralle1)= ROUND(IOCOST[Outer](Seria1)*10/9/DOP[Outer])
                            =ROUND(2744*10/9/2)
                            =1524
(115)IOCOST[Inner](Paralle1) = ROUND(IOCOST[Inner](Seria1)*10/9/DOP[Inner])
                             =ROUND(2744*10/9/2)
                             =1524
 --HASH JOIN
(69)JOINSEL= LEASTEST(COLSEL[Outer], COLSEL[Inner])
COLSEL[Outer]=1/NDV=1/87448
COLSEL[Inner]=1/NDV=1/87448
JOINSEL=LEAST(1/87448,1/87448)=1/87448

(70)CARD[Join] = CARD[Outer]*CARD[Inner]*JOINSEL
CARD[Outer]=ROWS[Outer]=690672
CARD[Inner]=ROWS[Inner]=690680
CARD[Join]= CARD[Outer]*CARD[Inner]*JOINSEL
          =690672*690680*1/87448
          =5455051.42438935= 5455051                           

(106)SACL[Outer]= ACL[OuterColl]+...ACL[OuterColn]
              =100
(107)SACL[Inner]= ACL[InnerColl]+...ACL[InnerColn]
               =100
(108)HSIZE[Outer]= CARD[Outer]*(LEAST(ARL[Outer], SACL[Outer])+12)
                =690672*(LEAST(98, 100)+12)
                =75973920
(109)HSIZE[Inner] = CARD[Inner]*(LEAST(ARL[Inner], SACL[Inner])+12)
                 =690680*(LEAST(98, 100)+12)
                 =75974800
(110)#PTNS = GREATEST( DOP[Outer], DOP[Inner])
DOP[Outer]=並行度=2
DOP[Inner]=並行度=2
#PTNS = GREATEST( DOP[Outer], DOP[Inner])
      =GREATEST(2,2)
      =2
(111)MAXHASHAREA= GREATEST(160,SMINIO*4/OPTBLKSIZE,CEIL(SMPXMAX*2/#PTNS/OPTBLKSIZE),SMMAX/(160* 1024))
                =GREATEST(160,57344*4/8192,CEIL(146800640*2/2/8192),58720256/(160* 1024))
                =17920
(112)HASHAREA= CEIL(GREATEST(SMINIO*4/OPTBLKSIZE,MAXHASHAERA*0.2))
             = CEIL(GREATEST(57344*4/8192,17920*0.08))
             =1434
(113)當HSIZE[Outer]>HASHAREA*OPTBLKSIZE*#PTNS,說明hash需要寫入磁碟,否則IOCOST[Hash]=0
HSIZE[Outer](75973920)>HASHAREA*OPTBLKSIZE(1434*8192*2=23494656)
(99)BULDFRAGS = TRUNC(HSIZE[Outer]/OPTBLKSIZE+ 1)
當在並行中:BULDFRAGS = CEIL(TRUNC(HSIZE[Outer]/OPTBLKSIZE+ 1)/#PTNS)
              =CEIL(TRUNC(75973920/8192+ 1)/2)
              =4638
(100)PROBEFRAGS = TRUNC(HSIZE[Inner]/OPTBLKSIZE+ 1)
當在並行中:PROBEFRAGS = CEIL(TRUNC(HSIZE[Inner]/OPTBLKSIZE+ 1)/#PTNS)
                = CEIL(TRUNC(75974800/8192+ 1)/2)
                =4638
(101)HASHBLKS = BULDFRAGS + PROBEFRAGS
              =4638+4638
              =9276
(116)IOCOST[Hash] = CEIL(2*HASHBLKS/(MBRC-1 )*(MREADTIM/SREADTIM+MBRC/TRUNC(SAMINIO/OPTBLKSIZE)-(1 + MREADTIM/SREADTIM/TRUNC(SAMINIO/OPTBLKSIZE))))
                  =CEIL(2*9276/(8-1)*(26/12+8/TRUNC(253952/8192) - (1+26/12/TRUNC(253952/8192))))
                  =3591
(117)IOCOST[HJ]= IOCOST[Outer](Parallel)+IOCOST[Inner](Parallel)+IOCOST[Hash]
               = 1524+1524+3591
               = 6639
(73)CPUCOST = 754199663 /(3074.07407407407*12)/1000=20.4451715873494
COST=IOCOST+CPUCOST=6639+20.4451715873494=6659.44517158735=6659

--可以看到,結果與執行計劃基本相同
ROWS=CARD[Join]=5455051
IOCOST[Outer](Parallel)=1524
IOCOST[Inner](Parallel)=1524
IOCOST[HJ]=6639
CPUCOST = 20.4451715873494
COST=IOCOST[HJ]=+CPUCOST=6659

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/28539951/viewspace-1674587/,如需轉載,請註明出處,否則將追究法律責任。

相關文章