[20170114]12c varchar2型別直方圖.txt

lfree發表於2017-01-15
[20170114]12c varchar2型別直方圖.txt

--我曾經提到慎用nvarchar2資料型別,連結:http://blog.itpub.net/267265/viewspace-2120925/
--我那裡提到資料型別nvarchar2型別,因為1個字元佔用2個位元組,這樣如果前面16個字元重複很多,直方圖的建立就是雞肋,
--毫無用處(因為分析僅僅對前面32個位元組有效),12c 直方圖支援更多型別: 高度直方圖,頻率直方圖.混和型別(HYBRID).

--看看12c關於直方圖方面的變化,透過例子說明:

1.環境:
SCOTT@test01p> @ ver1
PORT_STRING                    VERSION        BANNER                                                                               CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0           12.1.0.1.0     Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production              0

create table t (a varchar2(100));
insert into t select  '12345678901234567890123456789012'||level||'3456789123456789123456' a from dual connect by level < 5000;
commit ;

2.建立直方圖:

SCOTT@test01p> exec DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2049');
BEGIN DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2049'); END;

*
ERROR at line 1:
ORA-20000: Cannot parse for clause: FOR ALL COLUMNS SIZE 2049
ORA-06512: at "SYS.DBMS_STATS", line 33859
ORA-06512: at line 1
--2049 太大了,11g最大支援255.

SCOTT@test01p> exec DBMS_STATS.GATHER_TABLE_STATS(null,'T',method_opt => 'FOR ALL COLUMNS SIZE 2048');
PL/SQL procedure successfully completed.

--//12C可以支援2048.

SELECT COLUMN_NAME
      ,NUM_DISTINCT
      ,DENSITY
      ,NUM_BUCKETS
      ,HISTOGRAM
      ,LENGTH (UTL_RAW.cast_to_varchar2 (low_value)) L_length
      ,UTL_RAW.cast_to_varchar2 (low_value) L_A
      ,LENGTH (UTL_RAW.cast_to_varchar2 (HIGH_VALUE)) H_length
      ,UTL_RAW.cast_to_varchar2 (HIGH_VALUE) H_A
  FROM USER_TAB_COL_STATISTICS
 WHERE table_name = 'T';

Record View
As of: 2017/1/14 21:25:12

COLUMN_NAME:   A
NUM_DISTINCT:  4999
DENSITY:       0.0002
NUM_BUCKETS:   2048
HISTOGRAM:     HYBRID
L_LENGTH:      58
L_A:           1234567890123456789012345678901210003456789123456789123456
H_LENGTH:      57
H_A:           123456789012345678901234567890129993456789123456789123456

--//你可以發現儲存的長度發生了變數,而且bucket的數量增加到了2048.直方圖型別HYBRID.
--//也可以查詢:select * from DBA_TAB_HISTOGRAMS where owner=user and table_name='T' order by endpoint_number;
--//結果太長,大家可以自行檢測.

3.再來看看查詢的sql語句:

SCOTT@test01p> alter session set statistics_level=all;
Session altered.

SCOTT@test01p> select * from t where a='12345678901234567890123456789012'||1001||'3456789123456789123456';
A
----------------------------------------------------------
1234567890123456789012345678901210013456789123456789123456

SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  c4asjnvdvyqhn, child number 0
-------------------------------------
select * from t where a='12345678901234567890123456789012'||1001||'34567
89123456789123456'
Plan hash value: 1601196873
--------------------------------------------------------------------------------------------------------------------
| Id  | Operation         | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |      1 |        |       |    15 (100)|          |      1 |00:00:00.01 |      54 |
|*  1 |  TABLE ACCESS FULL| T    |      1 |      1 |    59 |    15   (0)| 00:00:01 |      1 |00:00:00.01 |      54 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
   1 - filter("A"='1234567890123456789012345678901210013456789123456789123456')

--//可以發現E-Rows與A-Rows一樣.

4.也就是oracle 12c現在能很好的處理長字串直方圖問題,但是究竟支援多長,我繼續測試:

create table t1 (a varchar2(1000));
insert into  t1 select  lpad('x',996,'x')||level a from dual connect by level < 5000;
commit ;

exec DBMS_STATS.GATHER_TABLE_STATS(null,'T1',method_opt => 'FOR ALL COLUMNS SIZE 2048');

SELECT COLUMN_NAME
      ,NUM_DISTINCT
      ,DENSITY
      ,NUM_BUCKETS
      ,HISTOGRAM
      ,LENGTH (UTL_RAW.cast_to_varchar2 (low_value)) L_length
      ,UTL_RAW.cast_to_varchar2 (low_value) L_A
      ,LENGTH (UTL_RAW.cast_to_varchar2 (HIGH_VALUE)) H_length
      ,UTL_RAW.cast_to_varchar2 (HIGH_VALUE) H_A
  FROM USER_TAB_COL_STATISTICS
 WHERE table_name = 'T1';

 Record View
As of: 2017/1/14 21:41:33

COLUMN_NAME:   A
NUM_DISTINCT:  4999
DENSITY:       0.0002
NUM_BUCKETS:   1
HISTOGRAM:     HYBRID
L_LENGTH:      64
L_A:           xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
H_LENGTH:      64
H_A:           xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

--//也就是比原來增加了1倍,現在支援64位元組.但是看看現在的情況NUM_BUCKETS=1,也就是僅僅1個桶.

SCOTT@test01p> select * from t1 where a=lpad('x',996,'x')||1001;

SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  2cua094wf06kf, child number 0
-------------------------------------
select * from t1 where a=lpad('x',996,'x')||1001
Plan hash value: 3617692013
--------------------------------------------------------------------------------------------------------------------
| Id  | Operation         | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |      1 |        |       |   204 (100)|          |      1 |00:00:00.01 |     753 |
|*  1 |  TABLE ACCESS FULL| T1   |      1 |   4999 |  4886K|   204   (0)| 00:00:01 |      1 |00:00:00.01 |     753 |
--------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T1@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
   1 - filter("A"='xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
               ...
              xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1001')
32 rows selected.

--//現在E-Rows,A-Rows相差很遠了.這樣的直方圖也無用.


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2132462/,如需轉載,請註明出處,否則將追究法律責任。

相關文章