[20170402]函式索引standard_hash.txt

lfree發表於2017-04-04
[20170402]函式索引standard_hash.txt

--//這幾天放假,在家裡看書<<Apress.Expert.Oracle.Indexing.and.Access.Paths.Maximum.Performance.for.Your.Database.2nd.Edition.148421983X.pdf>>
--//這本書比較合適初學者,我以前看過第一版,所以這個版本看的很快.

--//裡面提到函式standard_hash,可以用來滿足一般的查詢,做一個記錄.
1.環境:
SCOTT@test01p> @ ver1

PORT_STRING                    VERSION        BANNER                                                                               CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0           12.1.0.1.0     Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production              0

2.測試資料:
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));

insert into t
select level
,lpad('x',100,'x')||level v1
,lpad('a',100,'a')||level v2
from dual
connect by level < 2e5;

create index i_t_v1 on t(v1);
create index i_t_v2 on t(v2);

--//分析略.
SCOTT@test01p> select segment_name,header_file,header_block, BYTES,BLOCKS from dba_segments where owner=user and segment_name in ('I_T_V1','I_T_V2');
SEGMENT_NAME         HEADER_FILE HEADER_BLOCK      BYTES     BLOCKS
-------------------- ----------- ------------ ---------- ----------
I_T_V1                         9          362   28311552       3456
I_T_V2                         9        10602   54525952       6656

--//因為nvarchar2型別的儲存問題,導致在v2欄位建立的索引比在v1欄位建立的索引大許多.

3.測試:
select  * from t where v2=lpad('a',100,'a')||42;
Plan hash value: 970572957
-----------------------------------------------------------------------------------------------
| Id  | Operation                           | Name   | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
-----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |        |        |       |     5 (100)|          |
|   1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T      |      1 |   323 |     5   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | I_T_V2 |      1 |       |     4   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1

select  * from t where v1=lpad('x',100,'x')||42;
Plan hash value: 1965038091
-----------------------------------------------------------------------------------------------
| Id  | Operation                           | Name   | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
-----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |        |        |       |     4 (100)|          |
|   1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T      |      1 |   323 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | I_T_V1 |      1 |       |     3   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1

--//都能很好的使用索引,而如果建立直方圖呢?這也是我生產系統遇到的問題.

3.建立直方圖:
SCOTT@test01p> execute sys.dbms_stats.gather_table_stats ( OwnName => user,TabName => 't',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE auto ',Cascade => True ,No_Invalidate => false)
PL/SQL procedure successfully completed.

SCOTT@test01p> @ tab_lh scott T ''
DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT   OWNER TABLE_NAME COLUMN
SAMPLE  : @ TAB_LH TABLE_NAME [COLUMN_NAME]
IF NOT INPUT COLUMN_NAME ,USE "" .

COLUMN_NAME              DATA_TYPE  DATA_LENGTH N NUM_DISTINCT    DENSITY SAMPLE_SIZE TRANS_LOW                        TRANS_HIGH                        NUM_NULLS NUM_BUCKETS LAST_ANALYZED       HISTOGRAM       DATA_DEFAULT
------------------------ ---------- ----------- - ------------ ---------- ----------- -------------------------------- -------------------------------- ---------- ----------- ------------------- --------------- ---------------
ID                       NUMBER              22 Y       199999    .000005      199999 1                                199999                                    0           1 2017-04-04 21:14:52 NONE
V1                       VARCHAR2           200 Y            1   .0000025      199999 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx          0           1 2017-04-04 21:14:52 FREQUENCY
                                                                                      xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

V2                       NVARCHAR2          400 Y            1   .0000025      199999                                                                            0           1 2017-04-04 21:14:52 FREQUENCY

--//可以發現建立了直方圖.可以發現v1,v2 NUM_DISTINCT=1.

select  * from t where v2=lpad('a',100,'a')||42;
Plan hash value: 1601196873
---------------------------------------------------------------------------
| Id  | Operation         | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
---------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |        |       |  2739 (100)|          |
|*  1 |  TABLE ACCESS FULL| T    |    199K|    61M|  2739   (1)| 00:00:01 |
---------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1

select  * from t where v1=lpad('x',100,'x')||42;
Plan hash value: 1601196873
---------------------------------------------------------------------------
| Id  | Operation         | Name | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
---------------------------------------------------------------------------
|   0 | SELECT STATEMENT  |      |        |       |  2739 (100)|          |
|*  1 |  TABLE ACCESS FULL| T    |    199K|    61M|  2739   (1)| 00:00:01 |
---------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1

--//因為11G下對於建立直方圖僅僅對前面32個字元敏感,而對於nvarchar2型別僅僅對前面16個字元敏感(注與安裝時選擇的字符集有關).
--//我一般選擇國家字符集AL16UTF16,看來以後越來越趨向使用AL32UTF8.這樣預設情況下直方圖變成了雞肋,毫無用處,執行計劃選擇了全表掃描.

4.這樣可以使用一種特殊的方法解決,就算反轉索引:
drop index i_t_v2;
create index if_t_v2 on t(v2 desc);
select  * from t where v2=lpad('a',100,'a')||42;

Plan hash value: 2068059304
------------------------------------------------------------------------------------------------
| Id  | Operation                           | Name    | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |         |        |       |    43 (100)|          |
|   1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T       |      1 |   323 |    43   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | IF_T_V2 |      1 |       |    42   (0)| 00:00:01 |
------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1

--//注:反轉索引包含NULL,這點注意.而且不合適範圍掃描.比如like.
--//可以發現使用索引.

--//即使建立直方圖也一樣.
SCOTT@test01p> execute sys.dbms_stats.gather_table_stats ( OwnName => nvl('',user),TabName => 't',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE auto for all hidden columns size 254 ',Cascade => True ,No_Invalidate => false)

SCOTT@test01p> @ tab_lh scott T ''

DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT   OWNER TABLE_NAME COLUMN
SAMPLE  : @ TAB_LH TABLE_NAME [COLUMN_NAME]
IF NOT INPUT COLUMN_NAME ,USE "" .

COLUMN_NAME              DATA_TYPE  DATA_LENGTH N NUM_DISTINCT    DENSITY SAMPLE_SIZE TRANS_LOW                        TRANS_HIGH                        NUM_NULLS NUM_BUCKETS LAST_ANALYZED       HISTOGRAM       DATA_DEFAULT
------------------------ ---------- ----------- - ------------ ---------- ----------- -------------------------------- -------------------------------- ---------- ----------- ------------------- --------------- --------------------
ID                       NUMBER              22 Y       199999    .000005      199999 1                                199999                                    0           1 2017-04-04 21:28:13 NONE
V1                       VARCHAR2           200 Y       199999    .000005      199999 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx          0           1 2017-04-04 21:28:13 NONE
                                                                                      xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
V2                       NVARCHAR2          400 Y       199999    .000005      199999                                                                            0           1 2017-04-04 21:28:13 NONE
SYS_NC00004$             RAW                601 Y            1   .0000025      199999                                                                            0           1 2017-04-04 21:29:00 FREQUENCY       "V2"

--//這樣又會選擇全表掃描.SYS_NC00004$ 的 NUM_DISTINCT=1.

--//同時又產生一個問題,如果欄位很長,索引很大,而使用函式standard_hash,這樣就很好解決這個問題.
--//而不用修改sql語句.
drop index if_t_v2;
create index if_t_v2 on t(standard_hash(v2));
--//重新分析,建立函式索引最好重新分析,因為預設oracle不會在這個欄位上建立一些統計資訊.

SCOTT@test01p> select segment_name,header_file,header_block, BYTES,BLOCKS from dba_segments where owner=user and segment_name in ('I_T_V1','IF_T_V2');
SEGMENT_NAME         HEADER_FILE HEADER_BLOCK      BYTES     BLOCKS
-------------------- ----------- ------------ ---------- ----------
I_T_V1                         9          362   28311552       3456
IF_T_V2                        9        10602    8388608       1024

--//對比前面可以發現IF_T_V2佔用空間小了許多.

select  * from t where v2=lpad('a',100,'a')||42;
Plan hash value: 2068059304
------------------------------------------------------------------------------------------------
| Id  | Operation                           | Name    | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |         |        |       |     4 (100)|          |
|*  1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T       |      1 |   344 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | IF_T_V2 |      1 |       |     3   (0)| 00:00:01 |
------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1

--//當然對於範圍掃描一樣無效.
--//建立直方圖呢?

SCOTT@test01p> execute sys.dbms_stats.gather_table_stats ( OwnName => nvl('',user),TabName => 't',Estimate_Percent => NULL,Method_Opt => 'FOR ALL COLUMNS SIZE auto for all hidden columns size 254 ',Cascade => True ,No_Invalidate => false)
PL/SQL procedure successfully completed.

DISPLAY TABLE_NAME OF COLUMN_NAME INFORMATION.
INPUT   OWNER TABLE_NAME COLUMN
SAMPLE  : @ TAB_LH TABLE_NAME [COLUMN_NAME]
IF NOT INPUT COLUMN_NAME ,USE "" .

COLUMN_NAME              DATA_TYPE  DATA_LENGTH N NUM_DISTINCT    DENSITY SAMPLE_SIZE TRANS_LOW                        TRANS_HIGH                        NUM_NULLS NUM_BUCKETS LAST_ANALYZED       HISTOGRAM       DATA_DEFAULT
------------------------ ---------- ----------- - ------------ ---------- ----------- -------------------------------- -------------------------------- ---------- ----------- ------------------- --------------- --------------------
ID                       NUMBER              22 Y       199999    .000005      199999 1                                199999                                    0           1 2017-04-04 21:28:13 NONE
V1                       VARCHAR2           200 Y       199999    .000005      199999 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx          0           1 2017-04-04 21:28:13 NONE
                                                                                      xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

V2                       NVARCHAR2          400 Y       199999    .000005      199999                                                                            0           1 2017-04-04 21:28:13 NONE
SYS_NC00004$             RAW                 20 Y       200000    .000005      200000                                                                            0         254 2017-04-04 21:57:22 HYBRID          STANDARD_HASH("V2")

--//可以發現SYS_NC00004$ NUM_DISTINCT=200000.
select  * from t where v2=lpad('a',100,'a')||42
Plan hash value: 2068059304
------------------------------------------------------------------------------------------------
| Id  | Operation                           | Name    | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |         |        |       |     4 (100)|          |
|*  1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T       |      1 |   344 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | IF_T_V2 |      1 |       |     3   (0)| 00:00:01 |
------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1
--//依舊會使用索引.

5.//這樣就提出了一個索引建立思路,如果要建立的索引欄位很長,而沒有範圍查詢要求,可以透過建立standard_hash函式索引,來解決這
--//個問題.另外我還測試ora_hash函式,這個無法解決這個問題.要修改sql語句,
--//另外這個函式遇到clob欄位會出現異常.
--//重複測試:

create table t1 (a number,b varchar2(20),c clob);
insert into t1 values (1,'a','a');
commit ;

SCOTT@test01p> select ora_hash(b),ora_hash(c) from t1;
ORA_HASH(B) ORA_HASH(C)
----------- -----------
   29220167  1172292995

--//等3秒.

SCOTT@test01p> select ora_hash(b),ora_hash(c) from t1;
ORA_HASH(B) ORA_HASH(C)
----------- -----------
   29220167  4224997194

--//可以發現ORA_HASH(C)發生了改變. 12c執行更長的字串,可以達到32k.我以前提過超過4000欄位,應該按照lob來儲存.
--//http://blog.itpub.net/267265/viewspace-776806/

drop table t1 purge;
create table t1 (id number ,v1 varchar2(32767) );
insert into t1 values (1,lpad('n',100,'n'));
insert into t1 values (2,lpad('n',30000,'n'));
insert into t1 values (3,'a');
commit ;
select dump(ora_hash(v1) from t1;
--sleep 3
select dump(ora_hash(v1) from t1;

SCOTT@test01p> select id,dump(ora_hash(v1)) c40,ora_hash(v1) from t1;
        ID C40                                      ORA_HASH(V1)
---------- ---------------------------------------- ------------
         1 Typ=2 Len=6: 197,40,14,10,79,24            3913097823
         2 Typ=2 Len=6: 197,12,8,62,3,83              1107610282
         3 Typ=2 Len=5: 196,30,23,2,68                  29220167

--//等3秒.
SCOTT@test01p> select id,dump(ora_hash(v1)) c40,ora_hash(v1) from t1;
        ID C40                                      ORA_HASH(V1)
---------- ---------------------------------------- ------------
         1 Typ=2 Len=6: 197,40,14,10,79,24            3913097823
         2 Typ=2 Len=6: 197,12,8,62,3,83              1107610282
         3 Typ=2 Len=5: 196,30,23,2,68                  29220167

--//沒有使用clob欄位型別的問題.還是不推薦這樣使用.

總結:
1.可以發現函式standard_hash建立索引,可以用於等值查詢,實際上如果你看執行計劃可以發現有一點點不同.
select  * from t where v2=lpad('a',100,'a')||42;
Plan hash value: 2068059304
------------------------------------------------------------------------------------------------
| Id  | Operation                           | Name    | E-Rows |E-Bytes| Cost (%CPU)| E-Time   |
------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                    |         |        |       |     4 (100)|          |
|*  1 |  TABLE ACCESS BY INDEX ROWID BATCHED| T       |      1 |   344 |     4   (0)| 00:00:01 |
|*  2 |   INDEX RANGE SCAN                  | IF_T_V2 |      1 |       |     3   (0)| 00:00:01 |
------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1 / T@SEL$1
   2 - SEL$1 / T@SEL$1
Predicate Information (identified by operation id):
---------------------------------------------------
   1 - filter("V2"=U'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
              aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa42')
   2 - access("T"."SYS_NC00004$"=HEXTORAW('6B59D7D1A31FB4B3B6820FEA9D234B1B352596A9'))
--//除了access,還多了1個filter.

2.實際上除了standard_hash,還有trunc,substr(欄位,1,N)也可以用於等值查詢,可以使用這樣的函式索引,其他我還不知道.
  但是standard_hash不能支援範圍查詢.但是帶來是索引佔用空間減少.

3.另外必須要注意一個問題,就是密集插入可能會消耗CPU資源,這點特別注意.

drop table t purge ;
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));
create index i_t_v2 on t(v2);

set timing on
insert into t
select level
,lpad('x',100,'x')||level v1
,lpad('a',100,'a')||level v2
from dual
connect by level < 2e5;

Elapsed: 00:00:46.01
--//使用46秒.

--//重複測試:
drop table t purge ;
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));
create index if_t_v2 on t(standard_hash(v2));

insert into t
select level
,lpad('x',100,'x')||level v1
,lpad('a',100,'a')||level v2
from dual
connect by level < 2e5;

Elapsed: 00:00:15.34
--//使用15秒.

--//重複測試使用索引:
drop table t purge ;
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));

insert into t
select level
,lpad('x',100,'x')||level v1
,lpad('a',100,'a')||level v2
from dual
connect by level < 2e5;
Elapsed: 00:00:02.04
--//使用2秒.

--//索引這種cpu運算強度還是很大的,前面第1種情況主要是建立索引消耗磁碟空間大.如果選擇小的字元呢.

drop table t purge ;
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));
create index i_t_v2 on t(v2);

insert into t
select level
,lpad('x',190,'x')||level v1
,lpad('a',6,'a')||level v2
from dual
connect by level < 2e5;

Elapsed: 00:00:06.18
--//僅僅需要6秒.如果在這種的情況使用standard_hash索引呢?

drop table t purge ;
create table t (id number ,v1 varchar2(200) ,v2 nvarchar2(200));
create index if_t_v2 on t(standard_hash(v2));

insert into t
select level
,lpad('x',190,'x')||level v1
,lpad('a',6,'a')||level v2
from dual
connect by level < 2e5;
Elapsed: 00:00:12.46

--//欄位長度小效果就不明顯了.

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2136563/,如需轉載,請註明出處,否則將追究法律責任。

相關文章