[20180928]exists與cardinality.txt

lfree發表於2018-09-29

[20180928]exists與cardinality.txt

--//最佳化遇到的問題,做一個例子演示出來.
1.環境:

SCOTT@test01p> @ ver1
PORT_STRING                    VERSION        BANNER                                                                               CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0           12.1.0.1.0     Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production              0

2.測試建立:
create table t1 as select rownum id ,lpad('a',100,'a') vc from dual connect by level<=1000;
create table t2 as select rownum idx,mod(rownum,1000)+1 id ,lpad('b',20,'b') vc from dual connect by level<=40000;

create unique index pk_t1 on t1(id);
alter table t1  add constraint pk_t1  primary key (id);

create unique index pk_t2 on t2(idx);
alter table t2  add constraint pk_t2  primary key (idx);

create index i_t2_id on t2(id);

3.測試1:

SCOTT@test01p> alter session set statistics_level = all;
Session altered.

SCOTT@test01p> select * from t1 where exists (select 1 from t2 where t2.id=t1.id) and t1.id=32;
        ID VC
---------- ----------------------------------------------------------------------------------------------------
        32 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa


--//執行計劃如下:
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  ajnkhp6968v8r, child number 1
-------------------------------------
select * from t1 where exists (select 1 from t2 where t2.id=t1.id) and
t1.id=32
Plan hash value: 1277462125
-------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                    | Name    | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |
-------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |         |      1 |        |       |     3 (100)|          |      1 |00:00:00.02 |       5 |      1 |
|   1 |  NESTED LOOPS SEMI           |         |      1 |      1 |   109 |     3   (0)| 00:00:01 |      1 |00:00:00.02 |       5 |      1 |
|   2 |   TABLE ACCESS BY INDEX ROWID| T1      |      1 |      1 |   105 |     2   (0)| 00:00:01 |      1 |00:00:00.01 |       3 |      0 |
|*  3 |    INDEX UNIQUE SCAN         | PK_T1   |      1 |      1 |       |     1   (0)| 00:00:01 |      1 |00:00:00.01 |       2 |      0 |
|*  4 |   INDEX RANGE SCAN           | I_T2_ID |      1 |     40 |   160 |     1   (0)| 00:00:01 |      1 |00:00:00.02 |       2 |      1 |
-------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$5DA710D3
   2 - SEL$5DA710D3 / T1@SEL$1
   3 - SEL$5DA710D3 / T1@SEL$1
   4 - SEL$5DA710D3 / T2@SEL$2
Predicate Information (identified by operation id):
---------------------------------------------------
   3 - access("T1"."ID"=32)
   4 - access("T2"."ID"=32)
       filter("T2"."ID"="T1"."ID")

--//實際上開始讓我困惑的是id=4,E_rows=40,實際上exists只要1條滿足條件就ok了.不需要繼續判斷,有點短路的作用.
--//這裡非常容易誤判,我們生產系統E_rows更高,差點給誤導了.

4.測試2:
--//測試not exists的情況如下:
SCOTT@test01p> select * from t1 where not exists (select 1 from t2 where t2.id=t1.id) and t1.id=32;

no rows selected

SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  21f5mqdya13v8, child number 1
-------------------------------------
select * from t1 where not exists (select 1 from t2 where t2.id=t1.id)
and t1.id=32
Plan hash value: 1740670345
----------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                    | Name    | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |
----------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT             |         |      1 |        |       |     3 (100)|          |      0 |00:00:00.01 |       5 |
|   1 |  NESTED LOOPS ANTI           |         |      1 |      1 |   109 |     3   (0)| 00:00:01 |      0 |00:00:00.01 |       5 |
|   2 |   TABLE ACCESS BY INDEX ROWID| T1      |      1 |      1 |   105 |     2   (0)| 00:00:01 |      1 |00:00:00.01 |       3 |
|*  3 |    INDEX UNIQUE SCAN         | PK_T1   |      1 |      1 |       |     1   (0)| 00:00:01 |      1 |00:00:00.01 |       2 |
|*  4 |   INDEX RANGE SCAN           | I_T2_ID |      1 |     40 |   160 |     1   (0)| 00:00:01 |      1 |00:00:00.01 |       2 |
----------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$5DA710D3
   2 - SEL$5DA710D3 / T1@SEL$1
   3 - SEL$5DA710D3 / T1@SEL$1
   4 - SEL$5DA710D3 / T2@SEL$2
Predicate Information (identified by operation id):
---------------------------------------------------
   3 - access("T1"."ID"=32)
   4 - access("T2"."ID"=32)
31 rows selected.


--//實際上我遇到的最佳化問題就是一個專案表有2千多條記錄,查詢業務表有那些專案已經開展的.每次進入程式介面就
--//以列表的形式顯示,呼叫如下:

select * from 專案表 where exists (select 1 from 業務表 where 業務表.專案_id=專案表.id);

--//業務表巨大無比,看到以上執行計劃的E_rows達到上萬,習慣思維,差點被誤導.
--//我自己檢視我工作筆記,這個在業務表上"專案_id"欄位索引還是我去年建立的.該索引重複值太多,而且這個索引除了這樣的查詢一點用都沒有.
--//有時候想開發為什麼寫這樣的sql語句,寫前考慮沒有.代價太大了.
--//真心希望開發寫sql語句想一想.....

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2215298/,如需轉載,請註明出處,否則將追究法律責任。

相關文章