[20220507]優化的困惑13.txt

lfree發表於2022-05-10

[20220507]優化的困惑13.txt

--//生產系統一條sql語句,優化時遇到的問題,做一個記錄:

1.環境:
> @ pr
==============================
PORT_STRING                   : x86_64/Linux 2.4.xx
VERSION                       : 19.0.0.0.0
BANNER                        : Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production
BANNER_FULL                   : Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production
Version 19.9.0.0.0
BANNER_LEGACY                 : Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production
CON_ID                        : 0
PL/SQL procedure successfully completed.

2.語句如下:
--//sql_id=1br2zkb5wx8np.
SELECT gy_jbbm.jbxh,gy_jbbm.jbmc,gy_jbbm.icd9,gy_jbbm.pydm
  FROM gy_jbbm
  LEFT OUTER JOIN emr_xyjbbm
    ON GY_JBBM.jbxh = emr_xyjbbm.jbxh
   AND emr_xyjbbm.zxbz = 0
 where (GY_JBBM.DMLB = 10)
   AND ( (GY_JBBM.ICD9 LIKE '%齲病%' )
    OR (GY_JBBM.JBMC LIKE '%齲病%' )
    OR (GY_JBBM.PYDM LIKE '%齲病%' )
    OR (EMR_XYJBBM.BMMC LIKE '%齲病%' )
    OR (EMR_XYJBBM.PYDM LIKE '%齲病%' ) )
 ORDER BY GY_JBBM.ICD9,GY_JBBM.JBMC,GY_JBBM.PYDM;
--//注:為了測試方便我直接帶入真實的值.

--//開啟統計後執行計劃如下:
Plan hash value: 3718689590
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                              | Name                | Starts | E-Rows |E-Bytes|E-Temp | Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                       |                     |      1 |        |       |       |  7277 (100)|          |      4 |00:00:00.15 |    1385 |       |       |          |
|   1 |  SORT ORDER BY                         |                     |      1 |    152K|    30M|    32M|  7277   (1)| 00:00:01 |      4 |00:00:00.15 |    1385 |  2048 |  2048 | 2048  (0)|
|*  2 |   FILTER                               |                     |      1 |        |       |       |            |          |      4 |00:00:00.15 |    1385 |       |       |          |
|   3 |    NESTED LOOPS OUTER                  |                     |      1 |    152K|    30M|       |   375   (1)| 00:00:01 |    152K|00:00:00.08 |    1385 |       |       |          |
|*  4 |     INDEX FAST FULL SCAN               | I_GY_JBBM_ALL       |      1 |    152K|  6863K|       |   374   (1)| 00:00:01 |    152K|00:00:00.03 |    1385 |       |       |          |
|*  5 |     TABLE ACCESS BY INDEX ROWID BATCHED| EMR_XYJBBM          |    152K|      1 |   162 |       |     0   (0)|          |      0 |00:00:00.02 |       0 |       |       |          |
|*  6 |      INDEX RANGE SCAN                  | IDX_EMR_XYJBBM_JBXH |      0 |      1 |       |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   2 - filter((("GY_JBBM"."ICD9" LIKE '%齲病%' AND "GY_JBBM"."ICD9" IS NOT NULL AND "GY_JBBM"."ICD9" IS NOT NULL) OR ("GY_JBBM"."JBMC" LIKE '%齲病%' AND "GY_JBBM"."JBMC" IS NOT NULL
              AND "GY_JBBM"."JBMC" IS NOT NULL) OR ("GY_JBBM"."PYDM" LIKE '%齲病%' AND "GY_JBBM"."PYDM" IS NOT NULL AND "GY_JBBM"."PYDM" IS NOT NULL) OR "EMR_XYJBBM"."BMMC" LIKE '%齲病%' OR
              "EMR_XYJBBM"."PYDM" LIKE '%齲病%'))
   4 - filter("GY_JBBM"."DMLB"=10)
   5 - filter("EMR_XYJBBM"."ZXBZ"=0)
   6 - access("GY_JBBM"."JBXH"="EMR_XYJBBM"."JBXH")

--//補充說明,實際上這條sql語句沒有什麼好優化的,我建立I_GY_JBBM_ALL索引,覆蓋了gy_jbbm的全部顯示欄位.這樣一定程度不要全表
--//掃描gy_jbbm,減少一定的邏輯讀.奇怪的是id=2的FILTER出現在最後,如果你仔細看Predicate Information (identified by operation id):
--//可以發現oracle 自動加入了像如下條件兩遍:
"GY_JBBM"."ICD9" IS NOT NULL AND
"GY_JBBM"."ICD9" IS NOT NULL

--//不知道為什麼?實際上開始我並沒有認真看執行計劃,我開始想為什麼過濾(GY_JBBM.ICD9 LIKE '%齲病%' ) 不發生在id=4,而是出現在最後id=2.
--//這樣直接執行實際上"很慢"的,好在EMR_XYJBBM是一個沒有記錄的空表,不然id=5,starts=152K次,邏輯讀會很大.

--//我再次仔細看了sql語句.我個人並不喜歡ansi的語法,或者講更加喜歡使用+的語法,我以前經常改寫不知道那邊寫入+號.
--//我自己做了一個簡單記憶左連線 正好相反在 右邊的表使用+.

--//其實開發上面的寫法要非常小心,不小心很容易發生錯誤,參考連結:
--//http://blog.itpub.net/267265/viewspace-1988395/ => [20160213]關於ansi語法.txt
--//我想更多的是上面的寫法可能並不是開發想要寫法.或者講我認為開發寫錯了.

--//使用expandz.sql 展開看看。
$ cat tpt/expandz.sql
set long 40000
set serveroutput on
column arg new_value arg

set term off
select decode(&2,11,'sql2','utility') arg from dual;
set term on
prompt

declare
    l_sqltext clob := null;
    l_result  clob := null;
begin
        select sql_fulltext into l_sqltext from v$sqlarea where sql_id='&&1';
--      dbms_output.put_line(l_sqltext);
--      dbms_sql2.expand_sql_text(l_sqltext,l_result);
        dbms_&arg..expand_sql_text(l_sqltext,l_result);
        dbms_output.put_line(l_result);
end;
/
set serveroutput off


> @ cs PPPP_HHHH
alter session set current_schema=PPPP_HHHH
Session altered.

> @ expandz 1br2zkb5wx8np 19
SELECT "A1"."QCSJ_C000000000300000_0" "JBXH"
     , "A1"."JBMC_2" "JBMC"
     , "A1"."ICD9_4" "ICD9"
     , "A1"."QCSJ_C000000000300002_3" "PYDM"
  FROM (
        SELECT "A3"."JBXH" "QCSJ_C000000000300000_0"
             , "A3"."DMLB" "DMLB_1"
             , "A3"."JBMC" "JBMC_2"
             , "A3"."PYDM" "QCSJ_C000000000300002_3"
             , "A3"."ICD9" "ICD9_4"
             , "A3"."ZXBZ" "QCSJ_C000000000300004"
             , "A2"."JBXH" "QCSJ_C000000000300001"
             , "A2"."BMMC" "BMMC_7"
             , "A2"."PYDM" "QCSJ_C000000000300003_8"
             , "A2"."ZXBZ" "QCSJ_C000000000300005"
          FROM "PPPPPP_HHH"."GY_JBBM" "A3"
             , "PPPPPP_HHH"."EMR_XYJBBM" "A2"
         WHERE "A3"."JBXH"   = "A2"."JBXH"
   AND "A2"."ZXBZ"   = :B1) "A1"
 WHERE "A1"."DMLB_1" = :B2
   AND ("A1"."ICD9_4" LIKE :B3
    OR "A1"."JBMC_2" LIKE :B4
    OR "A1"."QCSJ_C000000000300002_3" LIKE :B5
    OR "A1"."BMMC_7" LIKE :B6
    OR "A1"."QCSJ_C000000000300003_8" LIKE :B7)
 ORDER BY "A1"."ICD9_4"
     , "A1"."JBMC_2"
     , "A1"."QCSJ_C000000000300002_3"
PL/SQL procedure successfully completed.
--//注我做了格式化處理,可以發現展開後like在最後處理。另外還可以發現oracle展開後根本不存在外連線,為什麼不懂。
--//我帶入執行,沒有結果,很明顯展開是錯誤的。
SELECT "A1"."QCSJ_C000000000300000_0" "JBXH"
     , "A1"."JBMC_2" "JBMC"
     , "A1"."ICD9_4" "ICD9"
     , "A1"."QCSJ_C000000000300002_3" "PYDM"
  FROM (
        SELECT "A3"."JBXH" "QCSJ_C000000000300000_0"
             , "A3"."DMLB" "DMLB_1"
             , "A3"."JBMC" "JBMC_2"
             , "A3"."PYDM" "QCSJ_C000000000300002_3"
             , "A3"."ICD9" "ICD9_4"
             , "A3"."ZXBZ" "QCSJ_C000000000300004"
             , "A2"."JBXH" "QCSJ_C000000000300001"
             , "A2"."BMMC" "BMMC_7"
             , "A2"."PYDM" "QCSJ_C000000000300003_8"
             , "A2"."ZXBZ" "QCSJ_C000000000300005"
          FROM "PPPPPP_HHH"."GY_JBBM" "A3"
             , "PPPPPP_HHH"."EMR_XYJBBM" "A2"
         WHERE "A3"."JBXH"   = "A2"."JBXH"
   AND "A2"."ZXBZ"   = 0) "A1"
 WHERE "A1"."DMLB_1" = 10
   AND ("A1"."ICD9_4" LIKE '%齲病%'
    OR "A1"."JBMC_2" LIKE '%齲病%'
    OR "A1"."QCSJ_C000000000300002_3" LIKE '%齲病%'
    OR "A1"."BMMC_7" LIKE '%齲病%'
    OR "A1"."QCSJ_C000000000300003_8" LIKE '%齲病%')
 ORDER BY "A1"."ICD9_4"
     , "A1"."JBMC_2"
     , "A1"."QCSJ_C000000000300002_3"

$ cat 10053x.sql
execute dbms_sqldiag.dump_trace(p_sql_id=>'&1',p_child_number=>&2,p_component=>'Compiler',p_file_id=>'a'||'&&1');

> @ 10053x 1br2zkb5wx8np 0
PL/SQL procedure successfully completed.

--//檢視跟蹤檔案:
Final query after transformations:******* UNPARSED QUERY IS *******
SELECT "GY_JBBM"."JBXH" "JBXH"
     , "GY_JBBM"."JBMC" "JBMC"
     , "GY_JBBM"."ICD9" "ICD9"
     , "GY_JBBM"."PYDM" "PYDM"
  FROM "PPPPPP_HHH"."GY_JBBM" "GY_JBBM"
     , "PPPPPP_HHH"."EMR_XYJBBM" "EMR_XYJBBM"
 WHERE "GY_JBBM"."DMLB"       = :B1
   AND ("GY_JBBM"."ICD9" LIKE :B2
    OR "GY_JBBM"."JBMC" LIKE :B3
    OR "GY_JBBM"."PYDM" LIKE :B4
    OR "EMR_XYJBBM"."BMMC" LIKE :B5
    OR "EMR_XYJBBM"."PYDM" LIKE :B6)
   AND "GY_JBBM"."JBXH"       = "EMR_XYJBBM"."JBXH"(+)
   AND "EMR_XYJBBM"."ZXBZ"(+) = :B7
 ORDER BY "GY_JBBM"."ICD9"
     , "GY_JBBM"."JBMC"
     , "GY_JBBM"."PYDM"
kkoqbc: optimizing query block SEL$2BFA4EE4 (#1)

--//你可以發現oracle實際上最後還是轉換成+的語法。

3.改寫:
--//首先我個人認為上面的寫法開發寫錯了.我使用加號改寫,注意可能不是等價的改寫!!

SELECT /*+ gather_plan_statistics */ gy_jbbm.jbxh
     , gy_jbbm.jbmc
     , gy_jbbm.icd9
     , gy_jbbm.pydm
  FROM gy_jbbm
     , emr_xyjbbm
 WHERE GY_JBBM.jbxh       = emr_xyjbbm.jbxh(+)
   AND emr_xyjbbm.zxbz(+) = 0
   AND (GY_JBBM.DMLB      = 10)
   AND ( (GY_JBBM.ICD9 LIKE '%齲病%')
    OR (GY_JBBM.JBMC LIKE '%齲病%')
    OR (GY_JBBM.PYDM LIKE '%齲病%')
    OR (EMR_XYJBBM.BMMC LIKE '%齲病%')
    OR (EMR_XYJBBM.PYDM LIKE '%齲病%'))
 ORDER BY GY_JBBM.ICD9 , GY_JBBM.JBMC , GY_JBBM.PYDM;
--//對比前面的10053x.sql看到,該語句是等價轉換。
--//執行計劃如下:
Plan hash value: 3718689590
 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                              | Name                | Starts | E-Rows |E-Bytes|E-Temp | Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                       |                     |      1 |        |       |       |  7277 (100)|          |      4 |00:00:00.13 |    1385 |       |       |          |
|   1 |  SORT ORDER BY                         |                     |      1 |    152K|    30M|    32M|  7277   (1)| 00:00:01 |      4 |00:00:00.13 |    1385 |  2048 |  2048 | 2048  (0)|
|*  2 |   FILTER                               |                     |      1 |        |       |       |            |          |      4 |00:00:00.05 |    1385 |       |       |          |
|   3 |    NESTED LOOPS OUTER                  |                     |      1 |    152K|    30M|       |   375   (1)| 00:00:01 |    152K|00:00:00.08 |    1385 |       |       |          |
|*  4 |     INDEX FAST FULL SCAN               | I_GY_JBBM_ALL       |      1 |    152K|  6863K|       |   374   (1)| 00:00:01 |    152K|00:00:00.03 |    1385 |       |       |          |
|*  5 |     TABLE ACCESS BY INDEX ROWID BATCHED| EMR_XYJBBM          |    152K|      1 |   162 |       |     0   (0)|          |      0 |00:00:00.02 |       0 |       |       |          |
|*  6 |      INDEX RANGE SCAN                  | IDX_EMR_XYJBBM_JBXH |      0 |      1 |       |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - filter((("GY_JBBM"."ICD9" LIKE '%齲病%' AND "GY_JBBM"."ICD9" IS NOT NULL AND "GY_JBBM"."ICD9" IS NOT NULL) OR ("GY_JBBM"."JBMC" LIKE '%齲病%' AND "GY_JBBM"."JBMC" IS NOT NULL
              AND "GY_JBBM"."JBMC" IS NOT NULL) OR ("GY_JBBM"."PYDM" LIKE '%齲病%' AND "GY_JBBM"."PYDM" IS NOT NULL AND "GY_JBBM"."PYDM" IS NOT NULL) OR "EMR_XYJBBM"."BMMC" LIKE '%齲病%' OR
              "EMR_XYJBBM"."PYDM" LIKE '%齲病%'))
   4 - filter("GY_JBBM"."DMLB"=10)
   5 - filter("EMR_XYJBBM"."ZXBZ"=0)
   6 - access("GY_JBBM"."JBXH"="EMR_XYJBBM"."JBXH")
 
--//這時我才注意,裡面的一堆or.實際上開發要表達的是(我認為),應該之間的是and不是or.
--//我嘗試在EMR_XYJBBM表的欄位上寫入加號.(EMR_XYJBBM.BMMC(+) LIKE '%齲病%'),報如下錯誤:
ORA-01719: outer join operator (+) not allowed in operand of OR or IN

--//修改如下:
SELECT /*+ gather_plan_statistics */
        gy_jbbm.jbxh
        ,gy_jbbm.jbmc
        ,gy_jbbm.icd9
        ,gy_jbbm.pydm
    FROM gy_jbbm, emr_xyjbbm
   WHERE     GY_JBBM.jbxh = emr_xyjbbm.jbxh(+)
         AND emr_xyjbbm.zxbz(+) = 0
         AND (GY_JBBM.DMLB = 10)
         AND (   GY_JBBM.ICD9 LIKE '%齲病%'
              OR GY_JBBM.JBMC LIKE '%齲病%'
              OR GY_JBBM.PYDM LIKE '%齲病%')
         AND (   EMR_XYJBBM.BMMC(+) LIKE '%齲病%'
         ~~~
              OR EMR_XYJBBM.PYDM(+) LIKE '%齲病%')
ORDER BY GY_JBBM.ICD9, GY_JBBM.JBMC, GY_JBBM.PYDM;
--//執行計劃如下:
Plan hash value: 1931007572
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                             | Name                | Starts | E-Rows |E-Bytes|E-Temp | Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                      |                     |      1 |        |       |       |  1319 (100)|          |      4 |00:00:00.07 |    1385 |       |       |          |
|   1 |  SORT ORDER BY                        |                     |      1 |  20844 |  4233K|  4520K|  1319   (1)| 00:00:01 |      4 |00:00:00.07 |    1385 |  2048 |  2048 | 2048  (0)|
|   2 |   NESTED LOOPS OUTER                  |                     |      1 |  20844 |  4233K|       |   375   (1)| 00:00:01 |      4 |00:00:00.03 |    1385 |       |       |          |
|*  3 |    INDEX FAST FULL SCAN               | I_GY_JBBM_ALL       |      1 |  20844 |   936K|       |   375   (1)| 00:00:01 |      4 |00:00:00.03 |    1385 |       |       |          |
|*  4 |    TABLE ACCESS BY INDEX ROWID BATCHED| EMR_XYJBBM          |      4 |      1 |   162 |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
|*  5 |     INDEX RANGE SCAN                  | IDX_EMR_XYJBBM_JBXH |      0 |      1 |       |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
   3 - filter(((("GY_JBBM"."JBMC" LIKE '%齲病%' AND "GY_JBBM"."JBMC" IS NOT NULL AND "GY_JBBM"."JBMC" IS NOT NULL) OR ("GY_JBBM"."PYDM" LIKE '%齲病%' AND "GY_JBBM"."PYDM" IS NOT
              NULL AND "GY_JBBM"."PYDM" IS NOT NULL) OR ("GY_JBBM"."ICD9" LIKE '%齲病%' AND "GY_JBBM"."ICD9" IS NOT NULL AND "GY_JBBM"."ICD9" IS NOT NULL)) AND "GY_JBBM"."DMLB"=10))
   4 - filter(("EMR_XYJBBM"."ZXBZ"=0 AND ("EMR_XYJBBM"."BMMC" LIKE '%齲病%' OR "EMR_XYJBBM"."PYDM" LIKE '%齲病%')))
   5 - access("GY_JBBM"."JBXH"="EMR_XYJBBM"."JBXH")
--//我想這才是開發想要的執行方式。id=4的迴圈次數現在是starts=4.
 
--//或者講開發上面使用ansi的語法,"正確"的寫法如下:
  SELECT /*+ gather_plan_statistics */
        gy_jbbm.jbxh
        ,gy_jbbm.jbmc
        ,gy_jbbm.icd9
        ,gy_jbbm.pydm
    FROM gy_jbbm
         LEFT OUTER JOIN emr_xyjbbm
            ON     GY_JBBM.jbxh = emr_xyjbbm.jbxh
               AND (   EMR_XYJBBM.BMMC LIKE '%齲病%'
                    OR EMR_XYJBBM.PYDM LIKE '%齲病%')
               AND emr_xyjbbm.zxbz = 0
   WHERE     GY_JBBM.DMLB = 10
         AND (   GY_JBBM.ICD9 LIKE '%齲病%'
              OR GY_JBBM.JBMC LIKE '%齲病%'
              OR GY_JBBM.PYDM LIKE '%齲病%')
ORDER BY GY_JBBM.ICD9, GY_JBBM.JBMC, GY_JBBM.PYDM;

--//執行計劃如下:
Plan hash value: 1931007572
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                             | Name                | Starts | E-Rows |E-Bytes|E-Temp | Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                      |                     |      1 |        |       |       |  1319 (100)|          |      4 |00:00:00.07 |    1385 |       |       |          |
|   1 |  SORT ORDER BY                        |                     |      1 |  20844 |  4233K|  4520K|  1319   (1)| 00:00:01 |      4 |00:00:00.07 |    1385 |  2048 |  2048 | 2048  (0)|
|   2 |   NESTED LOOPS OUTER                  |                     |      1 |  20844 |  4233K|       |   375   (1)| 00:00:01 |      4 |00:00:00.03 |    1385 |       |       |          |
|*  3 |    INDEX FAST FULL SCAN               | I_GY_JBBM_ALL       |      1 |  20844 |   936K|       |   375   (1)| 00:00:01 |      4 |00:00:00.03 |    1385 |       |       |          |
|*  4 |    TABLE ACCESS BY INDEX ROWID BATCHED| EMR_XYJBBM          |      4 |      1 |   162 |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
|*  5 |     INDEX RANGE SCAN                  | IDX_EMR_XYJBBM_JBXH |      0 |      1 |       |       |     0   (0)|          |      0 |00:00:00.01 |       0 |       |       |          |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
 
   3 - filter(((("GY_JBBM"."JBMC" LIKE '%齲病%' AND "GY_JBBM"."JBMC" IS NOT NULL AND "GY_JBBM"."JBMC" IS NOT NULL) OR ("GY_JBBM"."PYDM" LIKE '%齲病%' AND "GY_JBBM"."PYDM" IS NOT
              NULL AND "GY_JBBM"."PYDM" IS NOT NULL) OR ("GY_JBBM"."ICD9" LIKE '%齲病%' AND "GY_JBBM"."ICD9" IS NOT NULL AND "GY_JBBM"."ICD9" IS NOT NULL)) AND "GY_JBBM"."DMLB"=10))
   4 - filter(("EMR_XYJBBM"."ZXBZ"=0 AND ("EMR_XYJBBM"."BMMC" LIKE '%齲病%' OR "EMR_XYJBBM"."PYDM" LIKE '%齲病%')))
   5 - access("GY_JBBM"."JBXH"="EMR_XYJBBM"."JBXH")
 
3.總結:

--//我個人比較喜歡+的語法,實際上我個人認為上面的寫法很容易寫錯,開發團隊應該統一使用的語法。
--//而且使用加號的語法理論上滿足大部分應用的需求,而且不容易出錯。





來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2893172/,如需轉載,請註明出處,否則將追究法律責任。

相關文章