DB2 SQL改寫

zchbaby2000發表於2018-10-25

DB2的查詢重新是很厲害的,但是有時候不夠完美,如果我們發現高消耗的SQL需要重新,那麼下面的一些例子可能是有幫助的

-> 把outer join放在inner join的後面
SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5,C50
FROM T1 LEFT OUTER JOIN T2 ON T1.C2=T2.C2
LEFT OUTER JOIN T3 ON T1.C3=T3.C3
INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
WHERE T4.C0=2

=改成=>

SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5,C50
FROM T1 INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
LEFT OUTER JOIN T2 ON T1.C2=T2.C2
LEFT OUTER JOIN T3 ON T1.C3=T3.C3
WHERE T4.C0=2


-> DB2 DPF環境, T2是大表
SELECT T1.C1,T1.CX,T2.C2,T2.C3 ......, T2.C20
FROM T1 LEFT OUTER JOIN T2
ON T1.C2=T2.C2
WHERE T1.C0=2

=改成=>

SELECT T1.C1,T1.CX,T2.C2,T2.C3 ......, T2.C20
FROM T1 LEFT OUTER JOIN
(
    SELECT T2.* FROM T2,(SELECT DISTINCT C2
    FROM T1 WHERE T1.C0=2) T1
    WHERE T1.C2=T2.C2
)
T2
ON T1.C2=T2.C2
WHERE T1.C0=2

-> 減少需要Aggregation的行數
SELECT T1.C2,T1.C10,T1.C20,T1.C30,TX.S
FROM T1 INNER JOIN
(
    SELECT T2.C2,SUM(T2.SALES) AS S
    FROM T2 INNER JOIN T3
    ON T2.C3=T3.C3
    GROUP BY T2.C2
)
TX
ON T1.C2=T2.C2
WHERE T1.C0=5

=改成=>

SELECT T1.C2,T1.C10,T1.C20,T1.C30,TX.S
FROM T1 INNER JOIN
(
    SELECT T2.C2,SUM(T2.SALES) AS S
    FROM T2 INNER JOIN T3
    ON T2.C3=T3.C3
    INNER JOIN
    (
        SELECT DISTINCT T1.C2
        FROM T1
        WHERE T1.C0=5        
    ) T1 ON T1.C2=T2.C2
    GROUP BY T2.C2
)
TX
ON T1.C2=T2.C2
WHERE T1.C0=5

--> 提前做一部分Aggregation

SELECT T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50,
SUM(T1.SALES)
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
WHERE T2.C0=2 AND
T3.C0=MONTH('03.10.2017')
GROUP BY T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50

=改成=>

SELECT T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50,
SUM(TX.S)
FROM
(
SELECT T2.C20,T3.C30,T1.C4,T1.C5,SUM(T1.SALES) AS S
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
WHERE T2.C0=2 AND
T3.C0=MONTH('03.10.2017')
GROUP BY T2.C20,T3.C30,T1.C4,T1.C5
) TX
INNER JOIN T4 ON TX.C4=T4.C4
INNER JOIN T5 ON TX.C5=T5.C5
GROUP BY TX.C20,TX.C30,T4.C40,T4.C41,T4.C42,T5.C50

--> 去重要趁早
SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5.C50
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN (SELECT DISTINCT C4,CX FROM T4) T4 ON T1.C4=T4.C4
INNER JOIN (SELECT DISTINCT C5,CY FROM T5) T5 ON T1.C5=T5.C5
WHERE ...

=改成=>

SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5.C50
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN (SELECT C4,CX FROM T4 GROUP BY C4,CX) T4 ON T1.C4=T4.C4
INNER JOIN (SELECT C5,CY FROM T5 GROUP BY C5,CY) T5 ON T1.C5=T5.C5
WHERE ...

DISTINCT和GROUP BY都可以去重,但是GROUP BY不會像DISTINCT那樣,可能做了JOIN之後再去重


--> DPF 高頻列的DISTINCT Aggregation, 減少記憶體消耗

SELECT COUNT(DISTINCT C1) - High number of distinct values,很吃記憶體的操作,所有不同的值都需要放在記憶體中,無法做部分的group by
FROM T1
GROUP BY C0 --> Low number of distinct values or no group by

=改成=>

select COUNT(*)   -- 可以做部分的group by
FROM
(
SELECT C0,C1 FROM T1 GROUP BY C0,C1
) AS T
GROUP BY C0

SELECT C0,
COUNT(DISTINCT C1) AS CDC1,
COUNT(DISTINCT C2) AS CDC2
FROM T1
GROUP BY C0

=改成=>

SELECT C0,TX.CDC1,TY.CDC2
FROM
(
    SELECT C0,COUNT(*) AS CDC1
    FROM
    (
        SELECT C0,C1
        FROM T1
        GROUP BY C0,C1
    ) AS T
    GROUP BY C0
) TX,
(
    SELECT C0,COUNT(*) AS CDC2
    FROM
    (
        SELECT C0,C2
        FROM T1
        GROUP BY C0,C2
    ) AS T
    GROUP BY C0
)
TY
WHERE TX.C0=TY.C0


--> 關聯子查詢變成表連線
SELECT *
FROM VERSIONED_TBL T1
WHERE T1.SALARY >= 100000 AND
 T1.DATE_COL >=
 (SELECT MIN(DATE_COL) FROM VERSIONED_TBL T2
 WHERE T1.EMP_ID=T2.EMP_ID)
 
=改成=>

SELECT *
FROM VERSIONED_TBL T1,
(SELECT T1.EMP_ID,MIN(DATE_COL) AS M
FROM VERSIONED_TBL T2
GROUP BY T2.EMP_ID) TX
WHERE T1.SALARY>= 100000 AND
T1.DATE_COL >= TX.M AND
T1.EMP_ID=TX.EMP_ID

SELECT C1,
(SELECT MAX(C2) FROM T2,T3
WHERE T1.C1=T2.C1 AND T2.C3=T3.C3) AS C2,
C3
FROM T1
WHERE T1.C0 > 10

=改成=>

SELECT T1.C1,
TX.MAXC2 AS C2,
T1.C3
FROM T1 LEFT OUTER JOIN
(
SELECT C1,MAX(C2) AS MAXC2
FROM T2,T3
WHERE T2.C3=T3.C3
GROUP BY C1
)
TX
ON (T1.C1=TX.C1)
WHERE T1.C0>10


--> OR join謂詞改寫成UNION
OR Join predicates cannot be processed by a Hash Join
Nested Loop Joins work well with row tables and indexes
Column oriented tables perform better with a UNION
DISTINCT must be present to transfrom

SELECT DISTINCT T1,C1,T1,C2,T1.C3,T2.CX
FROM T1,T2
WHERE ( (T1.C6=T2.C2 AND T2.C3 IN ('1','2')) OR
(T1.C7=T2.C2 AND T2.C3 NOT IN ('1','2')))

=改成=>

SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM
(
SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM T1,T2
WHERE T1.C6=T2.C2 AND T2.C3 IN ('1','2')
UNION
SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM T1,T2
WHERE T1.C7=T2.C2 AND T2.C3 NOT IN ('1','2')
)


--> 改寫NOT EXISTS
If no complexity, db2 tries to use left outer joins for better performance
The following example is with Not exists AND NOT EXISTS
IS NOT NULL is used for the exists or exists transfromation
SELECT T1.C2,T1.C2,T1.C3
FROM T1
WHERE NOT EXISTS (SELECT 1 FROM T2 WHERE T1.C2=T2.C2)
AND
NOT EXISTS (SELECT 1 FROM T3 WHERE T1.C3=T2.C3)

=改成=>

SELECT T1.C2,T1.C2,T1.C3
FROM T1 LEFT OUTER JOIN T2
ON T1.C2=T2.C2
LEFT OUTER JOIN T3
ON T1.C3=T3.C3
WHERE T2.C2 IS NULL AND T2.C3 IS NULL

--> 改寫OR EXISTS

SELECT DISTINCT T1.C2,T1.C2,T1.C3
FROM T1
WHERE T1.C0=0 OR
EXISTS (
SELECT 1 FROM T2 WHERE T1.C2=T2.C2
)

=改成=>

SELECT T1.C1,T1.C2,T1.C3
FROM
(
SELECT T1.C2,T1.C2,T1.C3
FROM T1 WHERE T1.C0=0
UNION
SELECT T1.C2,T1.C2,T1.C3
FROM T1 WHERE EXISTS
(
SELECT 1 FROM T2 WHERE T1.C2=T2.C2
)
)

--Not in 和Not exist在存在NULL的情況下是不同的,不要以為可以用NOT EXISTS可以代替NOT EXISTS

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/725820/viewspace-2217568/,如需轉載,請註明出處,否則將追究法律責任。

相關文章