ITPUB SQL大賽第三期
SQL大賽第三期解法。
第三期題目參考:http://www.itpub.net/thread-1408182-1-1.html
版主newkid點評參考:http://www.itpub.net/thread-1415335-1-1.html
根據評委的點評,第三次答題中存在一些考慮不周的地方,不過這並不妨礙分享最佳化的思路。至於是哪裡出了問題,由於個人原因,暫時實在沒有時間深究了,等過一段時間空閒下來,再仔細研究一下。
還是仿照前兩期,貼出SQL以及最佳化的過程:
SQL> WITH ROUTE_ALL_S (C1, C2, LINES, DIS) AS
2
(
3
SELECT CITY1, CITY2, CITY1 || CITY2, DISTANCE
4
FROM ROUTES
5
UNION ALL
6
SELECT CITY1, CITY2, LINES || '-' || CITY1 || CITY2, DISTANCE + DIS
7
FROM ROUTES R, ROUTE_ALL_S A
8
WHERE A.C2 = R.CITY1
9
),
10
ROUTE_S AS
11
(
12
SELECT SUBSTR(LINES, 1, 1) R, SUBSTR(LINES, LENGTH(LINES)) T, MIN(DIS) DIS
13
FROM ROUTE_ALL_S
14
GROUP BY SUBSTR(LINES, 1, 1), SUBSTR(LINES, LENGTH(LINES))
15
),
16
ROUTE_D AS
17
(
18
SELECT R, T, DIS
19
FROM ROUTE_S
20
UNION ALL
21
SELECT T, R, DIS
22
FROM ROUTE_S
23
),
24
ROUTE_ALL_D (C1, C2, LINES, DISTANCE) AS
25
(
26
SELECT R, T, R || T, DIS
27
FROM ROUTE_D
28
UNION ALL
29
SELECT SUBSTR(LINES, 1, 1), T, LINES || '-' || R || T, DIS + DISTANCE
30
FROM ROUTE_D R, ROUTE_ALL_D A
31
WHERE A.C2 = R.R
32
AND INSTR(LINES, R || T) = 0
33
AND INSTR(LINES, T || R) = 0
34
AND C1 != T
35
AND INSTR(LINES, R, 1, 2) = 0
36
AND INSTR(LINES, T, 1, 2) = 0
37
AND DISTANCE + DIS <= NVL((SELECT DIS FROM ROUTE_D RS WHERE C1 = RS.R
AND R.T = RS.T), 9.9E38)
38
),
39
RESULT AS
40
(
41
SELECT C1 R, C2 T,
42 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1, C2) COST,
43 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1) COST_CITY
44
FROM ROUTE_ALL_D R, CITIES C
45
WHERE R.C2 = C.CITY_NAME(+)
46
GROUP BY C1, C2
47
)
48
SELECT R, NVL(T, 'TOTAL') T, NVL(SUM(COST), 0) COST
49
FROM RESULT
50
WHERE COST_CITY = (SELECT MIN(COST_CITY) FROM RESULT)
51
GROUP BY GROUPING SETS ((R, T), R)
52
ORDER BY R, DECODE(T, 'TOTAL', '0', T);
R T COST
---------- ---------- ----------
D TOTAL 68356
D A 3200
D B 3224
D C 3634
D E 7300
D F 7598
D G 3840
D H 14580
D I 4400
D J 12352
D K 8228
D L 0
已選擇12行。
已用時間: 00: 00: 03.61
這是原始SQL語句,首先構造出當前路徑中兩點的最短距離,然後利用UNION ALL獲取所有路徑,再次利用遞迴WITH獲取所有可能的路徑,並去除路徑中重複的情況。最後計算起點和終點間最短的路徑,並根據人數計算出每個城市的費用,進而算出哪個城市舉辦花費最少。
SQL> WITH ROUTE_D AS
2
(
3
SELECT CITY1 R, CITY2 T, DISTANCE DIS
4
FROM ROUTES
5
UNION ALL
6
SELECT CITY2, CITY1, DISTANCE
7
FROM ROUTES
8
),
9
ROUTE_ALL_D (C1, C2, LINES, DISTANCE) AS
10
(
11
SELECT R, T, R || T, DIS
12
FROM ROUTE_D
13
UNION ALL
14
SELECT A.C1, T, LINES || R || T, DIS + DISTANCE
15
FROM ROUTE_D R, ROUTE_ALL_D A
16
WHERE A.C2 = R.R
17
AND C1 != T
18
AND INSTR(LINES, R, 1, 2) = 0
19
AND INSTR(LINES, T, 1, 2) = 0
20
AND DISTANCE + DIS <= NVL((SELECT DIS FROM ROUTE_D RS WHERE A.C1 = RS.R
AND R.T = RS.T), 9.9E38)
21
),
22
RESULT AS
23
(
24
SELECT C1 R, C2 T,
25 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1, C2) COST,
26 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1) COST_CITY
27
FROM ROUTE_ALL_D R, CITIES C
28 WHERE R.C2 = C.CITY_NAME(+)
29
GROUP BY R.C1, R.C2
30
)
31
SELECT R, NVL(T, 'TOTAL') T, NVL(SUM(COST), 0) COST
32
FROM RESULT
33
WHERE COST_CITY = (SELECT MIN(COST_CITY) FROM RESULT)
34
GROUP BY GROUPING SETS ((R, T), R)
35
ORDER BY R, DECODE(T, 'TOTAL', '0', T);
R T COST
---------- ---------- ----------
D TOTAL 68356
D A 3200
D B 3224
D C 3634
D E 7300
D F 7598
D G 3840
D H 14580
D I 4400
D J 12352
D K 8228
D L 0
已選擇12行。
已用時間: 00: 00: 00.09
隨後最佳化的時候發現,原始SQL中第一步獲取兩個城市間最多距離的步驟完全沒有必要,而且使得獲取全部路徑後資料量增大,使得查詢效能下降。
這裡直接將ROUTES表透過UNION ALL獲取全部路徑,然後利用遞迴WITH語句來獲取所有可能的路徑,除了新增判斷路徑中出現過的城市不在重複出現外,還新增判斷,如果當前的兩點間距離已經大於ROUTES表中儲存的兩點間距離,則路徑被過濾,如果當前的路徑在ROUTES表中找不到,那麼路徑保留。
最佳化後對於測試資料僅需要不到0.1秒。
SQL> WITH ROUTE_D AS
2
(
3
SELECT CITY1 R, CITY2 T, DISTANCE DIS
4
FROM ROUTES
5
UNION ALL
6
SELECT CITY2, CITY1, DISTANCE
7
FROM ROUTES
8
),
9
ROUTE_ALL_D (C1, C2, LINES, DISTANCE) AS
10
(
11
SELECT R, T, CAST('"' || R || '"' || T || '"' AS VARCHAR2(4000)),
DIS
12
FROM ROUTE_D
13
UNION ALL
14
SELECT A.C1, T, LINES || T || '"', DIS + DISTANCE
15
FROM ROUTE_D R, ROUTE_ALL_D A
16
WHERE A.C2 = R.R
17
AND INSTR(LINES, '"' || T || '"', 1, 1) = 0
18
AND DISTANCE + DIS <= NVL((SELECT DISTANCE FROM ROUTES RS WHERE (A.C1
= RS.CITY1 AND R.T = RS.CITY2) OR (A.C1 = RS.CITY2 AND R.T = RS.CITY1)), 9.9E38)
19
),
20
RESULT AS
21
(
22
SELECT C1 R, C2 T,
23 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1, C2) COST,
24 SUM(MIN(DISTANCE) * 2 * MAX(C.MEMBERS))
OVER(PARTITION BY C1) COST_CITY
25
FROM ROUTE_ALL_D R, CITIES C
26
WHERE R.C2 = C.CITY_NAME(+)
27
GROUP BY R.C1, R.C2
28
)
29
SELECT R, NVL(T, 'TOTAL') T, NVL(SUM(COST), 0) COST
30
FROM RESULT
31
WHERE COST_CITY = (SELECT MIN(COST_CITY) FROM RESULT)
32
GROUP BY GROUPING SETS ((R, T), R)
33
ORDER BY R, DECODE(T, 'TOTAL', CHR(0), T);
R T COST
---------- ---------- ----------
D TOTAL 68356
D A 3200
D B 3224
D C 3634
D E 7300
D F 7598
D G 3840
D H 14580
D I 4400
D J 12352
D K 8228
D L 0
已選擇12行。
已用時間: 00: 00: 00.07
對過濾條件進行了最佳化,去掉了一些不需要的過濾條件,在關聯ROUTES表時,不再關聯UNION ALL後的結果,而是透過OR條件進行過濾,這使得效率又提到了近1/5。而且這裡還考慮城市名稱互相包含的情況。
SQL> WITH ROUTE_D AS /* get full routes */
2
(
3
SELECT CITY1 R, CITY2 T, DISTANCE DIS
4
FROM ROUTES
5
UNION ALL
6
SELECT CITY2, CITY1, DISTANCE
7
FROM ROUTES
8
),
9
ROUTE_ALL_D (C1, C2, LINES, DISTANCE) AS /* get every route of any two cities
*/
10
(
11
SELECT R, T, CAST('"' || R || '"' || T || '"' AS VARCHAR2(4000)),
DIS /* avoid ora-1489 error */
12
FROM ROUTE_D
13
WHERE R != (SELECT MAX(CITY_NAME) FROM CITIES)
14
UNION ALL
15
SELECT A.C1, T, LINES || T || '"', DIS + DISTANCE /* '"' for avoid
city name contains other city name */
16
FROM ROUTE_D R, ROUTE_ALL_D A
17
WHERE A.C2 = R.R
18
AND INSTR(LINES, '"' || T || '"', 1, 1) = 0 /* avoit duplicate
city */
19
AND DISTANCE + DIS <= /* filter the distance longer than routes */
20 NVL
21 (
22 (
23 SELECT DISTANCE
24 FROM ROUTES RS
25 WHERE (A.C1 = RS.CITY1
26 AND R.T = RS.CITY2)
27 OR (A.C1 = RS.CITY2
28 AND R.T = RS.CITY1)
29 ),
30 9.9E38
31 )
32
),
33
RESULT_HALF AS
34
(
35
SELECT C1 R, C2 T, MIN(DISTANCE) DIS
36
FROM ROUTE_ALL_D
37
WHERE C1 < C2
38
GROUP BY C1, C2
39
),
40
RESULT_ALL AS
41
(
42
SELECT R, T, DIS
43
FROM RESULT_HALF
44
UNION ALL
45
SELECT T, R, DIS
46
FROM RESULT_HALF
47
),
48
RESULT AS
49
(
50
SELECT R, T,
51 DIS * 2 * C.MEMBERS COST,
52 SUM(DIS * 2 * C.MEMBERS) OVER(PARTITION
BY R) COST_CITY
53
FROM RESULT_ALL R, CITIES C
54
WHERE R.T = C.CITY_NAME(+) /* any city in route can win even the city have
no member */
55
)
56
SELECT R, NVL(T, 'TOTAL') T, NVL(SUM(COST), 0) COST
57
FROM
58
(
59
SELECT R, T, COST, RANK() OVER(ORDER BY COST_CITY) RN
60
FROM RESULT
61
)
62
WHERE RN = 1
63
GROUP BY GROUPING SETS ((R, T), R)
64 ORDER BY R, DECODE(T, 'TOTAL', CHR(0), T);
R T COST
---------- ---------- ----------
D TOTAL 68356
D A 3200
D B 3224
D C 3634
D E 7300
D F 7598
D G 3840
D H 14580
D I 4400
D J 12352
D K 8228
D L 0
已選擇12行。
已用時間: 00: 00: 00.06
這是最後提交的版本,最佳化主要是考慮到A到B和B到A的最終距離是等價的,因此最後可以先計算CITY1小於CITY2的情況,然後透過UNION ALL獲取全部的結果。而且這種方法使得在遞迴WITH構造路徑的時候,不再需要構造起點為名稱最大的城市,因為這個城市為起點的所有路徑,均可以從其他城市到這個城市的路徑獲得。
來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/4227/viewspace-691816/,如需轉載,請註明出處,否則將追究法律責任。
相關文章
- ITPUB SQL大賽第三期(二)SQL
- ITPUB SQL大賽之BUG(八)SQL
- ITPUB SQL大賽之BUG(七)SQL
- ITPUB SQL大賽之BUG(六)SQL
- ITPUB SQL大賽之BUG(五)SQL
- ITPUB SQL大賽之BUG(四)SQL
- ITPUB SQL大賽之BUG(三)SQL
- ITPUB SQL大賽之BUG(二)SQL
- ITPUB SQL大賽之BUG(一)SQL
- ITPUB SQL大賽第一期SQL
- ITPUB SQL大賽第二期SQL
- ITPUB SQL大賽第四期SQL
- ITPUB SQL大賽第二期(二)SQL
- ITPUB SQL大賽第二期(一)SQL
- sql大賽總結SQL
- sql大賽第四期SQL
- SQL資料庫程式設計大賽隨感SQL資料庫程式設計
- SQL資料庫程式設計大賽開幕SQL資料庫程式設計
- itpub 復活節最短sql(未完成程式碼)SQL
- Introduction to SQL Tuning Advisor zt自ITPUBSQL
- itpub
- 【ITPUB】ITPUB十週年感言 - Secooler
- upyun開發者大賽
- 積木大賽
- 我與Itpub
- 測試itpub
- ITPUB論壇
- ITPUB BLOG
- ITPUB索引樹索引
- 2010年ITPUB資料庫技術大會資料庫
- 大賽報名 | AI口罩佩戴檢測大賽期待你的參與!AI
- 程式設計大賽WBS程式設計
- 第一屆SQL大賽第一期優秀解題思路彙總SQL
- 全國首屆大模型創新創意應用大賽開啟,等你來賽!大模型
- 第二屆資料安全大賽“數信杯”資料安全大賽 WP
- 【ITPUB】secooler榮獲“ITPUB年度最佳BLOG寫作獎”
- 又來到ITPUB!
- ITPUB投資理財