測試說明:
MERGE是oracle提供的一種特殊的sql語法,非常適用於資料同步場景,即: (把A表資料插到B表,如果B表存在相同主鍵的記錄則使用A表資料對B表進行更新) 資料同步的常規做法是先嚐試插入,插入失敗再進行更新,MERGE比這種常規做法效率高很多。 (特別是A與B表基本一致,同步時主鍵衝突比較多的情況,效率能相差10倍以上)
為了驗證MERGE效率,我建了兩張表,tab_test_C(初始化生成50000條記錄)和tab_test_Q(初始化從tab_test_C生成40000條記錄), 寫了兩個plsql指令碼,分別將tab_test_C的資料同步到tab_test_Q,看它們效率區別。
第一個指令碼使用merge語法,第二個指令碼使用常規先插入,出現主鍵衝突的操作。
測試結果:
使用merge語法的指令碼同步資料耗時0.04秒,使用常規操作耗時14.77秒,效率差369倍
測試指令碼:
SET SERVEROUTPUT ON -- 啟動計時 以便觀察指令碼執行時間 SET TIMING ON SET TIME ON
-- 資料初始化 DROP TABLE tab_test_C; CREATE TABLE tab_test_C ( C1 VARCHAR2(512), C2 VARCHAR2(512), C3 VARCHAR2(512), C4 VARCHAR2(512), C5 VARCHAR2(512), C6 VARCHAR2(512), C7 VARCHAR2(512), C8 VARCHAR2(512), C9 VARCHAR2(512), C10 VARCHAR2(512) ); DECLARE v_total number; BEGIN v_total := 0; LOOP EXIT WHEN v_total >= 50000; for cur in (select owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp from all_objects where rownum < 101) loop insert into tab_test_C values (cur.owner, cur.object_name, cur.subobject_name, cur.object_id, cur.data_object_id, cur.object_type, cur.created, cur.last_ddl_time, cur.timestamp, v_total); v_total := v_total + 1; end loop; END LOOP; COMMIT; END; / -- 建唯一索引 select count(1) from tab_test_C; create UNIQUE INDEX uid_test_c_1 on tab_test_C(C10);
--初始化tab_test_Q表資料,先從tab_test_C生成同步40000條資料,剩下10000條資料使用指令碼同步過來 DROP TABLE tab_test_Q; CREATE TABLE tab_test_Q AS SELECT * FROM tab_test_C where rownum < 40001; create UNIQUE INDEX uid_test_q_1 on tab_test_Q(C10); -- 驗證資料未同步成功 此時記錄數差1000 select count(*) from tab_test_Q;
-- 使用merge語法同步tab_test_C的資料到tab_test_Q DECLARE CURSOR cur is select * from tab_test_C; type mergeArray_t is table of tab_test_C % ROWTYPE index by BINARY_INTEGER; mergeArray mergeArray_t; BEGIN OPEN cur; LOOP EXIT WHEN cur % NOTFOUND; FETCH cur bulk collect into mergeArray LIMIT 16; -- 每次限十幾條記錄,不要佔用太多記憶體 這個數字調大點效率會更高 BEGIN FORALL rw IN 1 .. mergeArray.count MERGE INTO tab_test_Q A USING (SELECT mergeArray(rw).C1 C1, mergeArray(rw).C2 C2, mergeArray(rw).C3 C3, mergeArray(rw).C4 C4, mergeArray(rw).C5 C5, mergeArray(rw).C6 C6, mergeArray(rw).C7 C7, mergeArray(rw).C8 C8, mergeArray(rw).C9 C9, mergeArray(rw).C10 C10 FROM DUAL) B ON (A.C10 = B.C10) WHEN MATCHED THEN UPDATE SET A.C1 = mergeArray(rw).C1, A.C2 = mergeArray(rw).C2, A.C3 = mergeArray(rw).C3, A.C4 = mergeArray(rw).C4, A.C5 = mergeArray(rw).C5, A.C6 = mergeArray(rw).C6, A.C7 = mergeArray(rw).C7, A.C8 = mergeArray(rw).C8, A.C9 = mergeArray(rw).C9 WHEN NOT MATCHED THEN INSERT (C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) VALUES(mergeArray(rw).C1, mergeArray(rw).C2, mergeArray(rw).C3, mergeArray(rw).C4, mergeArray(rw).C5, mergeArray(rw).C6, mergeArray(rw).C7, mergeArray(rw).C8, mergeArray(rw).C9, mergeArray(rw).C10); -- DBMS_OUTPUT.PUT_LINE(mergeArray.count); EXCEPTION WHEN OTHERS THEN DBMS_OUTPUT.PUT_LINE('error1'); END; END LOOP; CLOSE cur; COMMIT; END; /
--耗時0.04秒 -- 驗證資料同步成功 select count(*) from tab_test_Q;
--初始化tab_test_Q表資料,先從tab_test_C生成同步40000條資料,剩下10000條資料使用指令碼同步過來 DROP TABLE tab_test_Q; CREATE TABLE tab_test_Q AS SELECT * FROM tab_test_C where rownum < 40001; create UNIQUE INDEX uid_test_q_1 on tab_test_Q(C10); -- 驗證資料未同步成功 此時記錄數差1000 select count(*) from tab_test_Q;
-- 使用常規語法同步tab_test_C的資料到tab_test_Q BEGIN for cur in (select * from tab_test_C) LOOP BEGIN INSERT INTO tab_test_Q(C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) VALUES(cur.C1, cur.C2, cur.C3, cur.C4, cur.C5, cur.C6, cur.C7, cur.C8, cur.C9, cur.C10); EXCEPTION WHEN DUP_VAL_ON_INDEX THEN --唯一索引衝突時更新 UPDATE tab_test_Q SET C1 = cur.C1, C2 = cur.C2, C3 = cur.C3, C4 = cur.C4, C5 = cur.C5, C6 = cur.C6, C7 = cur.C7, C8 = cur.C8, C9 = cur.C9 WHERE C10 = cur.C10; WHEN OTHERS THEN DBMS_OUTPUT.PUT_LINE('error1'); END; END LOOP; COMMIT; END; /
--耗時14.77秒 -- 驗證資料同步成功 select count(*) from tab_test_Q;