MariaDB刪除重複記錄效能測試

FishParadise發表於2018-06-13

刪除重複記錄,只保留id最大的一條記錄的效能測試

環境

測試表的id為是唯一的,或是自增的主鍵。

mysql不能直接寫迴圈,只能寫在儲存過程裡。

儲存過程usp_batch_insert的引數num_count為插入總行數,引數batch_commit為每批提交的行數。

由於是測試,先把bin log關閉。在生產上做刪除重複記錄操作,不能隨意關閉,根據業務而定。

SET session sql_log_bin = 0;

建立測試表t3

CREATE TABLE `t3` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `f1` varchar(32) DEFAULT NULL,
  `f2` varchar(32) DEFAULT NULL,
  `ctime` datetime(3) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

每批量提交的記錄表t3_log

CREATE TABLE `t3_log` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `i` bigint(20) DEFAULT NULL,
  `ctime` datetime(3) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

構造資料的儲存過程

DELIMITER $$
DROP PROCEDURE IF EXISTS `usp_batch_insert`;
CREATE PROCEDURE `usp_batch_insert`(IN `num_count` int,IN `batch_commit` int)
BEGIN
DECLARE i INT; 
SET i = 1;
SET AUTOCOMMIT = 0;

WHILE i <= num_count DO 
  INSERT INTO t3 (f1, f2, ctime) 
  SELECT REPLACE(UUID(), `-`, ``) AS a, REPLACE(UUID(), `-`, ``) AS b, NOW(3) AS c;
  SET i = i + 1;
  IF MOD(i, batch_commit) <= 0 THEN
    INSERT INTO t3_log (i, ctime) VALUES (i, NOW(3));
      COMMIT;
    END IF;
END WHILE; 

SET AUTOCOMMIT = 1;
END; $$
DELIMITER ;

生成200萬的測試資料

CALL usp_batch_insert(2000000, 5000);

把一部分資料重複

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 123456,10000;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 15234567,254321;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 345678,654321;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 654321,45678;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 886,123456;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 15,2000;

資料已經構造完,全表280多萬行記錄,需要刪除的資料有80多萬。

下面就來測試下全過程的時間,總耗時是216秒,其中刪除部分約29秒。

如果要保留最小id的那行記錄,則把max()函式修改為min()函式。

CREATE OR REPLACE TABLE _tmp_t3 (id INT NOT NULL PRIMARY KEY);

INSERT INTO _tmp_t3 (id)
SELECT id
FROM t3 
WHERE id NOT IN (
        SELECT maxid FROM 
                (SELECT max(id) AS  maxid FROM t3
                        GROUP BY f1, f2             
                ) b
);

DELETE a FROM t3 as a INNER JOIN _tmp_t3 as b on b.id = a.id;

如果29秒可能會影響業務,可以做成儲存過程,分批刪除。

DELIMITER $$
DROP PROCEDURE IF EXISTS `usp_batch_delete`;

CREATE PROCEDURE `usp_batch_delete`(IN `batch_commit` int)
BEGIN
DECLARE i INT; 
DECLARE num_count INT;
SET i = 1; 
SELECT MAX(id) INTO num_count FROM _tmp_t3;
SET AUTOCOMMIT = 0;

WHILE i <= num_count DO

DELETE a FROM t3 as a INNER JOIN _tmp_t3 as b on b.id = a.id AND b.id = i;
SET i = i + 1;

IF MOD(1, batch_commit) >= 0 THEN
COMMIT;
END IF;

END WHILE;

SET AUTOCOMMIT = 1;
END; $$
DELIMITER ;

相關文章