海量資料處理_刪除重複行

redhouser發表於2011-05-30

經常會遇到需要刪除重複行的問題,以下給出可選方案:


--0,案例
drop table t;

create table t(
a int,
b int
);

insert into t values(1,1);
insert into t values(1,1);
insert into t values(1,1);
insert into t values(2,2);
commit;

--1,經典方法
--適用於小表

delete from t
where rowid not in(select min(rowid) from t group by a);

--2,分析函式
--適用於大表,要刪除極少量資料時
delete from t
where rowid in(
      select rd from
        (select rowid rd,a,row_number() over(partition by a order by rowid) rn from t)
      where rn>1
);


--3,表函式
--適用於大表,要刪除大量資料時;並行能加快嗎?
-- Define the ref cursor types and function
CREATE OR REPLACE PACKAGE filter_rows_pkg IS
  TYPE refcur_t IS REF CURSOR RETURN t%ROWTYPE;
  TYPE outrec_typ IS RECORD (
    a int,
    b int);
  TYPE outrecset IS TABLE OF outrec_typ;
 FUNCTION f_trans(p refcur_t)
      RETURN outrecset PARALLEL_ENABLE PIPELINED;
END filter_rows_pkg;
/

CREATE OR REPLACE PACKAGE BODY filter_rows_pkg IS
  current_value int := null;
 
  FUNCTION f_trans(p refcur_t)
   RETURN outrecset PARALLEL_ENABLE PIPELINED IS
    out_rec outrec_typ;
    in_rec  p%ROWTYPE;
  BEGIN
  LOOP
    FETCH p INTO in_rec;
    EXIT WHEN p%NOTFOUND;
    if current_value is null or current_value<>in_rec.a then
      current_value := in_rec.a;
      out_rec.a := in_rec.a;
      out_rec.b := in_rec.b;
      PIPE ROW(out_rec);
    elsif current_value = in_rec.a then
      null;
    else
      null;     
    end if;

  END LOOP;
  CLOSE p;
  RETURN;
  END;
END filter_rows_pkg;
/

create table t_new
as
select * from table(filter_rows_pkg.f_trans(cursor(select * from t order by a)));

--4,exception table

create table myexceptions(row_id rowid,
                 owner varchar2(30),
                 table_name varchar2(30),
          constraint varchar2(30));

alter table t
  add constraint pk_t primary key(a) disable novalidate;


alter table t
  modify constraint pk_t enable validate exceptions into myexceptions;

select * from myexceptions;
--包含所有行

delete from t where rowid in(
  select row_id from myexceptions
  minus
  (select min(rowid) from t group by a)
);

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/18922393/viewspace-696654/,如需轉載,請註明出處,否則將追究法律責任。

相關文章