DBMS_PARALLEL_EXECUTE 11GR2新特性,並行訂正大資料

wei-xh發表於2012-01-09
 
 

Applies to:

PL/SQL - Version: 11.2.0.1.0 to 11.2.0.1.0
Information in this document applies to any platform.

Purpose

The purpose of the article is to introduce the 11.2 new feature on using DBMS_PARALLEL_EXECUTE to update large tables using chunk mechanism.

Scope and Application

This is intended for PL/SQL programmers

11.2 New Feature : Using DBMS_PARALLEL_EXECUTE to Update Large Tables in Parallel

Overview

The DBMS_PARALLEL_EXECUTE package enables you to incrementally update the data in a large table in parallel, in two high-level steps:
  1. Group sets of rows in the table into smaller chunks.
  2. Apply the desired UPDATE statement to the chunks in parallel, committing each time you have finished processing a chunk.
This technique is recommended whenever you are updating a lot of data. Its advantages are:
  • You lock only one set of rows at a time, for a relatively short time, instead of locking the entire table.
  • You do not lose work that has been done if something fails before the entire operation finishes.
  • You reduce rollback space consumption.
  • You improve performance.

Different Ways to Spilt Workload

  1. CREATE_CHUNKS_BY_NUMBER_COL : Chunks the table associated with the given task by the specified column.
  2. CREATE_CHUNKS_BY_ROWID : Chunks the table associated with the given task by ROWID
  3. CREATE_CHUNKS_BY_SQL : Chunks the table associated with the given task by means of a user-provided SELECT statement

Pre-Requisites before using DBMS_PARALLEL_EXECUTE.

  • The user controlling the process needs the CREATE JOB privilege
  • Execute privilege on DBMS_SQL package because CHUNK_BY_SQL, RUN_TASK, and RESUME_TASK subprograms require a query, and are executed using DBMS_SQL.

Sample Program

Creating user , granting privileges and generating sample data.

Conn /as sysdba
/* Passwords are case-sentive from 11.1 */
GRANT Create Session, Resource to Usr identified by Usr;
GRANT CREATE JOB TO Usr;

Conn Usr/Usr
/*Create table with sample data*/
DROP TABLE test_tab;
CREATE TABLE test_tab (
id NUMBER,
description VARCHAR2(50),
num_col NUMBER,
CONSTRAINT test_tab_pk PRIMARY KEY (id)
);

INSERT /*+ APPEND */ INTO test_tab
SELECT level,
'Description for ' || level,
CASE
WHEN MOD(level, 5) = 0 THEN 10
WHEN MOD(level, 3) = 0 THEN 20
ELSE 30
END
FROM dual
CONNECT BY level <= 500000;
COMMIT;


SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
10 100000
20 133333
30 266667

1. Using CREATE_CHUNKS_BY_ROWID


BEGIN
DBMS_PARALLEL_EXECUTE.DROP_TASK ('test_task');
END;
/

DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);

-- Chunk the table by the ROWID
DBMS_PARALLEL_EXECUTE.create_chunks_by_rowid(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
by_row => TRUE,
chunk_size => 10000);
-- DML to be execute in parallel
l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE rowid BETWEEN :start_id AND :end_id';
-- Run the task
DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/

SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

2. Using CREATE_CHUNKS_BY_NUMBER_COL

CREATE OR REPLACE PROCEDURE process_update (p_start_id IN NUMBER, p_end_id IN NUMBER) AS
BEGIN
UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE id BETWEEN p_start_id AND p_end_id;
END;
/


DECLARE
l_task VARCHAR2(30) := 'test_task';
l_sql_stmt VARCHAR2(32767);
l_try NUMBER;
l_status NUMBER;
BEGIN
-- Create the TASK
DBMS_PARALLEL_EXECUTE.create_task (task_name => l_task);

-- Chunk the table by the ID

DBMS_PARALLEL_EXECUTE.create_chunks_by_number_col(task_name => l_task,
table_owner => 'USR',
table_name => 'TEST_TAB',
table_column => 'ID',
chunk_size => 10000);
-- Procedure to be execute in parallel
l_sql_stmt := 'BEGIN process_update(:start_id, :end_id); END;';

DBMS_PARALLEL_EXECUTE.run_task(task_name => l_task,
sql_stmt => l_sql_stmt,
language_flag => DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
l_try := 0;
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
WHILE(l_try < 2 and l_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
l_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.resume_task(l_task);
l_status := DBMS_PARALLEL_EXECUTE.task_status(l_task);
END LOOP;
-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.drop_task(l_task);
END;
/

SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

3. Using CREATE_CHUNKS_BY_SQL.

DECLARE
l_chunk_sql VARCHAR2(1000);
l_sql_stmt VARCHAR2(1000);
l_try NUMBER;
l_status NUMBER;
BEGIN

-- Create the TASK
DBMS_PARALLEL_EXECUTE.CREATE_TASK ('test_task');

-- Chunk the table by NUM_COL

l_chunk_sql := 'SELECT DISTINCT num_col, num_col FROM test_tab';
DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_SQL('test_task', l_chunk_sql, false);

-- Execute the DML in parallel
-- the WHERE clause contain a condition on num_col, which is the chunk
-- column. In this case, grouping rows is by num_col.

l_sql_stmt := 'UPDATE /*+ ROWID (dda) */ test_tab t
SET t.num_col = t.num_col + 10
WHERE num_col BETWEEN :start_id AND :end_id';
DBMS_PARALLEL_EXECUTE.RUN_TASK('test_task', l_sql_stmt, DBMS_SQL.NATIVE,
parallel_level => 10);

-- If there is error, RESUME it for at most 2 times.
L_try := 0;
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
WHILE(l_try < 2 and L_status != DBMS_PARALLEL_EXECUTE.FINISHED)
Loop
L_try := l_try + 1;
DBMS_PARALLEL_EXECUTE.RESUME_TASK('test_task');
L_status := DBMS_PARALLEL_EXECUTE.TASK_STATUS('test_task');
END LOOP;

-- Done with processing; drop the task
DBMS_PARALLEL_EXECUTE.DROP_TASK('test_task');

end;
/


SQL> SELECT num_col, COUNT(*)
FROM test_tab
GROUP BY num_col
ORDER BY num_col;

NUM_COL COUNT(*)
---------- ----------
20 100000
30 133333
40 266667

References :

Oracle Database PL/SQL Packages and Types Reference
11g Release 2 (11.2)
Part Number E10577-05

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/22034023/viewspace-714596/,如需轉載,請註明出處,否則將追究法律責任。

相關文章