[20220121]Hash Aggregation.txt

lfree發表於2022-01-21

[20220121]Hash Aggregation.txt

--//重複裡面的測試:

1.環境:
SCOTT@book> @ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

create table t1
as
with generator as (
        select
                rownum id
        from dual
        connect by
                level <= 1e4  -- > comment to avoid wordpress format issue
)
select
        trunc(dbms_random.value(0,262144))      n_256K,
        trunc(dbms_random.value(0,131072))      n_128K,
        trunc(dbms_random.value(0,8192))        n_8k
from
        generator       v1,
        generator       v2
where
        rownum <= 8 * 1048576  -- > comment to avoid wordpress format issue
;

set arraysize 1000
set timing on
set autotrace traceonly

prompt  ===========
prompt  No Order by
prompt  ===========

select n_8K, count(*) ct from t1 group by n_8k ;

prompt  =============
prompt  With Order by
prompt  =============

select n_8K, count(*) ct from t1 group by n_8k order by 1 ;


--//顯示的執行計劃如下:
SCOTT@book> select n_8K, count(*) ct from t1 group by n_8k ;
8192 rows selected.
Elapsed: 00:00:01.47
Execution Plan
----------------------------------------------------------
Plan hash value: 136660032
---------------------------------------------------------------------------
| Id  | Operation          | Name | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |      |  8192 | 32768 |  6261   (5)| 00:01:16 |
|   1 |  HASH GROUP BY     |      |  8192 | 32768 |  6261   (5)| 00:01:16 |
|   2 |   TABLE ACCESS FULL| T1   |  8388K|    32M|  6024   (1)| 00:01:13 |
---------------------------------------------------------------------------
Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
      21876  consistent gets
      21873  physical reads
          0  redo size
      79540  bytes sent via SQL*Net to client
        607  bytes received via SQL*Net from client
         10  SQL*Net roundtrips to/from client
          0  sorts (memory)
          0  sorts (disk)
       8192  rows processed

SCOTT@book> select n_8K, count(*) ct from t1 group by n_8k order by 1 ;
8192 rows selected.
Elapsed: 00:00:03.58
Execution Plan
----------------------------------------------------------
Plan hash value: 3946799371
---------------------------------------------------------------------------
| Id  | Operation          | Name | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |      |  8192 | 32768 |  6261   (5)| 00:01:16 |
|   1 |  SORT GROUP BY     |      |  8192 | 32768 |  6261   (5)| 00:01:16 |
|   2 |   TABLE ACCESS FULL| T1   |  8388K|    32M|  6024   (1)| 00:01:13 |
---------------------------------------------------------------------------
Statistics
----------------------------------------------------------
          1  recursive calls
          0  db block gets
      21876  consistent gets
      21873  physical reads
          0  redo size
      79540  bytes sent via SQL*Net to client
        607  bytes received via SQL*Net from client
         10  SQL*Net roundtrips to/from client
          1  sorts (memory)
          0  sorts (disk)
       8192  rows processed
       
--//你可以發現走SORT GROUP BY需要將近4秒,HASH GROUP BY快一些.

SCOTT@book> @ sl all
alter session set statistics_level = all;
Session altered.

select n_8K, count(*) ct from t1 group by n_8k ;
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  92vdmp2bp6nt8, child number 1
-------------------------------------
select n_8K, count(*) ct from t1 group by n_8k
Plan hash value: 136660032
---------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation          | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
---------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |      |      1 |        |       |  6261 (100)|          |   8192 |00:00:02.08 |   21876 |  21873 |       |       |          |
|   1 |  HASH GROUP BY     |      |      1 |   8192 | 32768 |  6261   (5)| 00:01:16 |   8192 |00:00:02.08 |   21876 |  21873 |   415M|    16M| 5521K (0)|
|   2 |   TABLE ACCESS FULL| T1   |      1 |   8388K|    32M|  6024   (1)| 00:01:13 |   8388K|00:00:00.60 |   21876 |  21873 |       |       |          |
---------------------------------------------------------------------------------------------------------------------------------------------------------

Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------

   1 - SEL$1
   2 - SEL$1 / T1@SEL$1

select n_8K, count(*) ct from t1 group by n_8k order by 1;
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  00uj4uh3nzz1y, child number 1
-------------------------------------
select n_8K, count(*) ct from t1 group by n_8k order by 1
Plan hash value: 3946799371
---------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation          | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
---------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |      |      1 |        |       |  6261 (100)|          |   8192 |00:00:03.89 |   21876 |  21873 |       |       |          |
|   1 |  SORT GROUP BY     |      |      1 |   8192 | 32768 |  6261   (5)| 00:01:16 |   8192 |00:00:03.89 |   21876 |  21873 |   549K|   549K|  487K (0)|
|   2 |   TABLE ACCESS FULL| T1   |      1 |   8388K|    32M|  6024   (1)| 00:01:13 |   8388K|00:00:00.57 |   21876 |  21873 |       |       |          |
---------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1
   2 - SEL$1 / T1@SEL$1
   
--//如果上下比較可以發現除了執行計劃不同外,差別在於Omem,1mem,Used-Mem的使用上,SORT GROUP BY消耗要小許多,但是如果排序的記
--//錄多有點慢.

--//作者提供提示來最佳化該語句,實際上這個僅僅在排序很大的情況下才建議使用走HASH GROUP BY,但是資源消耗也大一些.

select dist_8k, ct from ( select  /*+ no_merge */ n_8K dist_8k, count(*) ct from t1 group by n_8k) order by dist_8k ;
select  /*+ use_hash_aggregation */ n_8K, count(*) ct from t1 group by n_8k order by 1 ;

--//執行計劃如下:
SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  d5nc55qsjnszm, child number 0
-------------------------------------
select dist_8k, ct from ( select  /*+ no_merge */ n_8K dist_8k,
count(*) ct from t1 group by n_8k) order by dist_8k
Plan hash value: 1705136228
-----------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation            | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
-----------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT     |      |      1 |        |       |  6261 (100)|          |   8192 |00:00:01.92 |   21876 |  21873 |       |       |          |
|   1 |  SORT ORDER BY       |      |      1 |   8192 |   208K|  6261   (5)| 00:01:16 |   8192 |00:00:01.92 |   21876 |  21873 |   407K|   407K|  361K (0)|
|   2 |   VIEW               |      |      1 |   8192 |   208K|  6261   (5)| 00:01:16 |   8192 |00:00:01.91 |   21876 |  21873 |       |       |          |
|   3 |    HASH GROUP BY     |      |      1 |   8192 | 32768 |  6261   (5)| 00:01:16 |   8192 |00:00:01.91 |   21876 |  21873 |   415M|    16M| 5527K (0)|
|   4 |     TABLE ACCESS FULL| T1   |      1 |   8388K|    32M|  6024   (1)| 00:01:13 |   8388K|00:00:00.55 |   21876 |  21873 |       |       |          |
-----------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1
   2 - SEL$2 / from$_subquery$_001@SEL$1
   3 - SEL$2
   4 - SEL$2 / T1@SEL$2

SCOTT@book> @ dpc '' '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  cny0n0cbpsy00, child number 0
-------------------------------------
select  /*+ use_hash_aggregation */ n_8K, count(*) ct from t1 group by
n_8k order by 1
Plan hash value: 2808104874
----------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT    |      |      1 |        |       |  6261 (100)|          |   8192 |00:00:01.92 |   21876 |  21873 |       |       |          |
|   1 |  SORT ORDER BY      |      |      1 |   8192 | 32768 |  6261   (5)| 00:01:16 |   8192 |00:00:01.92 |   21876 |  21873 |   407K|   407K|  361K (0)|
|   2 |   HASH GROUP BY     |      |      1 |   8192 | 32768 |  6261   (5)| 00:01:16 |   8192 |00:00:01.92 |   21876 |  21873 |   415M|    16M| 5526K (0)|
|   3 |    TABLE ACCESS FULL| T1   |      1 |   8388K|    32M|  6024   (1)| 00:01:13 |   8388K|00:00:00.56 |   21876 |  21873 |       |       |          |
----------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$1
   3 - SEL$1 / T1@SEL$1

--//作者還給了一個如何加入提示的例子.
SCOTT@book> create table t2 as select * from all_objects ;
Table created.

SCOTT@book> create table t3 as select * from all_objects ;
Table created.

--//分析略.

select
        t2.owner, count(*)
from
        t2
where
        t2.object_id in (
                select  t3.object_id
                from    t3
                where   t3.object_type = 'TABLE'
        )
group by
        t2.owner
order by
        t2.owner
/

SCOTT@book> @ dpc '' outline ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  6pcbhwhh1659s, child number 0
-------------------------------------
select         t2.owner, count(*) from         t2 where
t2.object_id in (                 select  t3.object_id
from    t3                 where   t3.object_type = 'TABLE'         )
group by         t2.owner order by         t2.owner

Plan hash value: 2646727453

------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation             | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT      |      |      1 |        |       |   677 (100)|          |     23 |00:00:00.07 |    2428 |   2424 |       |       |          |
|   1 |  SORT GROUP BY        |      |      1 |     29 |   725 |   677   (1)| 00:00:09 |     23 |00:00:00.07 |    2428 |   2424 |  2048 |  2048 | 2048  (0)|
|*  2 |   HASH JOIN RIGHT SEMI|      |      1 |   2121 | 53025 |   676   (1)| 00:00:09 |   2978 |00:00:00.07 |    2428 |   2424 |  2293K|  2293K| 1487K (0)|
|*  3 |    TABLE ACCESS FULL  | T3   |      1 |   2121 | 29694 |   338   (1)| 00:00:05 |   2979 |00:00:00.01 |    1214 |   1212 |       |       |          |
|   4 |    TABLE ACCESS FULL  | T2   |      1 |  84838 |   911K|   338   (1)| 00:00:05 |  84838 |00:00:00.02 |    1214 |   1212 |       |       |          |
------------------------------------------------------------------------------------------------------------------------------------------------------------

Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------

   1 - SEL$5DA710D3
   ~~~~~~~~~~~~~~~~~~
   3 - SEL$5DA710D3 / T3@SEL$2
   4 - SEL$5DA710D3 / T2@SEL$1

Outline Data
-------------

  /*+
      BEGIN_OUTLINE_DATA
      IGNORE_OPTIM_EMBEDDED_HINTS
      OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
      DB_VERSION('11.2.0.4')
      ALL_ROWS
      OUTLINE_LEAF(@"SEL$5DA710D3")
      UNNEST(@"SEL$2")
      OUTLINE(@"SEL$1")
      OUTLINE(@"SEL$2")
      FULL(@"SEL$5DA710D3" "T2"@"SEL$1")
      FULL(@"SEL$5DA710D3" "T3"@"SEL$2")
      LEADING(@"SEL$5DA710D3" "T2"@"SEL$1" "T3"@"SEL$2")
      USE_HASH(@"SEL$5DA710D3" "T3"@"SEL$2")
      SWAP_JOIN_INPUTS(@"SEL$5DA710D3" "T3"@"SEL$2")
      END_OUTLINE_DATA
  */

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T2"."OBJECT_ID"="T3"."OBJECT_ID")
   3 - filter("T3"."OBJECT_TYPE"='TABLE')

select /*+ use_hash_aggregation(@SEL$5DA710D3) */
        t2.owner, count(*)
from
        t2
where
        t2.object_id in (
                select  t3.object_id
                from    t3
                where   t3.object_type = 'TABLE'
        )
group by
        t2.owner
order by
        t2.owner
/

SCOTT@book> @ dpc '' outline ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID  3p240auatqfbf, child number 0
-------------------------------------
select /*+ use_hash_aggregation(@SEL$5DA710D3) */         t2.owner,
count(*) from         t2 where         t2.object_id in (
 select  t3.object_id                 from    t3                 where
 t3.object_type = 'TABLE'         ) group by         t2.owner order by
       t2.owner

Plan hash value: 4018836393

-------------------------------------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation              | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time   | A-Rows |   A-Time   | Buffers | Reads  |  OMem |  1Mem | Used-Mem |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT       |      |      1 |        |       |   677 (100)|          |     23 |00:00:00.07 |    2428 |   2424 |       |       |          |
|   1 |  SORT ORDER BY         |      |      1 |     29 |   725 |   677   (1)| 00:00:09 |     23 |00:00:00.07 |    2428 |   2424 |  2048 |  2048 | 2048  (0)|
|   2 |   HASH GROUP BY        |      |      1 |     29 |   725 |   677   (1)| 00:00:09 |     23 |00:00:00.07 |    2428 |   2424 |  1322K|  1322K| 1117K (0)|
|*  3 |    HASH JOIN RIGHT SEMI|      |      1 |   2121 | 53025 |   676   (1)| 00:00:09 |   2978 |00:00:00.07 |    2428 |   2424 |  2293K|  2293K| 1531K (0)|
|*  4 |     TABLE ACCESS FULL  | T3   |      1 |   2121 | 29694 |   338   (1)| 00:00:05 |   2979 |00:00:00.01 |    1214 |   1212 |       |       |          |
|   5 |     TABLE ACCESS FULL  | T2   |      1 |  84838 |   911K|   338   (1)| 00:00:05 |  84838 |00:00:00.02 |    1214 |   1212 |       |       |          |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
   1 - SEL$5DA710D3
   4 - SEL$5DA710D3 / T3@SEL$2
   5 - SEL$5DA710D3 / T2@SEL$1
Outline Data
-------------
  /*+
      BEGIN_OUTLINE_DATA
      IGNORE_OPTIM_EMBEDDED_HINTS
      OPTIMIZER_FEATURES_ENABLE('11.2.0.4')
      DB_VERSION('11.2.0.4')
      ALL_ROWS
      OUTLINE_LEAF(@"SEL$5DA710D3")
      UNNEST(@"SEL$2")
      OUTLINE(@"SEL$1")
      OUTLINE(@"SEL$2")
      FULL(@"SEL$5DA710D3" "T2"@"SEL$1")
      FULL(@"SEL$5DA710D3" "T3"@"SEL$2")
      LEADING(@"SEL$5DA710D3" "T2"@"SEL$1" "T3"@"SEL$2")
      USE_HASH(@"SEL$5DA710D3" "T3"@"SEL$2")
      SWAP_JOIN_INPUTS(@"SEL$5DA710D3" "T3"@"SEL$2")
      USE_HASH_AGGREGATION(@"SEL$5DA710D3")
      END_OUTLINE_DATA
  */
Predicate Information (identified by operation id):
---------------------------------------------------
   3 - access("T2"."OBJECT_ID"="T3"."OBJECT_ID")
   4 - filter("T3"."OBJECT_TYPE"='TABLE')

--//除非你遇到大結果集合的排序,可能這樣的最佳化才有意義.

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2853434/,如需轉載,請註明出處,否則將追究法律責任。

相關文章