[20211203]為什麼出現負數3.txt

lfree發表於2021-12-03

[20211203]為什麼出現負數3.txt

--//前幾天看華為監控sql語句遇到的問題,連結http://blog.itpub.net/267265/viewspace-2844951/
--//裡面計算的latch_hit 大於100。也就是gets相減出現負數的情況。

--//首先考慮awr 做snapshot時從那裡取值的,我仔細想一下應該來源V$LATCH_PARENT或者V$LATCH_CHILDREN。
--//繼續昨天的探究看看。

1.環境:
> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

2.分析:
> select sum(gets),name from V$LATCH_PARENT where name ='cache buffers chains' group by name
                         union all
                         select sum(gets),name from V$LATCH_CHILDREN where name ='cache buffers chains' group by name;
      SUM(GETS) NAME
--------------- ----------------------------------------
            167 cache buffers chains
  1438354062593 cache buffers chains

> /
      SUM(GETS) NAME
--------------- ----------------------------------------
            167 cache buffers chains
  1438470539683 cache buffers chains

--//很明顯,計算應該是snap時sum(gets) V$LATCH_CHILDREN檢視的總計。這樣V$LATCH_CHILDREN的gets最大是power(2,32)-1=4294967295.

> select power(2,32)-1 from dual ;
  POWER(2,32)-1
---------------
     4294967295

> select * from (select latch#,child#,gets,name from V$LATCH_CHILDREN where name ='cache buffers chains' order by gets desc) where rownum<=3;
         LATCH#          CHILD#            GETS NAME
--------------- --------------- --------------- ----------------------------------------
            177          226480      4203680676 cache buffers chains
            177          281284      4203453046 cache buffers chains
            177          299292      4045467193 cache buffers chains

--//這樣如果這些子拴鎖的gets溢位,這樣總和就會變小了。出現snap上下相減出現負數的可能性。
--//生產系統不能亂來,我透過測試環境演示這個情況。

3.環境:

SCOTT@book> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

SCOTT@book> select rowid,dept.* from dept where rownum=1;
ROWID                  DEPTNO DNAME          LOC
------------------ ---------- -------------- -------------
AAAVRCAAEAAAACHAAA         10 ACCOUNTING     NEW YORK

SCOTT@book> @ rowid AAAVRCAAEAAAACHAAA
    OBJECT       FILE      BLOCK        ROW ROWID_DBA            DBA                  TEXT
---------- ---------- ---------- ---------- -------------------- -------------------- ----------------------------------------
     87106          4        135          0  0x1000087           4,135                alter system dump datafile 4 block 135 ;

SYS@book> @ bh 4 135
   INST_ID HLADDR              DBARFIL     DBABLK      CLASS CLASS_TYPE         STATE             TCH CR_SCN_BAS CR_SCN_WRP CR_UBA_FIL CR_UBA_BLK CR_UBA_SEQ BA               LE_ADDR          OBJECT_NAME
---------- ---------------- ---------- ---------- ---------- ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------- ---------------- --------------------
         1 0000000084D25320          4        135          1 data block         xcur                1          0          0          0          0          0 000000006F2E0000 00               DEPT
--//記下HLADDR=0000000084D25320.

SYS@book> select addr,name,gets  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320');
ADDR             NAME                                           GETS
---------------- ---------------------------------------- ----------
0000000084D25320 cache buffers chains                          17033

SCOTT@book> select rowid,dept.* from dept where rownum=1;
ROWID                  DEPTNO DNAME          LOC
------------------ ---------- -------------- -------------
AAAVRCAAEAAAACHAAA         10 ACCOUNTING     NEW YORK

SYS@book> select addr,name,gets  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320');
ADDR             NAME                                           GETS
---------------- ---------------------------------------- ----------
0000000084D25320 cache buffers chains                          17035
--//查詢1次,gets增加2.

SYS@book> select *  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320')
  2  @ prxx
==============================
ADDR                          : 0000000084D25320
LATCH#                        : 177
CHILD#                        : 1793
LEVEL#                        : 1
NAME                          : cache buffers chains
HASH                          : 3563305585
GETS                          : 17035
MISSES                        : 0
SLEEPS                        : 0
IMMEDIATE_GETS                : 324
IMMEDIATE_MISSES              : 0
WAITERS_WOKEN                 : 0
WAITS_HOLDING_LATCH           : 0
SPIN_GETS                     : 0
...
PL/SQL procedure successfully completed.
--//可以發現gets=17035,增加2次。

SYS@book> oradebug peek 0x0000000084D25320 64
[084D25320, 084D25360) = 00000000 00000000 0000428B 000000B1 00000001 00000712 00000000 00000000 00000144 00000000 00000000 00000000 00000000 00000000 ...
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SYS@book> @ calc 17035 + 0
         DEC                  HEX
------------ --------------------
17035.000000                 428B

--//324 = 0x144,IMMEDIATE_GETS在後面。b1 = 177,估計是LATCH#.
--//注意看下線先位置對應的正是gets的數量。

--//先做一次awr的snapshot。
SYS@book> exec dbms_workload_repository.create_snapshot();
PL/SQL procedure successfully completed.

SYS@book> oradebug poke 0x0000000084D25328 4 0xffffff00
BEFORE: [084D25328, 084D2532C) = 0000428B
AFTER:  [084D25328, 084D2532C) = FFFFFF00

SYS@book> select addr,name,gets  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320');
ADDR             NAME                                           GETS
---------------- ---------------------------------------- ----------
0000000084D25320 cache buffers chains                     4294967040

--//2^32-1 = 4294967295,很接近溢位情況了。

SYS@book> exec dbms_workload_repository.create_snapshot();
PL/SQL procedure successfully completed.

SYS@book> select addr,name,gets  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320');
ADDR             NAME                                           GETS
---------------- ---------------------------------------- ----------
0000000084D25320 cache buffers chains                     4294967058

--//4294967295-4294967058 = 237,多執行以上查詢多次。

SYS@book> select addr,name,gets  from V$LATCH_CHILDREN where addr=hextoraw('0000000084D25320');
ADDR             NAME                                           GETS
---------------- ---------------------------------------- ----------
0000000084D25320 cache buffers chains                             30

--//OK,現在溢位去了。

SYS@book> exec dbms_workload_repository.create_snapshot();
PL/SQL procedure successfully completed.

SYS@book> select max(snap_id) from DBA_HIST_SNAPSHOT;
MAX(SNAP_ID)
------------
        1950

SELECT SNAP_ID
              --   ,DBID
              ,INSTANCE_NUMBER
              ,MISSES
              ,GETS
              ,LATCH_NAME
          FROM DBA_HIST_LATCH
         WHERE     latch_name = 'cache buffers chains'
               AND snap_id IN (1950, 1949, 1948);

   SNAP_ID INSTANCE_NUMBER     MISSES       GETS LATCH_NAME
---------- --------------- ---------- ---------- ----------------------------------------------------------------
      1948               1        103   25364624 cache buffers chains
      1949               1        103 4320392703 cache buffers chains
      1950               1        103   25472305 cache buffers chains

--//哈哈,演示出來了,中間的snap_id=1949的gets最大,更加說明一點華為監控這個無用,對於大型資料庫邏輯讀超高的情況下。

set numw 12
select * from (
  SELECT SNAP_ID
        --,DBID
        ,INSTANCE_NUMBER
        ,GETS
        ,misses
        ,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES1
        ,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS1
    FROM (  SELECT SNAP_ID
                  ,DBID
                  ,INSTANCE_NUMBER
                  ,SUM (MISSES) MISSES
                  ,SUM (GETS) GETS
              FROM DBA_HIST_LATCH
             WHERE     INSTANCE_NUMBER IN (SELECT instance_number
                                             FROM v$INSTANCE)
                   AND DBID IN (SELECT DBID
                                  FROM v$database)
          GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
ORDER BY SNAP_ID DESC) where rownum<=2;

     SNAP_ID INSTANCE_NUMBER         GETS       MISSES      MISSES1        GETS1
------------ --------------- ------------ ------------ ------------ ------------
        1950               1    260591322        38478            0  -4294716073
        1949               1   4555307395        38478            6   4295358093

--//可以看出對於大型生產系統看這樣的監控毫無用處,查詢大於latch_hit大於100的情況也許是問題最嚴重的時候。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2845698/,如需轉載,請註明出處,否則將追究法律責任。

相關文章