[20211130]為什麼出現負數.txt

lfree發表於2021-11-30

[20211130]為什麼出現負數.txt

--//生產系統華為做的無用監控,出現一個奇怪的現象,做一個簡單分析。

1.環境:
> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

> @ dashtop sql_id "machine like 'H3C%'"  trunc(sysdate)-1 trunc(sysdate)
    Total
  Seconds     AAS %This   SQL_ID        FIRST_SEEN          LAST_SEEN
--------- ------- ------- ------------- ------------------- -------------------
    28580      .3   47%   5r14h528vkacs 2021-11-29 00:01:32 2021-11-29 23:58:30
    18250      .2   30%   8ss7js42xzp05 2021-11-29 00:13:10 2021-11-29 23:59:51
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~    
     3650      .0    6%   c3jafyjuwt13b 2021-11-29 00:06:15 2021-11-29 23:58:41
      740      .0    1%   f454ryjfx6syf 2021-11-29 00:04:01 2021-11-29 23:59:01
      680      .0    1%   ck5qb9zs2n34g 2021-11-29 00:03:13 2021-11-29 23:33:58
      610      .0    1%   8sxz1p1238fyh 2021-11-29 00:03:51 2021-11-29 23:38:50
      510      .0    1%   cyfdvynj0mtc8 2021-11-29 00:08:06 2021-11-29 23:58:51
      470      .0    1%   5ub6g7qwaf35x 2021-11-29 01:28:48 2021-11-29 23:33:47
      370      .0    1%   9yfzqfdw2yhs4 2021-11-29 00:03:41 2021-11-29 23:38:40
      350      .0    1%   5t9zzqmqdyxbg 2021-11-29 00:33:47 2021-11-29 23:53:49
      330      .0    1%   27m1sf1nknfz2 2021-11-29 00:13:20 2021-11-29 23:39:00
      320      .0    1%   fpamfm2pkznu1 2021-11-29 00:08:16 2021-11-29 18:35:27
      270      .0    0%   19nrxkxw2b8j1 2021-11-29 00:18:13 2021-11-29 23:33:43
      260      .0    0%   21t4z1r0k4cyd 2021-11-29 01:06:13 2021-11-29 23:18:39
      220      .0    0%   93jgxvdzsx4y1 2021-11-29 00:48:46 2021-11-29 23:51:56
      200      .0    0%   18q3m92yk5zg5 2021-11-29 00:09:04 2021-11-29 23:53:59
      200      .0    0%   8fm0xfacp0b0g 2021-11-29 05:48:51 2021-11-29 21:53:53
      180      .0    0%   6uz4za48wf6j7 2021-11-29 02:18:08 2021-11-29 23:28:55
      170      .0    0%   6sbq34x7ckff7 2021-11-29 01:13:50 2021-11-29 20:38:52
      170      .0    0%   gwt7tu3383grt 2021-11-29 02:23:59 2021-11-29 22:53:56
20 rows selected.
--//檢視下劃線條sql語句。

> @ sqlid 8ss7js42xzp05
SQL_ID           HASH_VALUE SQLTEXT
------------- ------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
8ss7js42xzp05      98554885 SELECT ROUND(100 *(1-A.MISSES / A.GETS), 2) latch_hit FROM( SELECT SNAP_ID, DBID, INSTANCE_NUMBER, MISSES - LAG(MISSES, 1, NULL) OVER(ORDER BY SNAP_ID) MISSES, GETS - LAG(GETS, 1, NULL) OVER(ORDER BY
                            SNAP_ID) GETS FROM ( SELECT SNAP_ID, DBID, INSTANCE_NUMBER, SUM(MISSES) MISSES, SUM(GETS) GETS FROM DBA_HIST_LATCH WHERE INSTANCE_NUMBER IN ( SELECT instance_number FROM v$INSTANCE) AND DBID IN ( SELE
                            CT DBID FROM v$database) GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER) ORDER BY SNAP_ID DESC) A WHERE rownum = 1

--//格式化如下:
SELECT ROUND (100 * (1 - A.MISSES / A.GETS), 2) latch_hit
  FROM (  SELECT SNAP_ID
                ,DBID
                ,INSTANCE_NUMBER
                ,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES
                ,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS
            FROM (  SELECT SNAP_ID
                          ,DBID
                          ,INSTANCE_NUMBER
                          ,SUM (MISSES) MISSES
                          ,SUM (GETS) GETS
                      FROM DBA_HIST_LATCH
                     WHERE     INSTANCE_NUMBER IN (SELECT instance_number
                                                     FROM v$INSTANCE)
                           AND DBID IN (SELECT DBID
                                          FROM v$database)
                  GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
        ORDER BY SNAP_ID DESC) A
 WHERE ROWNUM = 1;

--//可以看出就是取awr最後兩個差值。在一個小時內這樣的查詢latch_hit百分比怎麼會有變化,可以肯定一般misses很少,結果應該接
--//近100%。實際上這些東西就是忽悠人的東西,根本毫無用處,還不如toad下database monitor簡單實用。
--//查詢結果如下,明顯出現問題,怎麼會出現大於100%的情況,出現溢位嗎。

    LATCH_HIT
-------------
       106.72

select * from (
  SELECT SNAP_ID
        --,DBID
        ,INSTANCE_NUMBER
        ,GETS
        ,misses
        ,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES1
        ,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS1
    FROM (  SELECT SNAP_ID
                  ,DBID
                  ,INSTANCE_NUMBER
                  ,SUM (MISSES) MISSES
                  ,SUM (GETS) GETS
              FROM DBA_HIST_LATCH
             WHERE     INSTANCE_NUMBER IN (SELECT instance_number
                                             FROM v$INSTANCE)
                   AND DBID IN (SELECT DBID
                                  FROM v$database)
          GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
ORDER BY SNAP_ID DESC) where rownum<=2;

      SNAP_ID INSTANCE_NUMBER          GETS        MISSES       MISSES1         GETS1
------------- --------------- ------------- ------------- ------------- -------------
        62159               1 1325998236957     869554321      18008851    -267855682
        62158               1 1326266092639     851545470      37590358      90100279
2 rows selected.

--//這樣看gets並不是很大,為什麼出現最大snap_id的gets比下一個snap_id的gets小的情況呢。仔細看看前面有一個sum彙總,看看一
--//些細節

SELECT SNAP_ID
   --   ,DBID
      ,INSTANCE_NUMBER
      , MISSES
      , GETS
      ,LATCH_NAME
  FROM DBA_HIST_LATCH
 WHERE     INSTANCE_NUMBER IN (SELECT instance_number FROM v$INSTANCE)
       AND DBID IN (SELECT DBID FROM v$database)
       AND SNAP_ID in ( 62158,62159)
       and latch_name='cache buffers chains'
       order by gets desc;

      SNAP_ID INSTANCE_NUMBER        MISSES          GETS LATCH_NAME
------------- --------------- ------------- ------------- --------------------
        62158               1     836511814 1249549542753 cache buffers chains
        62159               1     854386428 1248737851161 cache buffers chains
--//1248737851161-1249549542753 = -811691592.出現了負數。為什麼呢?

column HOST_NAME noprint
column INSTANCE_NAME noprint
column DB_NAME noprint
column DBID noprint
select * from (select * from DBA_HIST_DATABASE_INSTANCE where instance_number =1  order by 3 desc) where rownum<=3
/
INSTANCE_NUMBER STARTUP_TIME            PAR VERSION        LAST_ASH_SAMPLE_ID PLATFORM_NAME
--------------- ----------------------- --- -------------- ------------------ ----------------
              1 2021-11-26 19:38:15.000 YES 11.2.0.4.0              222680974 Linux x86 64-bit
              1 2021-10-31 05:41:30.000 YES 11.2.0.4.0              222356033 Linux x86 64-bit
              1 2021-10-31 05:34:02.000 YES 11.2.0.4.0                      0 Linux x86 64-bit
--//這個時間段很明顯資料庫並沒有重啟。

select * from
(SELECT SNAP_ID
   --   ,DBID
      ,INSTANCE_NUMBER
      , MISSES
      , GETS
      ,LATCH_NAME
       ,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS1
  FROM DBA_HIST_LATCH
 WHERE     INSTANCE_NUMBER IN (SELECT instance_number FROM v$INSTANCE)
       AND DBID IN (SELECT DBID FROM v$database)
     --  and SNAP_ID in ( 62158,62159)
       and latch_name='cache buffers chains'
       order by snap_id desc)
       where gets1<0;

> set numw 15
> /
        SNAP_ID INSTANCE_NUMBER          MISSES            GETS LATCH_NAME                     GETS1
--------------- --------------- --------------- --------------- -------------------- ---------------
          62159               1       854386428   1248737851161 cache buffers chains      -811691592
          62158               1       836511814   1249549542753 cache buffers chains      -421460196
          62068               1           61996       964237686 cache buffers chains  -3344960661046
          62056               1      1474503455   3284864196923 cache buffers chains     -6003979992
          62047               1      1473983577   3276119437122 cache buffers chains     -3578219873
          62046               1      1473534857   3279697656995 cache buffers chains     -1184524959
          62037               1      1460525952   3250157788769 cache buffers chains      -665290398
          62035               1      1456437704   3243620822520 cache buffers chains     -1577069275
          61990               1      1386072147   3137679778941 cache buffers chains     -1046511393
          61975               1      1370929739   3102129814110 cache buffers chains     -1173033933
          61784               1       770631163   2131136455538 cache buffers chains      -309450438
          61764               1       744693650   2053424060395 cache buffers chains     -1895267888
          61588               1       366281461    974340934754 cache buffers chains      -964787623
          61427               1              48          771254 cache buffers chains -61944486703368
14 rows selected.
--//可以看出多次出現了負數。

> select STARTUP_TIME,BEGIN_INTERVAL_TIME,END_INTERVAL_TIME from DBA_HIST_SNAPSHOT where snap_id in (61427,62068) and instance_number=1;
STARTUP_TIME            BEGIN_INTERVAL_TIME      END_INTERVAL_TIME
----------------------- ------------------------ ------------------------
2021-11-26 19:38:15.000 2021-11-26 19:38:15.000  2021-11-26 20:00:47.136
2021-10-31 02:57:49.000 2021-10-31 02:57:49.000  2021-10-31 03:00:11.216

--//變化很大的snap_id跟資料庫啟動是相關的。其它出現負數說明oracle的設計有bug,不大可能出現溢位的說法。
--//另外華為的研發寫sql太差勁了,我自己改寫的版本:
--//dbid,instance_number 應該讀寫到變數,減少對控制檔案的訪問。

SELECT  /*+ gather_plan_statistics */ ROUND (100 * (1 - A.MISSES / A.GETS), 2) latch_hit
  FROM (  SELECT SNAP_ID
                ,DBID
                ,INSTANCE_NUMBER
                ,MISSES - LAG (MISSES, 1, NULL) OVER (ORDER BY SNAP_ID) MISSES
                ,GETS - LAG (GETS, 1, NULL) OVER (ORDER BY SNAP_ID) GETS
            FROM (  SELECT SNAP_ID
                          ,DBID
                          ,INSTANCE_NUMBER
                          ,SUM (MISSES) MISSES
                          ,SUM (GETS) GETS
                      FROM DBA_HIST_LATCH
                     WHERE (SNAP_ID,INSTANCE_NUMBER,DBID) IN (SELECT SNAP_ID,INSTANCE_NUMBER,DBID
                                         FROM (  SELECT snap_id,instance_number,DBID
                                                   FROM DBA_HIST_SNAPSHOT
                                                  WHERE     INSTANCE_NUMBER = SYS_CONTEXT ('USERENV' ,'INSTANCE')
                                                        AND DBID = &dbid
                                               ORDER BY 1 DESC)
                                        WHERE ROWNUM <= 2)
                  GROUP BY SNAP_ID, DBID, INSTANCE_NUMBER)
        ORDER BY SNAP_ID DESC) a
 WHERE ROWNUM = 1;

 LATCH_HIT
----------
     99.19
1 row selected.
Elapsed: 00:00:00.01

> @ a1.txt
 LATCH_HIT
----------
     99.19
Elapsed: 00:00:00.87

--//比對方寫的快不少。
--//在我看來這東西就是無聊騙人的東西,對於診斷一點幫助都沒有,出問題這個值也是接近100%。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2844951/,如需轉載,請註明出處,否則將追究法律責任。

相關文章