[20201214]再遇SQL*Net break/reset to client.txt

lfree發表於2020-12-14

[20201214]再遇SQL*Net break/reset to client.txt

--//生產系統遇到一個問題,很久沒遇到這類問題做一個記錄。

1.環境:
XXXX> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

XXXX> @ ashtop sql_id "machine<>'IMC' and sql_id not in (select sqlid from sqlid where flag=0)" trunc(sysdate) trunc(sysdate)+1
    Total
  Seconds     AAS %This   SQL_ID        FIRST_SEEN          LAST_SEEN
--------- ------- ------- ------------- ------------------- -------------------
       53      .0    5% | b0u9avfd5qydx 2020-12-14 00:09:34 2020-12-14 09:20:23
       35      .0    3% | 0x6u28dvh18us 2020-12-14 00:40:52 2020-12-14 00:41:34
       31      .0    3% | fs9p5prj0hu06 2020-12-14 00:40:15 2020-12-14 00:40:51
       23      .0    2% | cm0qrtprj32qv 2020-12-14 00:55:25 2020-12-14 09:10:53
       18      .0    2% | 7y8bs4y43psd2 2020-12-14 01:14:11 2020-12-14 09:20:55
..

--//注其它不能最佳化我建立sqlid表,放入其中。
XXXX> @ tpt/sqlid b0u9avfd5qydx %
Show SQL text, child cursors and execution stats for SQLID b0u9avfd5qydx child nvl('%','%')
no rows selected
no rows selected

--//居然沒有找到,有點奇怪!!

SELECT sql_id, event, COUNT (*)
    FROM V$ACTIVE_SESSION_HISTORY
   WHERE sql_id = 'b0u9avfd5qydx'
GROUP BY sql_id, event;

SQL_ID        EVENT                                      COUNT(*)
------------- ---------------------------------------- ----------
b0u9avfd5qydx row cache lock                                    1
b0u9avfd5qydx                                                 653
b0u9avfd5qydx SQL*Net break/reset to client                   174
b0u9avfd5qydx latch: shared pool                                1

--//出現SQL*Net break/reset to client,說明開發寫sql語句有錯誤。只能查詢基表x$kglob。

SELECT distinct kglnaobj c200, kglobt03 sql_id
  FROM x$kglob
 WHERE kglobt03 ='b0u9avfd5qydx';
C120                                      SQL_ID
----------------------------------------- -------------
          PD_LSDL.PDCY b0u9avfd5qydx
          PD_LSDL.DDSJ,
          PD_LSDL.KLRM,
          PD_LSDL.QTID,
     FROM PD_LSDL   ID,
    PD_LSDL.SLGH = :as_slghd   ANDND
 
--//看到的程式碼有點奇怪,感覺寫錯了。實際上我們開發很變態的使用\r 而不是\n作為換行,\r只代表回車,顯示出現這樣的效果。
--//你可以在一些圖形介面執行就可以避免上面的情況。
--//改寫如下:

SELECT distinct replace(kglnaobj,chr(13),chr(10)) c120, kglobt03 sql_id
  FROM x$kglob
   WHERE kglobt03 ='b0u9avfd5qydx';
C120                                        SQL_ID
------------------------------------------- -------------
  SELECT PD_LSDL.DLID,                      b0u9avfd5qydx
    PD_LSDL.RDID,
          PD_LSDL.RDSJ,
          PD_LSDL.PDHM,
          PD_LSDL.PDCY,
          PD_LSDL.PDZT,
          PD_LSDL.KSSJ,
          PD_LSDL.JSSJ,
          PD_LSDL.DDSJ,
          PD_LSDL.WCSJ,
          PD_LSDL.JLSJ,
          PD_LSDL.SLGH,
          PD_LSDL.SLRM,
          PD_LSDL.KSID,
          PD_LSDL.KSMC,
          PD_LSDL.YSID,
          PD_LSDL.YSXM,
          PD_LSDL.QTID,
          PD_LSDL.QTMC,
          PD_LSDL.YWLB,
          PD_LSDL.YWID,
          PD_LSDL.BRID
     FROM PD_LSDL
  WHERE PD_LSDL.WCSJ >= :adt_Begin AND
    PD_LSDL.WCSJ <  :adt_End   AND
    PD_LSDL.SLGH = :as_slgh
 
XXXX> @ desc portal_his.PD_LSDL
           Name   Null?    Type
           ------ -------- ----------------------------
    1      RDID   NOT NULL NUMBER(18)
    2      RDSJ   NOT NULL DATE
    3      PDHM   NOT NULL NUMBER(4)
    4      PDCY            VARCHAR2(40)
    5      PDZT   NOT NULL NUMBER(1)
    6      KSSJ   NOT NULL DATE
    7      JSSJ            DATE
    8      DDSJ   NOT NULL NUMBER(10)
    9      WCSJ            DATE
   10      JLSJ   NOT NULL NUMBER(10)
   11      SLGH            VARCHAR2(10)
   12      SLRM            VARCHAR2(10)
   13      KSID            VARCHAR2(10)
   14      KSMC            VARCHAR2(50)
   15      YSID            VARCHAR2(10)
   16      YSXM            VARCHAR2(30)
   17      QTID            VARCHAR2(20)
   18      QTMC            VARCHAR2(40)
   19      YWLB            VARCHAR2(10)
   20      YWID            VARCHAR2(20)
   21      BRID            VARCHAR2(20)
   22      ZSID            NUMBER(18)
   23      ZSMC            VARCHAR2(40)
   24      MZHM            VARCHAR2(20)
   25      ZHBZ            NUMBER(1)
   26      TSBZ            NUMBER(1)
   27      JZYX            NUMBER(1)

--//你可以發現沒有DLID欄位。這條語句一定很頻繁呼叫,我很奇怪開發怎麼測試軟體的,這樣的錯誤竟然沒有測試出來。
--//我手工執行報如下錯誤:
ORA-00904: "PD_LSDL"."DLID": invalid identifier

XXXX> ALTER SYSTEM SET EVENTS '904 TRACE NAME ERRORSTACK LEVEL 12';
System altered.

--//等20秒上下,執行如下:

XXXX> ALTER SYSTEM SET EVENTS '904 TRACE NAME ERRORSTACK off';
System altered.

$ grep -l -i pd_lsdl *
alert_XXXX1.log
XXXX1_ora_12024.trc
XXXX1_ora_1285.trc
XXXX1_ora_13499.trc
XXXX1_ora_15495.trc
XXXX1_ora_16252.trc
XXXX1_ora_17345.trc
XXXX1_ora_17523.trc
XXXX1_ora_18962.trc
XXXX1_ora_20266.trc
XXXX1_ora_20895.trc
XXXX1_ora_23503.trc
XXXX1_ora_23559.trc
XXXX1_ora_24481.trc
XXXX1_ora_27712.trc
XXXX1_ora_27897.trc
XXXX1_ora_29415.trc
XXXX1_ora_5775.trc
XXXX1_ora_603.trc
XXXX1_ora_7354.trc
XXXX1_ora_8018.trc
XXXX1_ora_8646.trc
XXXX1_ora_9662.trc
XXXX1_ora_9764.trc

--//順便開啟一個看看:
----- Error Stack Dump -----
ORA-00904: "PD_LSDL"."DLID": 識別符號無效
----- Current SQL Statement for this session (sql_id=b0u9avfd5qydx) -----
  SELECT PD_LSDL.DLID,^M
            PD_LSDL.RDID,   ^M
         PD_LSDL.RDSJ,   ^M
         PD_LSDL.PDHM,   ^M
         PD_LSDL.PDCY,   ^M
         PD_LSDL.PDZT,   ^M
         PD_LSDL.KSSJ,   ^M
         PD_LSDL.JSSJ,   ^M
         PD_LSDL.DDSJ,   ^M
         PD_LSDL.WCSJ,   ^M
         PD_LSDL.JLSJ,   ^M
         PD_LSDL.SLGH,   ^M
         PD_LSDL.SLRM,   ^M
         PD_LSDL.KSID,   ^M
         PD_LSDL.KSMC,   ^M
         PD_LSDL.YSID,   ^M
         PD_LSDL.YSXM,   ^M
         PD_LSDL.QTID,   ^M
         PD_LSDL.QTMC,   ^M
         PD_LSDL.YWLB,   ^M
         PD_LSDL.YWID,^M
         PD_LSDL.BRID^M
    FROM PD_LSDL   ^M
    WHERE PD_LSDL.WCSJ >= :adt_Begin AND^M
            PD_LSDL.WCSJ <  :adt_End   AND^M
            PD_LSDL.SLGH = :as_slgh

$ grep ORA-00904 *.trc | cut -d: -f2- | sort| uniq -c
     26 ORA-00904: "PD_LSDL"."DLID": 識別符號無效
      2 ORA-00904: "XTXH": 識別符號無效
      1 ORA-00904: "ZDLB": 識別符號無效

--//還有2處是別的表引起的錯誤。

--//在我完成測試後,再次檢視ashtop測試:

XXXX> @ ashtop sql_id "machine<>'IMC' and sql_id not in (select sqlid from scott.sqlid where flag=0)" trunc(sysdate) trunc(sysdate)+1
    Total
  Seconds     AAS %This   SQL_ID        FIRST_SEEN          LAST_SEEN
--------- ------- ------- ------------- ------------------- -------------------
      240      .0   15% | b0u9avfd5qydx 2020-12-14 00:09:34 2020-12-14 09:58:51
       35      .0    2% | 0x6u28dvh18us 2020-12-14 00:40:52 2020-12-14 00:41:34
       31      .0    2% | fs9p5prj0hu06 2020-12-14 00:40:15 2020-12-14 00:40:51

--//已經增加到240秒。真心不知道這種情況如何看到執行呼叫的頻率。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2741906/,如需轉載,請註明出處,否則將追究法律責任。

相關文章