[20220531]inactive session等待事件2.txt

lfree發表於2022-05-31

[20220531]inactive session等待事件2.txt

--//檢查生產系統,發現inactive session 和 enq: FU - contention 等待事件,從來沒有遇到過,仔細探究看看:
--//本文集中inactive session等待事件,在分析過程中走了許多彎路,重新整理.

1.環境:
> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

2.ashtop檢視:
> @ashtop event 1=1 &day
    Total                                                                                                      Distinct Distinct
  Seconds     AAS %This   EVENT                          FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps
--------- ------- ------- ------------------------------ ------------------- ------------------- ---------- --------
   334312     3.9   93% |                                2022-05-16 10:52:20 2022-05-17 10:52:12      15444    32265
    13951      .2    4% | inactive session               2022-05-16 10:52:18 2022-05-17 10:52:11       6248     8976
     2854      .0    1% | enq: FU - contention           2022-05-16 11:11:39 2022-05-17 10:14:18          1     2854
     2342      .0    1% | RMAN backup & recovery I/O     2022-05-16 19:30:08 2022-05-17 01:52:46          1      496
     1236      .0    0% | control file sequential read   2022-05-16 10:57:37 2022-05-17 10:51:08        528      996
      734      .0    0% | enq: TX - row lock contention  2022-05-16 15:43:39 2022-05-16 17:16:01          3      734
--//inactive session 和 enq: FU - contention等待事件,先集中探究inactive session,關於enq: FU - contention另外寫一篇文章.

> @ ev_name "inactive session"
EVENT#   EVENT_ID NAME             PARAMETER1  PARAMETER2  PARAMETER3           WAIT_CLASS_ID WAIT_CLASS# WAIT_CLASS
------ ---------- ---------------- ----------- ----------- -------------------- ------------- ----------- ----------
   422 3757863830 inactive session session#    waited      instance|serial         1893977003           0 Other

3.分析:
> @ ashtop sql_id,machine "event='inactive session'" &day
    Total                                                                                              Distinct Distinct
  Seconds     AAS %This   SQL_ID        MACHINE              FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps
--------- ------- ------- ------------- -------------------- ------------------- ------------------- ---------- --------
        6      .0    0% | 00pxd4aar1nug fyhis2               2022-05-18 20:27:36 2022-05-18 20:27:38          2        3
        6      .0    0% | 017smqud0wmq1 fyhis2               2022-05-19 09:33:28 2022-05-19 09:33:30          2        3
        6      .0    0% | 01rt1d6530tsb fyhis2               2022-05-19 09:25:12 2022-05-19 09:25:14          2        3
        6      .0    0% | 01tzhda22b2gw fyhis2               2022-05-19 08:19:13 2022-05-19 08:19:15          2        3
        6      .0    0% | 029k77b1npypb fyhis2               2022-05-18 12:16:55 2022-05-18 12:16:57          2        3
        6      .0    0% | 02qy363816jac fyhis2               2022-05-18 19:30:58 2022-05-18 19:31:00          2        3
....
30 rows selected.

> @ sql_id 00pxd4aar1nug
--SQL_ID = 00pxd4aar1nug
DECLARE job BINARY_INTEGER := :job; next_date DATE := :mydate;  broken BOOLEAN := FALSE; BEGIN tlogon.killsession(8481,3615); :mydate := next_date; IF broken THEN :b := 1; ELSE :b := 0; END IF; END; ;
--//檢視幾個都是類似的語句。
--//猜測防水牆執行的kill會話的語句。格式化如下:
DECLARE
    job BINARY_INTEGER := :job;
    next_date DATE := :mydate;
    broken BOOLEAN := FALSE;
BEGIN
    tlogon.killsession(8481,3615);
    :mydate := next_date;
    IF broken THEN
        :b := 1;
    ELSE
        :b := 0;
    END IF;
END;
/
--//猜測應該是防水牆對於無法登陸的回話kill.
$ tail -f alert_ywdb1.log
ORA-06512: at line 5
opiodr aborting process unknown ospid (25783) as a result of ORA-28
Tue May 17 09:11:27 2022
Errors in file /u01/app/oracle/diag/rdbms/ywdb/ywdb1/trace/ywdb1_ora_15721.trc:
ORA-00604: error occurred at recursive SQL level 1
ORA-00028: your session has been killed
ORA-06512: at "SYS.DBMS_LOCK", line 205
ORA-06512: at "HZMCASSET.TLOGON", line 1552
ORA-06512: at "HZMCASSET.TLOGON", line 1566
ORA-06512: at "HZMCASSET.TLOGON", line 1620
ORA-06512: at "HZMCASSET.TLOGON", line 2523
--//1566-1552 = 14
--//1620-1552 = 68
--//2523-1552 = 971
ORA-06512: at line 1
ORA-06512: at line 5
opiodr aborting process unknown ospid (15721) as a result of ORA-28
Tue May 17 09:11:30 2022

$ oerr ora 28
00028, 00000, "your session has been killed"
// *Cause:  A privileged user has killed your session and you are no longer
//          logged on to the database.
// *Action: Login again if you wish to continue working.

--//tlogon包體是加密的,不過網上很容易找到破解軟體.unwrap包後,執行程式碼如下,注我加了行號:
  94  PROCEDURE KILLSESSION(V_SID NUMBER, V_SERIAL# NUMBER) IS
  95     L_SQL VARCHAR2(4000);
  96   BEGIN
  97     L_SQL := 'alter system kill session ' || '''' || V_SID || ',' ||
  98              V_SERIAL# || '''';
  99     
 100     EXECUTE IMMEDIATE L_SQL;
 101   EXCEPTION
 102     WHEN OTHERS THEN
 103       ROLLBACK;
 104   END;

--//很簡單,根據引數kill session,注意並沒有immediate引數。也就是對於無法滿足登陸條件的回話,直接kill.
--//繼續看了原始碼,還有一段:
1542       PROCEDURE SUBMITKILLSESSION(V_SID NUMBER, V_SERIAL# NUMBER) IS
1543         L_JOB NUMBER;
1544         L_SQL VARCHAR2(4000);
1545       BEGIN
1546         SYS.DBMS_JOB.SUBMIT(L_JOB,
1547                             'tlogon.killsession(' || TO_CHAR(V_SID) || ',' ||
1548                             TO_CHAR(V_SERIAL#) || ');',
1549                             SYSDATE,
1550                             INSTANCE => SYS_CONTEXT('userenv', 'instance'));
1551         COMMIT;
1552         SYS.DBMS_LOCK.SLEEP(3);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1553       END;
1554     BEGIN
1555       IF TLOGONAUDIT.ACTIONLEVEL = REJECT THEN
1556         TVAR.G_1031 := TRUE;
1557         IF LOWER(SYS_CONTEXT('userenv', 'isdba')) = 'true' THEN
1558           SUBMITKILLSESSION(TLOGONAUDIT.SID, TLOGONAUDIT.SERIAL);
1559         END IF;
1560       
1561         IF LOWER(SYS_CONTEXT('idctx', 'isdba')) = 'true' THEN
1562           SUBMITKILLSESSION(TLOGONAUDIT.SID, TLOGONAUDIT.SERIAL);
1563         END IF;
1564       
1565         IF ISTRIGGERADMIN = TRUE THEN
1566           SUBMITKILLSESSION(TLOGONAUDIT.SID, TLOGONAUDIT.SERIAL);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1567         END IF;
1568       
1569         
1570         
1571         RAISE PRIVS_ERROR;
1572       END IF;
1573       TVAR.G_SESSID := TLOGONAUDIT.ID;
1574       
1575     EXCEPTION
1576       WHEN OTHERS THEN
1577         L_ERRCODE := SQLCODE;
1578         L_ERRMSG  := SUBSTR(SQLERRM, 1, 100);
1579         ROLLBACK;
1580         RAISEAPPERROR(L_ERRCODE,
1581                       ERR_LOGONRULE,
1582                       STRLOGONRULE || ' doaction ' || L_ERRMSG);
1583     END;
1584   
1585     FUNCTION CHECKACCOUNT RETURN BOOLEAN IS
1586     BEGIN
1587       IF SYS_CONTEXT('userenv', 'session_user') =
1588          SYS_CONTEXT('userenv', 'current_user') THEN
1589         IF TLOGONAUDIT.APPNAME LIKE 'CAPAA-%' OR
1590            TLOGONAUDIT.APPNAME = 'JOB' THEN
1591           RETURN TRUE;
1592         ELSE
1593           RETURN FALSE;
1594         END IF;
1595       END IF;
1596       RETURN TRUE;
1597     END;
1598   ...

1606     PROCEDURE DORESPONSE IS
...
1620       DOACTION;
...
2523     DORESPONSE;

--//alert內容:
Tue May 17 09:11:27 2022
Errors in file /u01/app/oracle/diag/rdbms/ywdb/ywdb1/trace/ywdb1_ora_15721.trc:
ORA-00604: error occurred at recursive SQL level 1
ORA-00028: your session has been killed
ORA-06512: at "SYS.DBMS_LOCK", line 205
ORA-06512: at "HZMCASSET.TLOGON", line 1552
ORA-06512: at "HZMCASSET.TLOGON", line 1566
ORA-06512: at "HZMCASSET.TLOGON", line 1620
ORA-06512: at "HZMCASSET.TLOGON", line 2523

--//根據alert的報錯,可以推斷先執行DORESPONSE,DOACTION,SUBMITKILLSESSION,然後呼叫SYS.DBMS_LOCK.SLEEP(3).
--//我開始以為沒有給SYS.DBMS_LOCK.SLEEP授權執行許可權,導致出現在alert出現以上錯誤.仔細檢查發現授權存在.
--// GRANT EXECUTE ON SYS.DBMS_LOCK TO HZMCASSET;

> Select job, what c40 ,  instance from DBA_JOBS where schema_user = 'HZMCASSET';
       JOB C40                                        INSTANCE
---------- ---------------------------------------- ----------
     61191 tlogon.killsession(581,17111);                    1
     61192 tlogon.killsession(581,17111);                    1

> Select job, what c40 ,  instance from DBA_JOBS where schema_user = 'HZMCASSET';
no rows selected

--//後臺不斷的kill這些會話。我發現有時候出現2行,有時候出現1行,為什麼?我開始不理解開發為什麼使用job的方式,
--//直接呼叫KILLSESSION 過程不就可以了嗎?後來才明白oracle不能kill自己.會出現如下提示:
--// ORA-00027: cannot kill current session

--//後來想明白了,出現2行的情況下,第一次呼叫job失敗,然後啟動另外1個job執行成功.這樣前面job繼續執行時正好在
--//執行SYS.DBMS_LOCK.SLEEP(3)操作(因為sleep 3秒),第2個job已經kill了回話,這樣就會在alert記錄如下:
Tue May 17 09:11:27 2022
Errors in file /u01/app/oracle/diag/rdbms/ywdb/ywdb1/trace/ywdb1_ora_15721.trc:
ORA-00604: error occurred at recursive SQL level 1
ORA-00028: your session has been killed
ORA-06512: at "SYS.DBMS_LOCK", line 205
ORA-06512: at "HZMCASSET.TLOGON", line 1552
ORA-06512: at "HZMCASSET.TLOGON", line 1566
ORA-06512: at "HZMCASSET.TLOGON", line 1620
ORA-06512: at "HZMCASSET.TLOGON", line 2523

--//第一次為什麼會失敗,按照前面ashtop指令碼的輸出,可以定位出現了inactive session等待事件,阻塞了kill操作.
--//為什麼這樣的操作會出現inactive session等待事件呢?繼續分析:

--//檢視跟蹤檔案,應該是MODULE NAME:(JDBC Thin Client)時無法登入。
*** 2022-05-17 10:17:18.201
*** SESSION ID:(8757.6731) 2022-05-17 10:17:18.201
*** CLIENT ID:() 2022-05-17 10:17:18.201
*** SERVICE NAME:(ywdb) 2022-05-17 10:17:18.201
*** MODULE NAME:(JDBC Thin Client) 2022-05-17 10:17:18.201
*** ACTION NAME:() 2022-05-17 10:17:18.201
--//沒有其他相關資訊,也無法知道是那個使用者登入
--//很奇怪應用出現錯誤,無法登陸,沒有任何開發反饋,無語.

> @ wcy &1min "event='inactive session'"
-- Display ASH Wait Chain Signatures script v0.6 BETA by Tanel Poder ( http://blog.tanelpoder.com )
%This     SECONDS        AAS #Blkrs WAIT_CHAIN                                                                       FIRST_SEEN          LAST_SEEN
------ ---------- ---------- ------ -------------------------------------------------------------------------------- ------------------- -------------------
  10%           3         .1      1 -> 590,9377,@2=>7628,11383,@2=>inactive session -> [idle blocker 2,590,9377]     2022-05-19 11:43:07 2022-05-19 11:43:09
                                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10%           3         .1      1 -> 590,9377,@2=>295,56079,@2=>inactive session -> [idle blocker 2,590,9377]      2022-05-19 11:43:07 2022-05-19 11:43:09
                                       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  10%           3         .1      1 -> 590,9423,@2=>295,56083,@2=>inactive session -> [idle blocker 2,590,9423]      2022-05-19 11:43:32 2022-05-19 11:43:34
  10%           3         .1      1 -> 590,9423,@2=>7628,11385,@2=>inactive session -> [idle blocker 2,590,9423]     2022-05-19 11:43:32 2022-05-19 11:43:34
   7%           2          0      1 -> 590,9349,@2=>3128,17545,@2=>inactive session -> [idle blocker 2,590,9349]     2022-05-19 11:42:47 2022-05-19 11:42:48
   7%           2          0      1 -> 7338,30489,@1=>7340,14317,@1=>inactive session -> [idle blocker 1,7338,30489] 2022-05-19 11:43:25 2022-05-19 11:43:26
   7%           2          0      1 -> 7338,30487,@1=>14,22465,@1=>inactive session -> [idle blocker 1,7338,30487]   2022-05-19 11:43:20 2022-05-19 11:43:21
   7%           2          0      1 -> 7338,30489,@1=>14,22467,@1=>inactive session -> [idle blocker 1,7338,30489]   2022-05-19 11:43:25 2022-05-19 11:43:26
   7%           2          0      1 -> 590,9349,@2=>7628,11379,@2=>inactive session -> [idle blocker 2,590,9349]     2022-05-19 11:42:47 2022-05-19 11:42:48
   3%           1          0      1 -> ,,@=>7340,14317,@1=>inactive session                                          2022-05-19 11:43:27 2022-05-19 11:43:27
   3%           1          0      1 -> 590,9351,@2=>7628,11381,@2=>inactive session -> [idle blocker 2,590,9351]     2022-05-19 11:42:52 2022-05-19 11:42:52
   3%           1          0      1 -> ,,@=>7628,11379,@2=>inactive session                                          2022-05-19 11:42:49 2022-05-19 11:42:49
   3%           1          0      1 -> ,,@=>14,22467,@1=>inactive session                                            2022-05-19 11:43:27 2022-05-19 11:43:27
   3%           1          0      1 -> ,,@=>295,56085,@2=>inactive session                                           2022-05-19 11:43:37 2022-05-19 11:43:37
   3%           1          0      1 -> ,,@=>3128,17545,@2=>inactive session                                          2022-05-19 11:42:49 2022-05-19 11:42:49
   3%           1          0      1 -> ,,@=>295,56081,@2=>inactive session                                           2022-05-19 11:43:12 2022-05-19 11:43:12
16 rows selected.
--//仔細看阻塞的鏈條.似乎阻塞連結出現在相同例項,注意看一個細節sid=7628,serial# 會變化(+2).
--//噢,視乎想明白了,因為不斷有使用者登入,出現serial# 會變化加2的情況,另外寫blog驗證這個情況。

4.總結:
--//透過以上分析可以得到一個結論,大量job在kill session時,會出現inactive session.
--//應該可以在測試環境模擬出inactive session等待事件.
--//開發應該immediate kill 會話也許就不會出現這個提示了。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2898061/,如需轉載,請註明出處,否則將追究法律責任。

相關文章