[20211029]udump磁碟空間暴漲.txt

lfree發表於2021-10-29

[20211029]udump磁碟空間暴漲.txt

--//生產系統,udump磁碟空間暴漲.記錄一下診斷與解決過程.

1.環境:
SQL> select * from v$version where rownum=1;
BANNER
----------------------------------------------------------------
Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi

2.分析:

# du -sm udump/ ; sleep 10 ; du -sm udump/
388697  udump/
388701  udump/
--//10秒增加388701-388697 = 4M.
--//10g下udump目錄下檔案格式是<oracle_sid>_ora_<spid>.trc. 這樣以前的dump也儲存在同一個檔案裡面,不能只看檔案前面的部分。
--//開啟其中一個發現如下內容:
...
WAIT #0: nam='SQL*Net message to client' ela= 0 driver id=1413697536 #bytes=1 p3=0 obj#=-1 tim=1597052889224064
WAIT #0: nam='SQL*Net message from client' ela= 3086 driver id=1413697536 #bytes=1 p3=0 obj#=-1 tim=1597052889331166
=====================
PARSING IN CURSOR #1 len=82 dep=1 uid=0 oct=3 lid=0 tim=1597052889331451 hv=3873422482 ad='a47dba68'
select privilege# from sysauth$ where (grantee#=:1 or grantee#=1) and privilege#>0
END OF STMT
PARSE #1:c=0,e=65,p=0,cr=0,cu=0,mis=0,r=0,dep=1,og=4,tim=1597052889331449
BINDS #1:
kkscoacd
 Bind#0
  oacdty=02 mxl=22(22) mxlc=00 mal=00 scl=00 pre=00
  oacflg=08 fl2=0001 frm=00 csi=00 siz=24 off=0
  kxsbbbfp=7fdcf3c4d4c0  bln=22  avl=02  flg=05
  value=62
EXEC #1:c=1000,e=172,p=0,cr=0,cu=0,mis=0,r=0,dep=1,og=4,tim=1597052889331687
FETCH #1:c=0,e=66,p=0,cr=4,cu=0,mis=0,r=1,dep=1,og=4,tim=1597052889331771
--//感覺是開啟10046跟蹤。

SQL> oradebug setmypid
Statement processed.

SQL> oradebug eventdump system
10046 trace name CONTEXT level 12, forever

SQL> oradebug eventdump session
10046 trace name CONTEXT level 12, forever

--//可以再次確定某人在系統級別開啟10046跟蹤。
--//檢查alert檔案可以確定時間發生在9/29 15:39:08.
# grep -C5 -i "alter system " alert_xxxx.log | grep -C5 10046
kupprdp: master process DM00 started with pid=426, OS id=3378
         to execute - SYS.KUPM$MCP.MAIN('SYS_EXPORT_FULL_02', 'SYS', 'KUPC$C_1_20210929153149', 'KUPC$S_1_20210929153149', 0);
kupprdp: worker process DW01 started with worker id=1, pid=429, OS id=3380
         to execute - SYS.KUPW$WORKER.MAIN('SYS_EXPORT_FULL_02', 'SYS');
Wed Sep 29 15:39:08 2021
OS Pid: 3655 executed alter system set events '10046 trace name context forever, level 12'
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Wed Sep 29 15:43:54 2021
WARNING: inbound connection timed out (ORA-3136)
Wed Sep 29 15:43:54 2021
WARNING: inbound connection timed out (ORA-3136)
Wed Sep 29 15:43:54 2021

SQL> alter system set events '10046 trace name context off';
System altered.

--//重新登入檢查:
SQL> oradebug setmypid
Statement processed.

SQL> oradebug eventdump session
Statement processed.

SQL> oradebug eventdump system
Statement processed.

--//刪除垃圾檔案.
# find . -mtime +0  -name "*.trc"  -print | xargs -P10 rm

select * from DBA_AUDIT_TRAIL;
--//10g預設沒有開啟審計,查不到任何記錄。

--//實際上這樣還是不行,大量的檔案控制代碼(file handle)沒有釋放。空間還是無法回收。
# lsof | grep delete | sort -k7 -nr | head
oracle    30130  oracle    2w      REG                8,3  9503424512   32664042 /u01/app/oracle/admin/xxxx/bdump/xxxx_lns1_30130.trc (deleted)
oracle    28877  oracle    5w      REG                8,3  3274162176   32715899 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_28877.trc (deleted)
oracle    24910  oracle    5w      REG                8,3   522918304   32710401 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_24910.trc (deleted)
oracle    29359  oracle    5w      REG                8,3   394892188   32688562 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_29359.trc (deleted)
oracle    24271  oracle    5w      REG                8,3   254185472   32715859 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_24271.trc (deleted)
oracle    29083  oracle    5w      REG                8,3   241707825   32650091 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_29083.trc (deleted)
oracle    25070  oracle    5w      REG                8,3   237270275   32701512 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_25070.trc (deleted)
oracle    24874  oracle    5w      REG                8,3   188691034   32642091 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_24874.trc (deleted)
oracle    10944  oracle    5w      REG                8,3   188186624   32661146 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_10944.trc (deleted)
oracle    28732  oracle    5w      REG                8,3   186159881   32657692 /u01/app/oracle/admin/xxxx/udump/xxxx_ora_28732.trc (deleted)

--//利用中午的空隙,kill 掉全部使用者行程(process).
# lsof | grep delete | grep xxxx_ora_ | sort -k7 -nr | grep -v lkinstxxxx | awk '{print $2}'| paste -sd, | xargs ps -fp
# lsof | grep delete | grep xxxx_ora_ | sort -k7 -nr | grep -v lkinstxxxx | awk '{print $2}'| paste -sd, | xargs kill -9
or
# lsof | grep delete | grep xxxx_ora_ | sort -k7 -nr | grep -v lkinstxxxx | awk '{print $2}'| xargs -IQ kill -9 Q

--//xxxx_lns1_30130 對應的是傳輸日誌的行程(LNS process).
SYS@xxxx> select * from V$BGPROCESS where name like 'LNS%' and Paddr<>'00';
PADDR              PSERIAL# NAME                 DESCRIPTION                         ERROR
---------------- ---------- -------------------- ------------------------------ ----------
00000000A5BB9F18         89 LNS1                 Network Server 1               0

SYS@xxxx> alter system set log_archive_dest_state_2=defer;
System altered.

SYS@xxxx> alter system archive log current;
System altered.

$ ps -ef | grep ln[s]
--//這樣對應的lns行程(process)消失了.

SYS@xxxx> alter system set log_archive_dest_state_2=enable ;
System altered.

SYS@xxxx> alter system archive log current;
System altered.

$ ps -ef | grep ln[s]
oracle   11882     1  0 15:59 ?        00:00:00 ora_lns1_xxxx
--//再次啟動,這樣磁碟空間就可以回收了.
--//整個過程並不複雜,但是浪費了一個上午時間.

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2839615/,如需轉載,請註明出處,否則將追究法律責任。

相關文章