[20210316]MSSM表空間塊ITL的LCK 3.txt

lfree發表於2021-03-16

[20210316]MSSM表空間塊ITL的LCK 3.txt

--//以前的測試,連結:http://blog.itpub.net/267265/viewspace-2564734/=>[20190125]MSSM表空間塊ITL的LCK.txt
--//昨天遇到的問題ORA-04000 the sum of PCTUSED and PCTFREE cannot exceed 100,連結:http://blog.itpub.net/267265/viewspace-2762819/
--//想想是否與這個問題相關呢?這個也是mssm與assm的不同的地方,我決定測試看看。

1.環境:
SCOTT@book> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

CREATE TABLESPACE MSSM DATAFILE
  '/mnt/ramdisk/book/mssm01.dbf' SIZE 40M AUTOEXTEND ON NEXT 16M MAXSIZE UNLIMITED
LOGGING
ONLINE
EXTENT MANAGEMENT LOCAL AUTOALLOCATE
BLOCKSIZE 8K
SEGMENT SPACE MANAGEMENT MANUAL
FLASHBACK ON;

2.測試:

SCOTT@book> create table t tablespace mssm pctfree 99 pctused 1 as select level id, rpad(level, 3500, 'X') vc from dual connect by level <= 1;
Table created.
--//注:預設pctfree=10,PCTUSED=40,這裡建表時顯式指定了pctfree=99,pctused=1.

SCOTT@book> @ desc t
Name  Null?    Type
----- -------- ----------------------------
ID             NUMBER
VC             VARCHAR2(4000)

SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID                      ID SUBS
------------------ ---------- ----
AAAWMkAAHAAAACBAAA          1 1X

SCOTT@book> @ rowid AAAWMkAAHAAAACBAAA
    OBJECT       FILE      BLOCK        ROW ROWID_DBA            DBA                  TEXT
---------- ---------- ---------- ---------- -------------------- -------------------- ----------------------------------------
     90916          7        129          0  0x1C00081           7,129                alter system dump datafile 7 block 129 ;

SCOTT@book> delete from t where id=1;
1 row deleted.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system flush buffer_cache ;
System altered.

SCOTT@book>  alter system dump datafile 7 block 129 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00081
 Object id on Block? Y
 seg/obj: 0x16324  csc: 0x03.177d9219  itc: 3  flg: O  typ: 1 - DATA
     fsl: 2  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9219
0x02   0x000a.00b.00005d86  0x00c00b60.11db.0b  --U-    2  fsc 0x0db3.177d9231
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
--//實際上刪除1條記錄,而ITL=0x02的LCK記錄的是2.這個是我以前遇到的情況。

3.繼續測試:
--//drop table t purge;
SCOTT@book> create table t tablespace mssm pctfree 40 pctused 1 as select level id, rpad('X', 2000, 'X') vc from dual connect by level <= 4;
Table created.

SCOTT@book> @ desc t
Name  Null?    Type
----- -------- ----------------------------
ID             NUMBER
VC             VARCHAR2(2000)

SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID                      ID SUBS
------------------ ---------- ----
AAAWMnAAHAAAACBAAA          1 XX
AAAWMnAAHAAAACBAAB          2 XX
AAAWMnAAHAAAACCAAA          3 XX
AAAWMnAAHAAAACCAAB          4 XX

SCOTT@book> delete from t where id=1;
1 row deleted.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system flush buffer_cache ;
System altered.

SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00081
 Object id on Block? Y
 seg/obj: 0x16327  csc: 0x03.177d9893  itc: 3  flg: -  typ: 1 - DATA
     fsl: 0  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9893
0x02   0x000a.00f.00005d89  0x00c00b6c.11db.0a  --U-    1  fsc 0x07d7.177d98a6
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
--//注意看下劃線LCK=1,而不是前面的2.

SCOTT@book> delete from t where id=2;
1 row deleted.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system flush buffer_cache ;
System altered.

SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00081
 Object id on Block? Y
 seg/obj: 0x16327  csc: 0x03.177d9893  itc: 3  flg: O  typ: 1 - DATA
     fsl: 3  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9893
0x02   0x000a.00f.00005d89  0x00c00b6c.11db.0a  --U-    1  fsc 0x07d7.177d98a6
0x03   0x000a.011.00005cd6  0x00c00b6b.11db.1e  --U-    2  fsc 0x07d7.177d9977
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//可以發現這次刪除對應事務槽的LCK=2.也就是多產生的LCK與PCTUSED的大小相關。
--//當我刪除1條記錄時,還沒有滿足pctused=1的情況,而再刪除記錄時,該塊已經沒有記錄,這樣一定滿足pctused=1的情況。
--//這樣標識該塊可以再次投入使用,一些細節我不是很清楚,這樣多記錄一個事務,從哪裡反映出來,我還是不清楚。

4.可以透過如下驗證我的推斷:
--//drop table t purge;
SCOTT@book> create table t tablespace mssm pctfree 40 pctused 60 as select level id, rpad('X', 2000, 'X') vc from dual connect by level <= 4;
Table created.
--//注:pctfree=40,PCTUSED=60. 這樣即使我刪除1條記錄也滿足這個觸發條件(pctused=60).

SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID                      ID SUBS
------------------ ---------- ----
AAAWMoAAHAAAACBAAA          1 XX
AAAWMoAAHAAAACBAAB          2 XX
AAAWMoAAHAAAACCAAA          3 XX
AAAWMoAAHAAAACCAAB          4 XX

SCOTT@book> delete from t where id=2;
1 row deleted.

SCOTT@book> @ xid
XIDUSN_XIDSLOT_XIDSQN
------------------------------
6.11.2062

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system flush buffer_cache ;
System altered.

SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00081
 Object id on Block? Y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: O  typ: 1 - DATA
     fsl: 2  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9bae
0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  --U-    2  fsc 0x07d7.177d9c1f
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000

--//繼續刪除看看:
SCOTT@book> delete from t where id=1;
1 row deleted.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system checkpoint ;
System altered.

SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00081
 Object id on Block? Y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: O  typ: 1 - DATA
     fsl: 2  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9bae
0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  --U-    2  fsc 0x07d7.177d9c1f
0x03   0x000a.007.00005d7c  0x00c00b6f.11db.0c  --U-    1  fsc 0x07d7.177d9c92
--//也就是檢測1次。這也是我前面測試遇到的情況。

5.測試到這裡就可以知道,理論上控制pctused的大小就可以控制LCK產生的數量。
或者這樣講:如果事務的dml沒有觸發滿足pctused的條件,LCK就不會在原來事務的基礎上增加1。

SCOTT@book> alter table t pctused 10;
Table altered.

SCOTT@book> delete from t where id=3;
1 row deleted.

SCOTT@book> @ xid
XIDUSN_XIDSLOT_XIDSQN
------------------------------
5.2.1925

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system checkpoint ;
System altered.

SCOTT@book> alter system dump datafile 7 block 130 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00082
 Object id on Block? Y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: -  typ: 1 - DATA
     fsl: 0  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9bae
0x02   0x0005.002.00000785  0x00c000c7.0547.19  --U-    1  fsc 0x07d7.177d9f0c
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
--//這也再次驗證我的判斷,但是有一點我一直沒明白,oracle如何知道還有一個事務呢?比如我再刪除id=4.
--//這時對應的ITL槽LCK記錄的一定是2.

SCOTT@book> delete from t where id=4;
1 row deleted.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> alter system checkpoint ;
System altered.

SCOTT@book> alter system dump datafile 7 block 130 ;
System altered.

--//檢查轉儲發現:
Block header dump:  0x01c00082
 Object id on Block? Y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: O  typ: 1 - DATA
     fsl: 3  fnx: 0x0 ver: 0x01

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.177d9bae
0x02   0x0005.002.00000785  0x00c000c7.0547.19  --U-    1  fsc 0x07d7.177d9f0c
0x03   0x000a.010.00005d8d  0x00c00b70.11db.0c  --U-    2  fsc 0x07d7.177da008
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

6.如果你透過bbed修改itl槽資訊會出現什麼情況呢?
BBED> p dba 7,130 ktbbh.ktbbhitl[2]
struct ktbbhitl[2], 24 bytes                @92
   struct ktbitxid, 8 bytes                 @92
      ub2 kxidusn                           @92       0x000a
      ub2 kxidslt                           @94       0x0010
      ub4 kxidsqn                           @96       0x00005d8d
   struct ktbituba, 8 bytes                 @100
      ub4 kubadba                           @100      0x00c00b70
      ub2 kubaseq                           @104      0x11db
      ub1 kubarec                           @106      0x0c
   ub2 ktbitflg                             @108      0x2002 (KTBFUPB)
   union _ktbitun, 2 bytes                  @110
      sb2 _ktbitfsc                         @110      2007
      ub2 _ktbitwrp                         @110      0x07d7
   ub4 ktbitbas                             @112      0x177da008

BBED> assign dba 7,130 ktbbh.ktbbhitl[2].ktbitflg=0x2001
ub2 ktbitflg                                @108      0x2001 (KTBFUPB)

--//注:相當於修改該ITL槽的LCK=1.
BBED> sum apply
Check value for File 7, Block 130:
current = 0xfae4, required = 0xfae4

BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130

Block Checking: DBA = 29360258, Block Type = KTB-managed data block
data header at 0x1f41e74
kdbchk: xaction header lock count mismatch
        trans=3 ilk=1 nlo=2
Block 130 failed with check code 6108

--//你可以發現檢查時報錯,oracle的檢查如何知道LCK應該是2呢?在該塊中有什麼或者標識反映這樣的變化呢?
--//難道在段頭dba=7,128處。

BBED> p dba 7,128 ktsfs_txn[0]
struct ktsfs_txn[0], 20 bytes               @4176
   ub2 ktsfsflg                             @4176     0x0001 (KTSUSED)
   struct ktsfsxid, 8 bytes                 @4180
      ub2 kxidusn                           @4180     0x0006
      ub2 kxidslt                           @4182     0x000b
      ub4 kxidsqn                           @4184     0x0000080e
   ub4 ktsfslhd                             @4188     0x01c00081
   ub4 ktsfsltl                             @4192     0x01c00081
--//0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  --U-    2  fsc 0x07d7.177d9c1f
--//注意:kxidusn,kxidslt,kxidsqn與事務槽記錄的一致。

BBED> p dba 7,128 ktsfs_txn[1]
struct ktsfs_txn[1], 20 bytes               @4196
   ub2 ktsfsflg                             @4196     0x0001 (KTSUSED)
   struct ktsfsxid, 8 bytes                 @4200
      ub2 kxidusn                           @4200     0x000a
      ub2 kxidslt                           @4202     0x0010
      ub4 kxidsqn                           @4204     0x00005d8d
   ub4 ktsfslhd                             @4208     0x01c00082
   ub4 ktsfsltl                             @4212     0x01c00082
--//0x01c00082 = set dba 7,130 = alter system dump datafile 7 block 130 = 29360258
--//0x03   0x000a.010.00005d8d  0x00c00b70.11db.0c  --U-    2  fsc 0x07d7.177da008
--//注意:kxidusn,kxidslt,kxidsqn與事務槽記錄的一致。

--//跟蹤bbed的執行看看:
$ ps -ef | grep bbe[d]
oracle   55921 20345  0 Mar15 pts/5    00:00:00 /usr/local/bin/rlwrap -s 9999 -c -r -i -f /usr/local/share/rlwrap/bbed /u01/app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par
oracle   55922 55921  0 Mar15 pts/10   00:00:00 bbed app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par

$ ls -l /proc/55922/fd
total 0
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 0 -> /dev/pts/10
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 1 -> /dev/pts/10
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 10 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lrwx------ 1 oracle oinstall 64 2021-03-16 08:42:11 2 -> /dev/pts/10
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 3 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 4 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/oracore/mesg/lrmus.msb
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 5 -> /home/oracle/bbed/filelist.txt
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 6 -> /home/oracle/bbed/bifile.bbd
l-wx------ 1 oracle oinstall 64 2021-03-16 10:17:51 7 -> /home/oracle/bbed/log.bbd
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 8 -> /home/oracle/bbed/cmd.par
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 9 -> /mnt/ramdisk/book/mssm01.dbf
--//檔案控制程式碼9對應/mnt/ramdisk/book/mssm01.dbf。

$ strace  -f -p 55922 -e read,lseek -o /tmp/bbed.txt
Process 55922 attached - interrupt to quit
^CProcess 55922 detached

$ egrep "lseek.9" /tmp/bbed.txt
55922 lseek(9, 1064960, SEEK_SET)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET)       = 1065984
55922 lseek(9, 1067008, SEEK_SET)       = 1067008
55922 lseek(9, 1068032, SEEK_SET)       = 1068032
55922 lseek(9, 1069056, SEEK_SET)       = 1069056
55922 lseek(9, 1070080, SEEK_SET)       = 1070080
55922 lseek(9, 1071104, SEEK_SET)       = 1071104
55922 lseek(9, 1072128, SEEK_SET)       = 1072128
55922 lseek(9, 1064960, SEEK_SET)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET)       = 1065984
55922 lseek(9, 1067008, SEEK_SET)       = 1067008
55922 lseek(9, 1068032, SEEK_SET)       = 1068032
55922 lseek(9, 1069056, SEEK_SET)       = 1069056
55922 lseek(9, 1070080, SEEK_SET)       = 1070080
55922 lseek(9, 1071104, SEEK_SET)       = 1071104
55922 lseek(9, 1072128, SEEK_SET)       = 1072128
55922 lseek(9, 1064960, SEEK_SET)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET)       = 1065984
55922 lseek(9, 1067008, SEEK_SET)       = 1067008
55922 lseek(9, 1068032, SEEK_SET)       = 1068032
55922 lseek(9, 1069056, SEEK_SET)       = 1069056
55922 lseek(9, 1070080, SEEK_SET)       = 1070080
55922 lseek(9, 1071104, SEEK_SET)       = 1071104
55922 lseek(9, 1072128, SEEK_SET)       = 1072128
--//130*8192  = 1064960,掃描dba=7,130塊3次,並沒有掃描dba=7,128,oracle是如何檢查的呢?
--//檢查N久發現僅僅可能ktbbh.ktbbhflg,ktbbh.ktbbhfsl

BBED> p dba 7,130 ktbbh
struct ktbbh, 96 bytes                      @20
   ub1 ktbbhtyp                             @20       0x01 (KDDBTDATA)
   union ktbbhsid, 4 bytes                  @24
      ub4 ktbbhsg1                          @24       0x00016328
      ub4 ktbbhod1                          @24       0x00016328
   struct ktbbhcsc, 8 bytes                 @28
      ub4 kscnbas                           @28       0x177d9bae
      ub2 kscnwrp                           @32       0x0003
   sb2 ktbbhict                             @36       3
   ub1 ktbbhflg                             @38       0x03 (KTBFONFL)  -->0 = ON THE FREELIST
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   ub1 ktbbhfsl                             @39       0x03             --ITL TX FREELIST SLOT
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ub4 ktbbhfnx                             @40       0x00000000

BBED> assign dba 7,130 ktbbh.ktbbhfsl=0x0
ub1 ktbbhfsl                                @39       0x00

BBED> assign dba 7,130 ktbbh.ktbbhflg=0x02
ub1 ktbbhflg                                @38       0x02 (NONE)

BBED> sum apply
Check value for File 7, Block 130:
current = 0xf9e5, required = 0xf9e5

BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130
--//當然這樣修改沒必要,僅僅知道還有1個事務在這裡.還可以做一個驗證:

BBED> assign dba 7,130 ktbbh.ktbbhitl[1].ktbitflg=0x2002
ub2 ktbitflg                                @84       0x2002 (KTBFUPB)

BBED> assign dba 7,130 ktbbh.ktbbhflg=0x03
ub1 ktbbhflg                                @38       0x03 (KTBFONFL)

BBED> assign dba 7,130 ktbbh.ktbbhfsl=0x02
ub1 ktbbhfsl                                @39       0x02  
--//指向對應的ITL槽(從1開始計數)。

BBED> sum apply
Check value for File 7, Block 130:
current = 0xfbe7, required = 0xfbe7

BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130

7. 總結:
--//MSSM表空間塊ITL的LCK 出現與dml不一致的情況,是觸發檢查pctused引起的,修改了ktbbh.ktbbhfsl,ktbbh.ktbbhflg標識。
--//這與mssm表空間的表有pctused屬性有關,實際上昨天出現ORA-04000時,腦子第一個反應並不是這個問題如何解決,而是以前
--//遇到的這個問題。換一句話講,如果當時能仔細思考mssm與assm有什麼不同,或許這個問題早知道答案了。
--//itpub 上太缺乏相關人討論這樣的問題,我檢索一下我在itpub上發過這個問題,連結:
--//http://www.itpub.net/thread-2108112-1-1.html
--//我們團隊內部更加不可能........
--//又浪費一個上午的時間....從上午8:30-11:30,真心想站起來走一走。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2763184/,如需轉載,請註明出處,否則將追究法律責任。

相關文章