[20181030]模擬分散式事務掛起導致TX鎖爭用.txt

lfree發表於2018-10-31

[20181030]模擬分散式事務掛起導致TX鎖爭用.txt


--//隨著業務不斷擴充套件,在業務中使用分散式事務的情況越來越多,除了導致scn抬高外,可能由於網路問題導致連線中斷,

--//甚至由於業務連線外部網路,比較常見ora-01591錯誤.


$ oerr ora 1591

01591, 00000, "lock held by in-doubt distributed transaction %s"

// *Cause:  Trying to access resource that is locked by a dead two-phase commit

//          transaction that is in prepared state.

// *Action: DBA should query the pending_trans$ and related tables, and attempt

//          to repair network connection(s) to coordinator and commit point.

//          If timely repair is not possible, DBA should contact DBA at commit

//          point if known or end user for correct outcome, or use heuristic

//          default if given to issue a heuristic commit or abort command to

//          finalize the local portion of the distributed transaction.


--//網上有相關的文章提到如何解決這個問題.

--//這裡模擬分散式事務掛起導致TX鎖爭用,模擬出現ora-01591錯誤.

--//測試參看連結:


1.環境:

--//客戶端:

SCOTT@book> @ ver1

PORT_STRING                    VERSION        BANNER

------------------------------ -------------- --------------------------------------------------------------------------------

x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production


SCOTT@book> create PUBLIC database link test033 connect to scott identified by btbtms using '192.168.100.33:1521/test:DEDICATED';

Database link created.


SCOTT@book> create table deptx as select * from deptx;

Table created.


SCOTT@book> select sysdate from dual@test033;

SYSDATE

-------------------

2018-10-31 08:55:34

--//OK,連線沒有問題.

SCOTT@book> commit;

Commit complete.


2.測試:

--//為了模擬分散式事務失敗,無法自動恢復的情況,需要把分散式事務自動恢復關掉。

SCOTT@book> alter system disable distributed recovery;

System altered.


--//服務端:

SCOTT@test> @ &r/ver1

PORT_STRING                    VERSION        BANNER

------------------------------ -------------- ----------------------------------------------------------------

x86_64/Linux 2.4.xx            10.2.0.4.0     Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi


SCOTT@test> create table depty as select * from dept;

Table created.


2.在客戶端執行:

SCOTT@book> update deptx         set loc = lower(loc) where deptno=10;

1 row updated.


SCOTT@book> update depty@test033 set loc = lower(loc) where deptno=20;

1 row updated.


SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------

10.2.24482


C70                                                                        XIDUSN    XIDSLOT     XIDSQN     UBAFIL     UBABLK     UBASQN     UBAREC STATUS            USED_UBLK  USED_UREC XID              ADDR             START_DATE          FLAG

---------------------------------------------------------------------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------------- ---------- ---------- ---------------- ---------------- ------------------- ----------

ALTER SYSTEM DUMP UNDO BLOCK '_SYSSMU10_1197734989$' XID 10 2 24482;           10          2      24482          3      10115       4716          1 ACTIVE                    1          1 0A000200A25F0000 0000000081BD0748 2018-10-31 08:56:30 4197891

ALTER SYSTEM DUMP UNDO HEADER '_SYSSMU10_1197734989$';

ALTER SYSTEM DUMP DATAFILE 3 BLOCK 10115;


SCOTT@book> COMMIT COMMENT 'ORA-2PC-CRASH-TEST-4';

COMMIT COMMENT 'ORA-2PC-CRASH-TEST-4'

*

ERROR at line 1:

ORA-02054: transaction 10.2.24482 in-doubt

ORA-02059: ORA-2PC-CRASH-TEST-4 in commit comment


SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------


no rows selected

--//已經沒有事務.


--//"COMMIT COMMENT 'ORA-2PC-CRASH-TEST-4'"這個命令,是Oracle用來強制分散式事務失敗,4代表非提交點的分散式事務失敗在

--//prepare後。其它數值代表的意思如下:


1 Crash commit point after collect

2 Crash non-commit-point site after collect

3 Crash before prepare (non-commit-point site)

4 Crash after prepare (non-commit-point site)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

5 Crash commit point site before commit

6 Crash commit point site after commit

7 Crash non-commit-point site before commit

8 Crash non-commit-point site after commit

9 Crash commit point site before forget

10 Crash non-commit-point site before forget


--//查詢dba_2pc_pending檢視:

SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID            STATE

---------------------- ------------------------- ----------------

10.2.24482             BOOK.e6127bf4.10.2.24482  prepared


--//本地事務是10.2.24482(與前面能對上),全域性事務是BOOK.e6127bf4.10.2.24482,狀態是prepared。


--//觀察lock狀態:

SCOTT@book> @ &r/viewlock

no rows selected

--//沒有lock.


--//如果我們再去執行更新deptx表的同一行發現會話掛起,過了就會報ORA-01591,Oracle自動檢測到了這行記錄被分散式事務掛起,導

--//致更新失敗。

SCOTT@book> update deptx         set dname = lower(dname) where deptno=10;

update deptx         set dname = lower(dname) where deptno=10

       *

ERROR at line 1:

ORA-01591: lock held by in-doubt distributed transaction 10.2.24482

--//掛起一小會後,出現提示.


SCOTT@book> @ &r/viewlock

no rows selected


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID           STATE

---------------------- ------------------------ ----------------

10.2.24482             BOOK.e6127bf4.10.2.24482 prepared


--//解決很簡單:

SYS@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID           STATE

---------------------- ------------------------ ----------------

10.2.24482             BOOK.e6127bf4.10.2.24482 forced commit

--//state=forced commit.


SYS@book> alter system enable distributed recovery;

System altered.


SYS@book> exec dbms_transaction.purge_lost_db_entry( '10.2.24482');

BEGIN dbms_transaction.purge_lost_db_entry( '10.2.24482'); END;

*

ERROR at line 1:

ORA-06510: PL/SQL: unhandled user-defined exception

ORA-06512: at "SYS.DBMS_TRANSACTION", line 105

ORA-06512: at line 1

--//我的測試前面alter system enable distributed recovery;,開啟以後就可以提交了.


SYS@book> select * from scott.deptx where deptno=10;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        10 ACCOUNTING     new york


SCOTT@book> select * from scott.depty@test033 where deptno=20;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        20 RESEARCH       DALLAS


--//本地修改了,而遠端修改失敗.


--//連結:,提到設定commit_point_strength引數,自己也重複測試看看.

--//以下是轉載:(操作我自己重新做了一次)

如何防止進行prepared狀態


如果分散式事務的操作,經常在某個重要的資料庫例項上進行,那為了防止本地事務異常進入prepared狀態無法自動回滾處理,就需要把

這個重要節點的commit_point_strength引數改大為dblink資料庫中最大值,這樣這個節點在提交分散式事務時,本地事務是不進入

prepared狀態的。


如當前遠端例項commit_point_strength為1,把本地節點的commit_point_strength改為100,修改這個引數需要重啟資料庫例項。


SYS@book> @ &r/hide commit_point_strength

NAME                  DESCRIPTION                                                   DEFAULT_VALUE SESSION_VALUE SYSTEM_VALUE

--------------------- ------------------------------------------------------------- ------------- ------------- ------------

commit_point_strength Bias this node has toward not preparing in a two-phase commit TRUE          1             1


SYS@book> alter system set commit_point_strength=100 scope=spfile;

System altered.


SYS@book> shutdown immediate ;

Database closed.

Database dismounted.

ORACLE instance shut down.


SYS@book> startup

ORACLE instance started.

Total System Global Area  634732544 bytes

Fixed Size                  2255792 bytes

Variable Size             197133392 bytes

Database Buffers          427819008 bytes

Redo Buffers                7524352 bytes

Database mounted.

Database opened.


SYS@book> show parameter commit_point_strength;

NAME                  TYPE    VALUE

--------------------- ------- -----

commit_point_strength integer 100



--//再次模擬上面的過程:

SCOTT@book> alter system disable distributed recovery;

System altered.


SCOTT@book> update deptx         set loc = lower(loc)||'a' where deptno=10;

1 row updated.


SCOTT@book> update depty@test033 set loc = lower(loc) where deptno=20;

1 row updated.


SCOTT@book> @ &r/xid


XIDUSN_XIDSLOT_XIDSQN

------------------------------

10.8.24486


SCOTT@book> COMMIT COMMENT 'ORA-2PC-CRASH-TEST-4';

COMMIT COMMENT 'ORA-2PC-CRASH-TEST-4'

*

ERROR at line 1:

ORA-02050: transaction 10.8.24486 rolled back, some remote DBs may be in-doubt

ORA-02054: transaction 10.47.75654 in-doubt

ORA-02059: ORA-2PC-CRASH-TEST-4 in commit comment

ORA-02063: preceding 2 lines from TEST033



SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------


no rows selected

--//已經沒有事務.


SYS@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID           STATE

---------------------- ------------------------ ----------------

10.8.24486             BOOK.e6127bf4.10.8.24486 collecting

--//state=collecting,但是不影響本地更新操作.更新deptx表發現可以更新,不再掛起和報錯。


SCOTT@book> update deptx         set loc = lower(loc)||'a' where deptno=10;

1 row updated.


SCOTT@book> commit ;

Commit complete.


--//不過作者沒測試完成, 在服務端查詢:

SCOTT@test> select * from depty;

select * from depty

              *

ERROR at line 1:

ORA-01591: lock held by in-doubt distributed transaction 10.47.75654


SCOTT@test> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID           STATE

---------------------- ------------------------ ----------------

10.47.75654            BOOK.e6127bf4.10.8.24486 prepared


--//state=prepared.


SCOTT@test> update depty set loc = lower(loc) where deptno=20;

update depty set loc = lower(loc) where deptno=20

       *

ERROR at line 1:

ORA-01591: lock held by in-doubt distributed transaction 10.47.75654


--//解決方法參考網上解決ora-01591的文件.

--//我這裡執行如下:

--//客戶端:

SYS@book> alter system enable distributed recovery;

System altered.


SYS@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


SCOTT@book> select * from deptx where deptno=10;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        10 accounting     new yorka


SCOTT@book> select * from depty@test033 where deptno=20;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        20 RESEARCH       DALLAS


--//服務端:

SCOTT@test> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


--//commit_point_strength=100本地業務沒有影響,但是遠端存在問題,依舊出現ora-01591.當然我的測試使用alter system disable distributed recovery;

--//並不算真正的模擬.


3.注意收尾工作:

SYS@book> alter system reset commit_point_strength;

System altered.


--//重啟資料庫,檢查引數commit_point_strength設定(略).


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2218154/,如需轉載,請註明出處,否則將追究法律責任。

相關文章