[20181031]模擬ora-01591錯誤.txt

lfree發表於2018-10-31

[20181031]模擬ora-01591錯誤.txt


--//分散式事務使用dblink更新多個資料庫的事務,最常見的ORA-01591錯誤.

$ oerr ora 1591

01591, 00000, "lock held by in-doubt distributed transaction %s"

// *Cause:  Trying to access resource that is locked by a dead two-phase commit

//          transaction that is in prepared state.

// *Action: DBA should query the pending_trans$ and related tables, and attempt

//          to repair network connection(s) to coordinator and commit point.

//          If timely repair is not possible, DBA should contact DBA at commit

//          point if known or end user for correct outcome, or use heuristic

//          default if given to issue a heuristic commit or abort command to

//          finalize the local portion of the distributed transaction.


--//透過簡單的模擬產生這個錯誤.


1.環境:

--//客戶端:

SCOTT@book> @ ver1

PORT_STRING                    VERSION        BANNER

------------------------------ -------------- --------------------------------------------------------------------------------

x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production


SCOTT@book> create PUBLIC database link test033 connect to scott identified by btbtms using '192.168.100.33:1521/test:DEDICATED';

Database link created.


SCOTT@book> create table deptx as select * from deptx;

Table created.


SCOTT@book> select sysdate from dual@test033;

SYSDATE

-------------------

2018-10-31 08:55:34

--//OK,連線沒有問題.

SCOTT@book> commit;

Commit complete.


--//服務端:

SCOTT@test> @ &r/ver1

PORT_STRING                    VERSION        BANNER

------------------------------ -------------- ----------------------------------------------------------------

x86_64/Linux 2.4.xx            10.2.0.4.0     Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 - 64bi


SCOTT@test> create table depty as select * from dept;

Table created.


SCOTT@book> select * from deptx;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        10 ACCOUNTING     NEW YORK

        20 RESEARCH       DALLAS

        30 SALES          CHICAGO

        40 OPERATIONS     BOSTON


SCOTT@book> select * from depty@test033;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        10 ACCOUNTING     NEW YORK

        20 RESEARCH       DALLAS

        30 SALES          CHICAGO

        40 OPERATIONS     BOSTON


2.一些解析與說明:

--//這次不再使用alter system disable distributed recovery;.

--//"COMMIT COMMENT 'ORA-2PC-CRASH-TEST-N'"這個命令,是Oracle用來強制分散式事務失敗,4代表非提交點的分散式事務失敗在

--//prepare後。其它數值代表的意思如下:


1 Crash commit point after collect

2 Crash non-commit-point site after collect

3 Crash before prepare (non-commit-point site)

4 Crash after prepare (non-commit-point site)

5 Crash commit point site before commit

6 Crash commit point site after commit

7 Crash non-commit-point site before commit

8 Crash non-commit-point site after commit

9 Crash commit point site before forget

10 Crash non-commit-point site before forget


--//實際上COMMIT COMMENT 'ORA-2PC-CRASH-TEST-7'就應該模擬出來,測試看看.


3.測試:

SCOTT@book> update deptx         set loc = lower(loc) where deptno=10;

1 row updated.


SCOTT@book> update depty@test033 set loc = lower(loc) where deptno=20;

1 row updated.


SCOTT@book> COMMIT COMMENT 'ORA-2PC-CRASH-TEST-7';

COMMIT COMMENT 'ORA-2PC-CRASH-TEST-7'

*

ERROR at line 1:

ORA-02054: transaction 9.30.2968 in-doubt

ORA-02059: ORA-2PC-CRASH-TEST-7 in commit comment


SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------


no rows selected

--//已經沒有事務.


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


SCOTT@book> select * from deptx where deptno=10;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        10 ACCOUNTING     new york


SCOTT@book> select * from depty@test033 where deptno=20;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        20 RESEARCH       dallas


--//這種方式不行.換1個方式測試.


4.測試連線異常中斷:

--//客戶端:

SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------

no rows selected


SCOTT@book> update deptx         set loc = upper(loc) where deptno=10;

1 row updated.


SCOTT@book> update depty@test033 set loc = upper(loc) where deptno=20;

1 row updated.


SCOTT@book> @ &r/spid

       SID    SERIAL# PROCESS                  SERVER    SPID       PID  P_SERIAL# C50

---------- ---------- ------------------------ --------- ------ ------- ---------- --------------------------------------------------

       274         19 28023                    DEDICATED 28024       21          8 alter system kill session '274,19' immediate;

--//當前連線程式號=28024.


--//服務端: (我以前提過v$session.process記錄的是客戶端程式號.)

SCOTT@test> select spid,program from v$process where addr in (select paddr from v$session where process=28024);

SPID   PROGRAM

------ --------------------------

12619  oracle@xxxx


# lsof -P -i -n |grep -e 12619 -e COMMAND

COMMAND     PID    USER   FD   TYPE   DEVICE SIZE/OFF NODE NAME

oracle    12619  oracle   14u  IPv4 28066050      0t0  TCP 192.168.100.33:1521->192.168.100.78:16778 (ESTABLISHED)


--//也可以對上.

# kill -9 12619


SCOTT@book> commit ;

commit

*

ERROR at line 1:

ORA-02054: transaction 10.2.24501 in-doubt

ORA-03150: end-of-file on communication channel for database link

ORA-02063: preceding line from TEST033


SCOTT@book> @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------

no rows selected


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


--//這樣也不行.


5.測試(服務端資料庫異常關閉)


SCOTT@book>  @ &r/xid

XIDUSN_XIDSLOT_XIDSQN

------------------------------

no rows selected


SCOTT@book> update deptx         set loc = upper(loc) where deptno=10;

1 row updated.


SCOTT@book> update depty@test033 set loc = upper(loc) where deptno=20;

1 row updated.


--//在服務端異常關閉:

SYS@test> shutdown abort ;

ORACLE instance shut down.

--//補充說明:正常關閉shutdown immediate也是一樣能演示這個問題.


SCOTT@book> commit ;

commit

*

ERROR at line 1:

ORA-02054: transaction 10.23.24502 in-doubt

ORA-03150: end-of-file on communication channel for database link

ORA-02063: preceding line from TEST033


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID            STATE

---------------------- ------------------------- ----------------

10.23.24502            BOOK.e6127bf4.10.23.24502 prepared


--//OK,這次模擬出來了.

SCOTT@book> update deptx         set loc = upper(loc) where deptno=10;

update deptx         set loc = upper(loc) where deptno=10

*

ERROR at line 1:

ORA-01591: lock held by in-doubt distributed transaction 10.23.24502


--//重新啟動服務端:

SYS@test> startup

ORACLE instance started.

Total System Global Area  486539264 bytes

Fixed Size                  2084872 bytes

Variable Size             360714232 bytes

Database Buffers          113246208 bytes

Redo Buffers               10493952 bytes

Database mounted.

Database opened.


SCOTT@test> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


SCOTT@test> select * from depty where deptno=20;

    DEPTNO DNAME          LOC

---------- -------------- -------------

        20 RESEARCH       dallas


--//客戶端:

SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


---//可以發現只要服務端資料庫起來,這個問題自然消失,自己rollback.

--//再次重複這個測試(過程略):


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID          STATE

---------------------- ----------------------- ----------------

5.21.1953              BOOK.e6127bf4.5.21.1953 prepared


--//解決ora-01591錯誤:我個人總喜歡先嚐試提交:

SCOTT@book> commit force '5.21.1953';

Commit complete.


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

LOCAL_TRAN_ID          GLOBAL_TRAN_ID          STATE

---------------------- ----------------------- ----------------

5.21.1953              BOOK.e6127bf4.5.21.1953 forced commit


--//現在修改已經沒有問題:

SCOTT@book> update deptx         set loc = upper(loc) where deptno=10;

1 row updated.


SCOTT@book> commit ;

Commit complete.


SCOTT@book> exec DBMS_TRANSACTION.PURGE_LOST_DB_ENTRY('5.21.1953')

BEGIN DBMS_TRANSACTION.PURGE_LOST_DB_ENTRY('5.21.1953'); END;


*

ERROR at line 1:

ORA-01031: insufficient privileges

ORA-06512: at "SYS.DBMS_TRANSACTION", line 97

ORA-06512: at line 1

--//許可權不夠.


SYS@book> set transaction use rollback segment SYSTEM;

Transaction set.


SYS@book>  exec DBMS_TRANSACTION.PURGE_LOST_DB_ENTRY('5.21.1953');

PL/SQL procedure successfully completed.


SCOTT@book> select local_tran_id, GLOBAL_TRAN_ID, state from dba_2pc_pending;

no rows selected


--//不過再不行,參照網上的解決方法,實際上刪除一些基表的內容:

set transaction use rollback segment system;

--delete from dba_2pc_pending where local_tran_id = '1.34.240088';

delete from sys.pending_trans$ where local_tran_id ='1.34.240088';

delete from pending_sessions$ where local_tran_id = '1.34.240088';

delete from pending_sub_sessions$ where local_tran_id = '1.34.240088';

commit;


--//不行,還給人為pending_trans$插入記錄.網上有詳細的介紹.


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2218167/,如需轉載,請註明出處,否則將追究法律責任。

相關文章