[20211229]sql語句包含中文儲存clob的編碼問題.txt

lfree發表於2021-12-31

[20211229]sql語句包含中文儲存clob的編碼問題.txt

--//昨天最佳化一條sql語句,無意中在toad下發現v$sqlarea檢視的sql_fulltext儲存漢字使用的編碼是unicode。
--//比如 病人 unicode編碼是   c575 ba4e
--//         zhs16gbk編碼是   b2a1 c8cb
--//            utf-8編碼是   efbb bfe7 9785 e4ba ba
--//注:utf-8 開頭ef 是否某種特殊標識,我不是很清楚 0xef = 239
--//確定編碼在vim下移動到字元,按ga,在提示行顯示編碼.unicode,utf-8編碼檔案我使用記事本建立的.

1.環境:
SCOTT@book> @ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.4.0     Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production

$ env | grep -i lang
NLS_LANG=AMERICAN_AMERICA.ZHS16GBK

2.測試:
SCOTT@book> select deptno --病人
  2  from dept;
    DEPTNO
----------
        10
        20
        30
        40
--//從來沒有注意putty下無法使用copy and paste中文,先放一放,先看看這個問題。

SCOTT@book> @ hash
HASH_VALUE SQL_ID        CHILD_NUMBER KGL_BUCKET PLAN_HASH_VALUE SQL_EXEC_START      SQL_EXEC_ID
---------- ------------- ------------ ---------- --------------- ------------------- -----------
4052986859 c3xhwymst7azb            0     109547      2913917002 2021-12-30 11:04:56    16777216

SCOTT@book> select sql_fulltext from v$sqlarea where sql_id='c3xhwymst7azb';
SQL_FULLTEXT
--------------------
select deptno --病人
from dept

SCOTT@book> select dump(to_char(sql_fulltext),16) from v$sqlarea where sql_id='c3xhwymst7azb';
DUMP(TO_CHAR(SQL_FULLTEXT),16)
------------------------------------------------------------------------------------------------------
Typ=1 Len=30: 73,65,6c,65,63,74,20,64,65,70,74,6e,6f,20,2d,2d,b2,a1,c8,cb,a,66,72,6f,6d,20,64,65,70,74
                                                        -  -  ~~~~~~~~~~~   
--//73,65,6c,65,63,74,20,64,65,70,74,6e,6f,20,2d,2d,b2,a1,c8,cb,a,66,72,6f,6d,20,64,65,70,74 = select deptno --病人from dept

--//如果在toad下執行:
select sql_fulltext from v$sqlarea where sql_id='c3xhwymst7azb';

--//檢視clob的16進位制:
730065006C00650063007400200064006500700074006E006F0020002D002D00C575BA4E0A00660072006F006D0020006400650070007400
                                                                ~~~~~~~~--//使用unicode編碼。
--//是否意味者實際上儲存在clob的編碼採用的是unicode編碼呢?

3.匯入測試表並分析:

SCOTT@book> create table tx as  select sql_id,sql_fulltext from v$sqlarea where sql_id='c3xhwymst7azb';
Table created.

SCOTT@book> select rowid,tx.sql_id from tx ;
ROWID              SQL_ID
------------------ -------------
AABQsUAAEAAAAKzAAA c3xhwymst7azb

SCOTT@book> @ rowid AABQsUAAEAAAAKzAAA
    OBJECT       FILE      BLOCK        ROW ROWID_DBA            DBA                  TEXT
---------- ---------- ---------- ---------- -------------------- -------------------- ----------------------------------------
    330516          4        691          0  0x10002B3           4,691                alter system dump datafile 4 block 691 ;

SCOTT@book> alter system dump datafile 4 block 691 ;
System altered.

SCOTT@book> @ ttt
tracefile_identifier = /u01/app/oracle/diag/rdbms/book/book/trace/book_ora_31991.trc

4.檢視轉儲檔案內容:
Block header dump:  0x010002b3
 Object id on Block? Y
 seg/obj: 0x50b14  csc: 0x03.1e889fd9  itc: 3  flg: E  typ: 1 - DATA
     brn: 0  bdba: 0x10002b0 ver: 0x01 opc: 0
     inc: 0  exflg: 0

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  C---    0  scn 0x0003.1e889fd9
0x02   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
0x03   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
bdba: 0x010002b3
data_block_dump,data header at 0x7fd33985827c
===============
tsiz: 0x1f80
hsiz: 0x14
pbl: 0x7fd33985827c
     76543210
flag=--------
ntab=1
nrow=1
frre=-1
fsbo=0x14
fseo=0x1f12
avsp=0x1efe
tosp=0x1efe
0xe:pti[0]  nrow=1  offs=0
0x12:pri[0] offs=0x1f12
block_row_dump:
tab 0, row 0, @0x1f12
tl: 110 fb: --H-FL-- lb: 0x0  cc: 2
col  0: [13]  63 33 78 68 77 79 6d 73 74 37 61 7a 62
col  1: [92]
 00 54 00 01 02 0c 80 00 00 02 00 00 00 01 00 00 06 74 c5 f7 00 48 09 00 00
 00 00 00 00 38 00 00 00 00 00 01 00 73 00 65 00 6c 00 65 00 63 00 74 00 20
 00 64 00 65 00 70 00 74 00 6e 00 6f 00 20 00 2d 00 2d 75 c5 4e ba 00 0a 00
 66 00 72 00 6f 00 6d 00 20 00 64 00 65 00 70 00 74
LOB
Locator:
  Length:        84(92)
  Version:        1
  Byte Length:    2
  LobID: 00.00.00.01.00.00.06.74.c5.f7
  Flags[ 0x02 0x0c 0x80 0x00 ]:
    Type: CLOB
    Storage: BasicFile
    Enable Storage in Row
    Characterset Format: IMPLICIT
    Partitioned Table: No
    Options: VaringWidthReadWrite
  Inode:
    Size:     72
    Flag:     0x09 [ Valid DataInRow ]
    Future:   0x00 (should be '0x00')
    Blocks:   0
    Bytes:    56
    Version:  00000.0000000001
    Inline data[56]
Dump of memory from 0x00007FD33985A1C4 to 0x00007FD33985A1FC
7FD33985A1C0          65007300 65006C00 74006300      [.s.e.l.e.c.t]
7FD33985A1D0 64002000 70006500 6E007400 20006F00  [. .d.e.p.t.n.o. ]
7FD33985A1E0 2D002D00 BA4EC575 66000A00 6F007200  [.-.-u.N....f.r.o]
                      ~~~~~~~~=>確實是unicode編碼
7FD33985A1F0 20006D00 65006400 74007000           [.m. .d.e.p.t]
end_of_block_dump

4.透過bbed觀察看看:
SCOTT@book> alter system checkpoint ;
System altered.

BBED> set dba 4,691
        DBA             0x010002b3 (16777907 4,691)

BBED> x /rcx *kdbr[0]
rowdata[0]                                  @8078
----------
flag@8078: 0x2c (KDRHFL, KDRHFF, KDRHFH)
lock@8079: 0x00
cols@8080:    2

col   0[13] @8081: c3xhwymst7azb
col   1[92] @8095:  0x00  0x54  0x00  0x01  0x02  0x0c
 0x80  0x00  0x00  0x02  0x00  0x00  0x00  0x01  0x00  0x00
 0x06  0x74  0xc5  0xf7  0x00  0x48  0x09  0x00  0x00  0x00
 0x00  0x00  0x00  0x38  0x00  0x00  0x00  0x00  0x00  0x01
 0x00  0x73  0x00  0x65  0x00  0x6c  0x00  0x65  0x00  0x63
 0x00  0x74  0x00  0x20  0x00  0x64  0x00  0x65  0x00  0x70
 0x00  0x74  0x00  0x6e  0x00  0x6f  0x00  0x20  0x00  0x2d
 0x00  0x2d  0x75  0xc5  0x4e  0xba  0x00  0x0a  0x00  0x66
             ~~~~~~~~~~~~~~~~~~~~~~
 0x00  0x72  0x00  0x6f  0x00  0x6d  0x00  0x20  0x00  0x64
 0x00  0x65  0x00  0x70  0x00  0x74

--//昏,順序顛倒過來。

5.看看securefile clob的儲存情況呢?估計儲存資訊編碼不會變。
SCOTT@book> CREATE TABLE ty(sql_id varchar(13) ,sql_fulltext CLOB  ) LOB(sql_fulltext) STORE AS SECUREFILE;
Table created.

SCOTT@book> insert into ty select * from tx;
1 row created.

SCOTT@book> commit ;
Commit complete.

SCOTT@book> select rowid,ty.sql_id from ty ;
ROWID              SQL_ID
------------------ -------------
AABQsXAAEAAAAK8AAA c3xhwymst7azb

SCOTT@book> @ rowid AABQsXAAEAAAAK8AAA
    OBJECT       FILE      BLOCK        ROW ROWID_DBA            DBA                  TEXT
---------- ---------- ---------- ---------- -------------------- -------------------- ----------------------------------------
    330519          4        700          0  0x10002BC           4,700                alter system dump datafile 4 block 700 ;

SCOTT@book> alter system checkpoint ;
System altered.

--//bbed觀察:
BBED> set dba 4,700
        DBA             0x010002bc (16777916 4,700)

BBED> x /rcx *kdbr[0]
rowdata[0]                                  @8084
----------
flag@8084: 0x2c (KDRHFL, KDRHFF, KDRHFH)
lock@8085: 0x01
cols@8086:    2

col   0[13] @8087: c3xhwymst7azb
col   1[86] @8101:  0x00  0x54  0x00  0x01  0x02  0x0c
 0x80  0x80  0x00  0x02  0x00  0x00  0x00  0x01  0x00  0x00
 0x06  0x74  0xc5  0xfa  0x00  0x42  0x48  0x90  0x00  0x3c
 0x00  0x00  0x38  0x01  0x00  0x73  0x00  0x65  0x00  0x6c
 0x00  0x65  0x00  0x63  0x00  0x74  0x00  0x20  0x00  0x64
 0x00  0x65  0x00  0x70  0x00  0x74  0x00  0x6e  0x00  0x6f
 0x00  0x20  0x00  0x2d  0x00  0x2d  0x75  0xc5  0x4e  0xba
                                     ~~~~~~~~~~~~~~~~~~~~~~~
 0x00  0x0a  0x00  0x66  0x00  0x72  0x00  0x6f  0x00  0x6d
 0x00  0x20  0x00  0x64  0x00  0x65  0x00  0x70  0x00  0x74

SCOTT@book> @ ti
New tracefile_identifier = /u01/app/oracle/diag/rdbms/book/book/trace/book_ora_31991_0001.trc

SCOTT@book>  alter system dump datafile 4 block 700 ;
System altered.

Block header dump:  0x010002bc
 Object id on Block? Y
 seg/obj: 0x50b17  csc: 0x03.1e88a461  itc: 2  flg: E  typ: 1 - DATA
     brn: 0  bdba: 0x10002b8 ver: 0x01 opc: 0
     inc: 0  exflg: 0

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x000b.01d.00000688  0x00c001c1.01db.06  --U-    1  fsc 0x0000.1e88a464
0x02   0x0000.000.00000000  0x00000000.0000.00  ----    0  fsc 0x0000.00000000
bdba: 0x010002bc
data_block_dump,data header at 0x7fd339858264
===============
tsiz: 0x1f98
hsiz: 0x14
pbl: 0x7fd339858264
     76543210
flag=--------
ntab=1
nrow=1
frre=-1
fsbo=0x14
fseo=0x1f30
avsp=0x1f1c
tosp=0x1f1c
0xe:pti[0]  nrow=1  offs=0
0x12:pri[0] offs=0x1f30
block_row_dump:
tab 0, row 0, @0x1f30
tl: 104 fb: --H-FL-- lb: 0x1  cc: 2
col  0: [13]  63 33 78 68 77 79 6d 73 74 37 61 7a 62
col  1: [86]
 00 54 00 01 02 0c 80 80 00 02 00 00 00 01 00 00 06 74 c5 fa 00 42 48 90 00
 3c 00 00 38 01 00 73 00 65 00 6c 00 65 00 63 00 74 00 20 00 64 00 65 00 70
 00 74 00 6e 00 6f 00 20 00 2d 00 2d 75 c5 4e ba 00 0a 00 66 00 72 00 6f 00
 6d 00 20 00 64 00 65 00 70 00 74
LOB
Locator:
  Length:        84(86)
  Version:        1
  Byte Length:    2
  LobID: 00.00.00.01.00.00.06.74.c5.fa
  Flags[ 0x02 0x0c 0x80 0x80 ]:
    Type: CLOB
    Storage: SecureFile
    Characterset Format: IMPLICIT
    Partitioned Table: No
    Options: VaringWidthReadWrite
  SecureFile Header:
    Length:   66
    Old Flag: 0x48 [ DataInRow SecureFile ]
    Flag 0:   0x90 [ INODE Valid ]
    Layers:
      Lengths Array: INODE:60
      INODE:
        00 00 38 01 00 73 00 65 00 6c 00 65 00 63 00 74 00 20 00 64
        00 65 00 70 00 74 00 6e 00 6f 00 20 00 2d 00 2d 75 c5 4e ba
                                                        ~~~~~~~~~~~~
        00 0a 00 66 00 72 00 6f 00 6d 00 20 00 64 00 65 00 70 00 74
end_of_block_dump

--//這也是我不主張在生產系統使用clob型別的原因。clob型別可能給根據字符集,選擇合理的編碼.
--//如果你是英文字元為主的話,每次英文字元消耗2個位元組,如果裡面的編碼是utf-8的話,消耗空間更大.
--//blob型別沒有這類問題,原樣儲存。

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/267265/viewspace-2850243/,如需轉載,請註明出處,否則將追究法律責任。

相關文章