openGauss 賬本資料庫,你不知道的那些事兒
摘要
本文將透過對比官方文件關於“設定賬本資料庫”中的幾個章節,結合原始碼來說說文件中操作步驟背後的原理。
賬本資料庫概述
你知道的那些事兒
官方文件
賬本資料庫融合了區塊鏈思想,將使用者操作記錄至兩種歷史表中:使用者歷史表和全域性區塊表。當使用者建立防篡改使用者表時,系統將自動為該表新增一個 hash 列來儲存每行資料的 hash 摘要資訊,同時在 blockchain 模式下會建立一張使用者歷史表來記錄對應使用者表中每條資料的變更行為;而使用者對防篡改使用者表的一次修改行為將記錄至全域性區塊表中。由於歷史表具有隻可追加不可修改的特點,因此歷史表記錄串聯起來便形成了使用者對防篡改使用者表的修改歷史。
你不知道的那些事兒
操作步驟
1.建立防篡改模式。
openGauss=# CREATE SCHEMA ledgernsp WITH BLOCKCHAIN;
首先在這個 SQL 中我們可以看到WITH BLOCKCHAIN ,這裡說明建立出來的 SCHEMA 與普通的 SCHEMA 不同,但就行不同在哪裡我們後面會提到。
從語法解析看,增加了對 BLOCKCHAIN 的處理,標記了是否為賬本模式。
CreateSchema ::= CREATE SCHEMA schema_name
[ AUTHORIZATION user_name ] [WITH BLOCKCHAIN] [ schema_element [ ... ] ];
CreateSchemaStmt 結構中增加了 bool 型別欄位 hasBlockChain
typedef struct CreateSchemaStmt {
NodeTag type;
char schemaname; / the name of the schema to create */
char authid; / the owner of the created schema /
bool hasBlockChain; / whether this schema has blockchain */
List schemaElts; / schema components (list of parsenodes) /
TempType temptype; / if the schema is temp table's schema */
List uuids; / the list of uuid(only create sequence or table with serial type need) */
} CreateSchemaStmt;
你不知道的限制
賬本資料庫對於 ALTER SCHEMA 的幾個限制
1、dbe_perf 和 snapshot 兩個模式不能 ALTER 為 blockchain 模式。
if (withBlockchain && ((strncmp(nspName, "dbe_perf", STR_SCHEMA_NAME_LENGTH) == 0) ||
(strncmp(nspName, "snapshot", STR_SNAPSHOT_LENGTH) == 0))) {
ereport(ERROR, (errcode(ERRCODE_OPERATE_FAILED),
errmsg("The schema '%s' doesn't allow to alter to blockchain schema", nspName)));
}
2、系統模式不能 ALTER 為 blockchain 模式。
if (withBlockchain && !g_instance.attr.attr_common.allowSystemTableMods &&
!u_sess->attr.attr_common.IsInplaceUpgrade && IsReservedName(nspName))
ereport(ERROR,
(errcode(ERRCODE_RESERVED_NAME),
errmsg("The system schema "%s" doesn't allow to alter to blockchain schema", nspName)));
3、包含了表的 SCHEMA 不能 ALTER 為 blockchain 模式。
/*
* If the any table exists in the schema, do not change to ledger schema.
*/
StringInfo existTbl = TableExistInSchema(HeapTupleGetOid(tup), TABLE_TYPE_ANY);
if (existTbl->len != 0) {
if (withBlockchain) {
ereport(ERROR,
(errcode(ERRCODE_RESERVED_NAME),
errmsg("It is not supported to change "%s" to blockchain schema which includes tables.",
nspName)));
} else {
ereport(ERROR,
(errcode(ERRCODE_RESERVED_NAME),
errmsg("It is not supported to change "%s" to normal schema which includes tables.",
nspName)));
}
}
檢視模式
2.在防篡改模式下建立防篡改使用者表。
openGauss=# CREATE TABLE ledgernsp.usertable(id int, name text);
你不知道的限制
建立賬本表的同時會自動建立一個“歷史表”和“歷史表的索引”。
在建表時CreateCommand會呼叫AlterCreateChainTables,如果是賬本表再去呼叫create_hist_relation來建立歷史表
CreateCommand -> AlterCreateChainTables -> create_hist_relation
/*
- AlterCreateChainTables
- If it is a ledger usertable, that should invoking this function.
- then create a history table.
*/
void AlterCreateChainTables(Oid relOid, Datum reloptions, CreateStmt *mainTblStmt)
{
Relation rel = NULL;
rel = heap_open(relOid, AccessExclusiveLock);
/* Ledger user table only support for the regular relation. */
if (!rel->rd_isblockchain) {
heap_close(rel, NoLock);
return;
}
create_hist_relation(rel, reloptions, mainTblStmt);
heap_close(rel, NoLock);
}
歷史表命名規則,參見函式 get_hist_name
bool get_hist_name(Oid relid, const char *rel_name, char *hist_name, Oid nsp_oid, const char nsp_name)
{
errno_t rc;
if (!OidIsValid(relid) || rel_name == NULL) {
return false;
}
nsp_oid = OidIsValid(nsp_oid) ? nsp_oid : get_rel_namespace(relid);
nsp_name = (nsp_name == NULL) ? get_namespace_name(nsp_oid) : nsp_name;
int part_hist_name_len = strlen(rel_name) + strlen(nsp_name) + 1;
if (part_hist_name_len + strlen("hist") >= NAMEDATALEN) {
rc = snprintf_s(hist_name, NAMEDATALEN, NAMEDATALEN - 1, "%d%d_hist", nsp_oid, relid);
securec_check_ss(rc, "", "");
} else {
rc = snprintf_s(hist_name, NAMEDATALEN, NAMEDATALEN - 1, "%s_%s_hist", nsp_name, rel_name);
securec_check_ss(rc, "", "");
}
return true;
}
表名最大長度 #define NAMEDATALEN 64
如果沒有超過長度限制:schema_table_hist
如果超過長度限制:schema(oid)_talbe(oid)_hist,因為 oid 是 unsigned int 型別最大值為 4294967295 為 10 位,所以這種命名規則的最大長度為 10+1+10+1+4+\0=27,因此永遠不會超過最大長度 64。
omm=# create schema aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa with blockchain;
CREATE SCHEMA
omm=# create table aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.bbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(id int);
CREATE TABLE
歷史表索引命名規則,參見函式 get_hist_name
/ now create index for this new history table */
char hist_index_name[NAMEDATALEN];
rc = snprintf_s(hist_index_name, NAMEDATALEN, NAMEDATALEN - 1, "gs_hist_%u_index", relid);
命名規則:gshist\$(賬本表 oid)_index。
3、修改防篡改使用者表資料
對防篡改使用者表執行 INSERT/UPDATE/DELETE。
openGauss=# INSERT INTO ledgernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter');
INSERT 0 3
openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
id | name | hash
----+-------+------------------
1 | alex | 1f2e543c580cb8c5
2 | bob | 8fcd74a8a6a4b484
3 | peter | f51b4b1b12d0354b
(3 rows)
openGauss=# UPDATE ledgernsp.usertable SET name = 'bob2' WHERE id = 2;
UPDATE 1
openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
id | name | hash
----+-------+------------------
1 | alex | 1f2e543c580cb8c5
2 | bob2 | 437761affbb7c605
3 | peter | f51b4b1b12d0354b
(3 rows)
openGauss=# DELETE FROM ledgernsp.usertable WHERE id = 3;
DELETE 1
openGauss=# SELECT *, hash FROM ledgernsp.usertable ORDER BY id;
id | name | hash
----+------+------------------
1 | alex | 1f2e543c580cb8c5
2 | bob2 | 437761affbb7c605
(2 rows)
檢視賬本歷史操作記錄
你知道的那些事兒
官方文件
前提條件
系統中需要有審計管理員或者具有審計管理員許可權的角色。
資料庫正常執行,並且對防篡改資料庫執行了一系列增、刪、改等操作,保證在查詢時段內有賬本操作記錄結果產生。
你不知道的那些事兒
基本操作
1、查詢全域性區塊表記錄。
omm=# SELECT * FROM gs_global_chain;
blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash |
txcommand
----------+--------+----------+-------------------------------+-------+-----------+-----------+------------------+----------------------------------+----------------
1 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | a41714001181a294 | 83927d11ba1fd678e8f4b0723a9cd5f2 | INSERT INTO led
gernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter');
2 | omm | omm | 2022-09-17 13:59:51.723068+00 | 16404 | ledgernsp | usertable | b3a9ed0755131181 | b5ee73b6c20c817230182f6373c78e20 | UPDATE ledgerns
p.usertable SET name = 'bob2' WHERE id = 2;
3 | omm | omm | 2022-09-17 13:59:58.159596+00 | 16404 | ledgernsp | usertable | 0ae4b4e4ed2fcab5 | 0cc9938cf7f1ed7f7f1a03c29954380a | DELETE FROM led
gernsp.usertable WHERE id = 3;
(3 rows)
註冊鉤子,在對賬本做修改操作的時候註冊的鉤子函式ledger_ExecutorEnd被回撥。
/*
- ledger_hook_init -- install of gchain block record hook.
*/
void ledger_hook_init(void)
{
t_thrd.security_ledger_cxt.prev_ExecutorEnd = (void *)ExecutorEnd_hook;
ExecutorEnd_hook = ledger_ExecutorEnd;
}
生成 globalhash 規則
全域性區塊表記錄主要是生成 globalhash.
呼叫過程:
ledger_ExecutorEnd —> ledger_gchain_append —> set_gchain_comb_string
—> get_next_g_blocknum
—> gen_global_hash
set_gchain_comb_string,是一組字串拼接成的:rel_name + nsp_name + query_string + rel_hash
get_next_g_blocknum,用全域性變數 g_blocknum 儲存
gen_global_hash,是的 set_gchain_comb_string 拼出來的串+上一條的 hash 值拼串然後再去 hash——區塊鏈的基本原理
bool gen_global_hash(hash32_t *hash_buffer, const char *info_string, bool exist, const hash32_t *prev_hash)
{
errno_t rc = EOK;
int comb_strlen;
char comb_string = NULL;
/
* Previous block not exists means current insertion block is genesis,
* then we use global systable as origin combine string for globalhash
* generation. If previous block exists, we will use previous global
* hash as combine string to calculate globalhash.
/
if (!exist) {
/ generate genesis block globalhash */
comb_strlen = strlen(GCHAIN_NAME) + strlen(info_string) + 1;
comb_string = (char )palloc0(comb_strlen);
rc = snprintf_s(comb_string, comb_strlen, comb_strlen - 1, "%s%s", GCHAIN_NAME, info_string);
securec_check_ss(rc, "", "");
} else {
/ use previous globalhash and current block info to calculate globalhash. */
char *pre_hash_str = DatumGetCString(DirectFunctionCall1(hash32out, HASH32GetDatum(prev_hash)));
comb_strlen = strlen(pre_hash_str) + strlen(info_string) + 1;
comb_string = (char *)palloc0(comb_strlen);
rc = snprintf_s(comb_string, comb_strlen, comb_strlen - 1, "%s%s", info_string, pre_hash_str);
securec_check_ss(rc, "", "");
pfree_ext(pre_hash_str);
}
if (!pg_md5_binary(comb_string, comb_strlen - 1, hash_buffer->data)) {
pfree(comb_string);
ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("Failed to generate globalhash, out of memory")));
return false;
}
pfree(comb_string);
return true;
}
在src/gausskernel/runtime/executor/nodeModifyTable.cpp中更新_hist 表的 hash 值。
透過 set_user_tuple_hash 得到賬本表 hash 列的值。
/*
-
set_user_tuple_hash -- calculate and fill the hash attribute of user table's tuple.
-
tup: row data of user table
-
rel: user table
-
hash_exists: whether tuple comes with tuplehash.
-
Note: if hash_exists is true, we should recompute
-
tuple hash and compare with tuplehash of itself.
*/
HeapTuple set_user_tuple_hash(HeapTuple tup, Relation rel, bool hash_exists)
{
uint64 row_hash = gen_user_tuple_hash(rel, tup);
int hash_attrno = user_hash_attrno(rel->rd_att);
if (hash_exists) {
bool is_null;
Datum hash = heap_getattr(tup, hash_attrno + 1, rel->rd_att, &is_null);
if (is_null || row_hash != DatumGetUInt64(hash)) {
ereport(ERROR, (errcode(ERRCODE_OPERATE_INVALID_PARAM), errmsg("Invalid tuple hash.")));
}
return tup;
}
Datum values = NULL;
bool nulls = NULL;
bool replaces = NULL;
/ Build modified tuple /
int2 nattrs = RelationGetNumberOfAttributes(rel);
values = (Datum)palloc0(nattrs * sizeof(Datum));
nulls = (bool)palloc0(nattrs * sizeof(bool));
replaces = (bool)palloc0(nattrs * sizeof(bool));
values[hash_attrno] = UInt64GetDatum(row_hash);
replaces[hash_attrno] = true;
HeapTuple newtup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, replaces);pfree_ext(values);
pfree_ext(nulls);
pfree_ext(replaces);
return newtup;
}
校驗賬本資料一致性
你知道的那些事兒
官方文件
資料庫正常執行,並且對防篡改資料庫執行了一系列增、刪、改等操作,保證在查詢時段內有賬本操作記錄結果產生。
你不知道的那些事兒
基本操作
1、校驗防篡改使用者表 ledgernsp.usertable 與其對應的歷史表是否一致。
omm=# SELECT pg_catalog.ledger_hist_check('ledgernsp', 'usertable');
ledger_hist_check
t
(1 row)
校驗使用者許可權 Only super user or audit admin have access right to blockchain nsp
/* Only super user or audit admin have access right to blockchain nsp */
if (nsp_oid == PG_BLOCKCHAIN_NAMESPACE) {
return gs_blockchain_aclmask(roleid, mask);
}
校驗歷史表 hash 值
is_hist_hash_identity —> get_usertable_hash_sum
—> get_histtable_hash_sum
/*
-
is_hist_hash_identity -- check whether user table hash and history table hash are equal
-
relid: user table oid
-
res_hash: hash sum of history table
*/
bool is_hist_hash_identity(Oid relid, uint64 *res_hash)
{
uint64 user_hash_sum;
uint64 hist_hash_sum;
char hist_name[NAMEDATALEN];
char *rel_name = get_rel_name(relid);
if (!get_hist_name(relid, rel_name, hist_name)) {
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("get hist table name failed.")));
}
Oid histoid = get_relname_relid(hist_name, PG_BLOCKCHAIN_NAMESPACE);
if (!OidIsValid(histoid)) {
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("could not find hist table of "%s".", rel_name)));
}user_hash_sum = get_usertable_hash_sum(relid);
hist_hash_sum = get_histtable_hash_sum(histoid);*res_hash = hist_hash_sum;
return user_hash_sum == hist_hash_sum;
}
2、查詢防篡改使用者表 ledgernsp.usertable 與其對應的歷史表以及全域性區塊表中關於該表的記錄是否一致。
omm=# SELECT pg_catalog.ledger_gchain_check('ledgernsp', 'usertable');
ledger_gchain_check
t
(1 row)
校驗是否為賬本表ledger_usertable_check
校驗使用者許可權has_ledger_consistent_privilege
校驗歷史表 hash 值is_hist_hash_identity
計算/校驗全域性表 hash get_gchain_relhash_sum
/*
-
get_gchain_relhash_sum -- calculate relhash from gs_global_chain
-
relid: user table oid
*/
static uint64 get_gchain_relhash_sum(Oid relid)
{
uint64 relhash = 0;
HeapTuple tuple = NULL;/* scan the gs_global_chain catalog by relid */
Relation gchain_rel = heap_open(GsGlobalChainRelationId, AccessShareLock);
Form_gs_global_chain rdata = NULL;
TableScanDesc scan = heap_beginscan(gchain_rel, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) {
rdata = (Form_gs_global_chain)GETSTRUCT(tuple);
if (rdata == NULL || rdata->relid != relid) {
continue;
}
relhash += rdata->relhash;
}
heap_endscan(scan);
heap_close(gchain_rel, AccessShareLock);
return relhash;
}
歸檔賬本資料庫
你知道的那些事兒
官方文件
前提條件:
系統中需要有審計管理員或者具有審計管理員許可權的角色。
資料庫正常執行,並且對防篡改資料庫執行了一系列增、刪、改等操作,保證在查詢時段內有賬本操作記錄結果產生。
資料庫已經正確配置審計檔案的儲存路徑 audit_directory。
你不知道的那些事兒
基本操作
1、對指定使用者歷史表進行歸檔操作。
omm=# SELECT pg_catalog.ledger_hist_archive('ledgernsp', 'usertable');
ledger_hist_archive
t
(1 row)
omm=# SELECT * FROM blockchain.ledgernsp_usertable_hist;
rec_num | hash_ins | hash_del | pre_hash
---------+------------------+------------------+----------------------------------
4 | e78e75b00d396899 | 84e8bfc3b974e9cf | 6475a497b7a272a92bab012d7f3d615b
(1 row)
主要步驟如下:
Copy user history table.
Do unify and truncate.
sum all hash_ins and hash_del for unification.
Do real truncate.heap_truncate_one_rel
Do insertion for unified row.simple_heap_insert
Flush history hash table cache.
2、執行全域性區塊表匯出操作
omm=# SELECT * FROM gs_global_chain;
blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash |
txcommand
----------+--------+----------+-------------------------------+-------+-----------+-----------+------------------+----------------------------------+----------------
1 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | a41714001181a294 | 83927d11ba1fd678e8f4b0723a9cd5f2 | INSERT INTO led
gernsp.usertable VALUES(1, 'alex'), (2, 'bob'), (3, 'peter');
2 | omm | omm | 2022-09-17 13:59:51.723068+00 | 16404 | ledgernsp | usertable | b3a9ed0755131181 | b5ee73b6c20c817230182f6373c78e20 | UPDATE ledgerns
p.usertable SET name = 'bob2' WHERE id = 2;
3 | omm | omm | 2022-09-17 13:59:58.159596+00 | 16404 | ledgernsp | usertable | 0ae4b4e4ed2fcab5 | 0cc9938cf7f1ed7f7f1a03c29954380a | DELETE FROM led
gernsp.usertable WHERE id = 3;
(3 rows)
omm=# SELECT pg_catalog.ledger_gchain_archive();
ledger_gchain_archive
t
(1 row)
omm=# SELECT * FROM gs_global_chain;
blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash | txcommand
----------+--------+----------+------------------------------+-------+-----------+-----------+------------------+----------------------------------+-----------
2 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | 62a5b5ec53c47eca | 7252d09679b0b3836a2e63da17284ad5 | Archived.
(1 row)
gs_global_chain 主要處理流程:
Init and prepare bak dictionary.
Using CopyStmt to copy global chain.
Do unify and truncate.
Using hash table to do unify, each hash_entry refers to one relid informations.
Split gs_global_chain by relid, and accumulate rel_hash to a new record for each rel.
Do rel truncate.
Insert newest record to gchain order by relid.
Flush global_hash cache.
修復賬本資料庫
你知道的那些事兒
前提條件:
系統中需要有審計管理員或者具有審計管理員許可權的角色。
資料庫正常執行,並且對防篡改資料庫執行了一系列增、刪、改等操作,保證在查詢時段內有賬本操作記錄結果產生。
你不知道的那些事兒
基本操作
1、執行歷史表修復操作
omm=# select * from blockchain.ledgernsp_usertable_hist;
rec_num | hash_ins | hash_del | pre_hash
---------+------------------+------------------+----------------------------------
4 | e78e75b00d396899 | 84e8bfc3b974e9cf | 6475a497b7a272a92bab012d7f3d615b
(1 row)
omm=# SELECT pg_catalog.ledger_hist_repair('ledgernsp', 'usertable');
ledger_hist_repair
0000000000000000
(1 row)
2、執行全域性區塊表修復操作
omm=# select * from gs_global_chain ;
blocknum | dbname | username | starttime | relid | relnsp | relname | relhash | globalhash | txcommand
----------+--------+----------+------------------------------+-------+-----------+-----------+------------------+----------------------------------+-----------
2 | omm | omm | 2022-09-17 13:59:37.84824+00 | 16404 | ledgernsp | usertable | 62a5b5ec53c47eca | 7252d09679b0b3836a2e63da17284ad5 | Archived.
(1 row)
omm=# SELECT pg_catalog.ledger_gchain_repair('ledgernsp', 'usertable');
ledger_gchain_repair
62a5b5ec53c47eca
(1 row)
首先判斷使用者許可權,之後透過 get_gchain_relhash_sum 函式計算 relhash 欄位
/*
-
get_gchain_relhash_sum -- calculate relhash from gs_global_chain
-
relid: user table oid
*/
static uint64 get_gchain_relhash_sum(Oid relid)
{
uint64 relhash = 0;
HeapTuple tuple = NULL;/* scan the gs_global_chain catalog by relid */
Relation gchain_rel = heap_open(GsGlobalChainRelationId, AccessShareLock);
Form_gs_global_chain rdata = NULL;
TableScanDesc scan = heap_beginscan(gchain_rel, SnapshotNow, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) {
rdata = (Form_gs_global_chain)GETSTRUCT(tuple);
if (rdata == NULL || rdata->relid != relid) {
continue;
}
relhash += rdata->relhash;
}
heap_endscan(scan);
heap_close(gchain_rel, AccessShareLock);
return relhash;
}
主要是計算並修復gs_global_chain中的relhash欄位。
總結
賬本資料庫其實並不像我們想象的那麼複製,實際上就是利用了區塊鏈的最基本的原理,即當前記錄的特徵值 + 上一條記錄特徵值的 hash 值,再進行 hash。下一條與上一條記錄具有資料關聯性,形成“鏈”的結構,如果篡改了其中的資料,則會導致“鏈”斷開,導致不能與後面資料記錄形成 hash 關聯。_hist 表記錄了使用者表每一步資料變化的過程,gs_global_chain 表記錄了所有防篡改模式下對使用者表的操作記錄。使用者表結合_hist 和 global 表就能完整記錄和校驗。