關於MYSQL 5.6 super_read_only和Event Scheduler衝突導致啟動失敗

版本percona-server-5.6.22-72.0，這個版本肯定有這個問題
這個問題出現在線上環境：我們將庫設定為super_read_only後啟動報錯，整個MYSQLD CRASH掉
2017-04-23 01:15:46 22577 [ERROR] Event Scheduler: Failed to open table mysql.event
2017-04-23 01:15:46 22577 [ERROR] Event Scheduler: Error while loading from disk.
2017-04-23 01:15:46 22577 [Note] Event Scheduler: Purging the queue. 0 events
2017-04-23 01:15:46 22577 [ERROR] Aborting
在5.6.25以及其他5.7的版本上並沒有這個問題。網上說可能是BUG但是沒有說出具體原因，也有
帖子說和super_read_only 有關，確實關閉super_read_only就不會再出問題，
但是為了找到問題，我想挖一挖程式碼如下：
首先要找到報錯源頭，先找到Event Scheduler: Error while loading from disk.的位置如下：
這段程式碼出現在
events.cc的Events::init(my_bool opt_noacl_or_bootstrap) 方法下
if (event_queue->init_queue(thd) || load_events_from_db(thd) ||
(opt_event_scheduler == EVENTS_ON && scheduler->start(&err_no)))
{
sql_print_error("Event Scheduler: Error while loading from disk."); --這裡
res= TRUE; /* fatal error: request unireg_abort */
goto end;
}
這裡3個條件都可能引起這個錯誤：
1、初始化佇列失敗，函式介面event_queue->init_queue
2、載入event資料失敗，函式介面load_events_from_db
3、引數event_scheduler設定為ON，並且scheduler->start呼叫失敗

而錯誤 Event Scheduler: Failed to open table mysql.event 正是load_events_from_db(thd)這個
方法報出來的，檢視其部分程式碼
/*
NOTE: even if we run in read-only mode, we should be able to lock the
mysql.event table for writing. In order to achieve this, we should call
mysql_lock_tables() under the super user.

Same goes for transaction access mode.
Temporarily reset it to read-write.
*/--這裡原始碼也有不算清楚的解釋

saved_master_access= thd->security_ctx->master_access;
thd->security_ctx->master_access |= SUPER_ACL;
bool save_tx_read_only= thd->tx_read_only;
thd->tx_read_only= false;

ret= db_repository->open_event_table(thd, TL_WRITE, &table); --這裡的返回值進行判斷

thd->tx_read_only= save_tx_read_only;
thd->security_ctx->master_access= saved_master_access;

if (ret)
{
sql_print_error("Event Scheduler: Failed to open table mysql.event"); ---這裡
DBUG_RETURN(TRUE);
}
我們可以看到它是透過呼叫db_repository->open_event_table(thd, TL_WRITE, &table)並
接收其返回值，如果為true則報錯。接下來看open_event_table
/**
  Open the mysql.event table and lock it with the requested lock type.

  @param thd         Thread context handed to open_and_lock_tables().
  @param lock_type   Lock type used to initialise the TABLE_LIST entry.
  @param[out] table  Receives the opened TABLE. It is assigned before the
                     structure check, so it is also set when that check fails.

  @retval FALSE  Table was opened and locked, and table_intact.check() passed.
  @retval TRUE   open_and_lock_tables() failed, or table_intact.check()
                 reported a problem (tables are then closed and
                 ER_EVENT_OPEN_TABLE_FAILED is raised).
*/
bool
Event_db_repository::open_event_table(THD *thd, enum thr_lock_type lock_type,
TABLE **table)
{
TABLE_LIST tables;
DBUG_ENTER("Event_db_repository::open_event_table");

/*
  Presumably (db, db length, table name, name length, alias, lock type);
  confirm against the TABLE_LIST::init_one_table declaration.
*/
tables.init_one_table("mysql", 5, "event", 5, "event", lock_type);

/* Any failure to open or lock is reported to the caller as TRUE. */
if (open_and_lock_tables(thd, &tables, FALSE, MYSQL_LOCK_IGNORE_TIMEOUT))
DBUG_RETURN(TRUE);

*table= tables.table;
tables.table->use_all_columns();

/* Compare the opened table against event_table_def; undo the open on mismatch. */
if (table_intact.check(*table, &event_table_def))
{
close_thread_tables(thd);
my_error(ER_EVENT_OPEN_TABLE_FAILED, MYF(0));
DBUG_RETURN(TRUE);
}

DBUG_RETURN(FALSE);
}
大概這個函式會判斷
1、event表是否可以lock
2、event表是否損壞

最終會呼叫lock_tables-->mysql_lock_tables，如果mysql_lock_tables返回一個
NULL指標則報錯如下：

DEBUG_SYNC(thd, "before_lock_tables_takes_lock");

if (! (thd->lock= mysql_lock_tables(thd, start, (uint) (ptr - start), flags))) --如果mysql_lock_tables返回一個NULL給thd->lock，產生異常
DBUG_RETURN(TRUE);

DEBUG_SYNC(thd, "after_lock_tables_takes_lock");

接下來mysql_lock_tables 又呼叫lock_tables_check函式進行table
lock的檢查，如果lock_tables_check函式返回一個大於0的值則異常
那麼呼叫流程清晰了
Events::init-->Events::load_events_from_db-->open_event_table
-->open_and_lock_tables -->lock_tables-->mysql_lock_tables-->
lock_tables_check

最終我們分析到這個版本的BUG是由於lock_tables_check函式檢查返回異常
而導致這個錯誤。
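函式呼叫棧如下（注意：這次斷點捕獲到的棧是由Events::init經Event_db_repository::check_system_tables進入open_and_lock_tables的，與上面load_events_from_db的路徑不同，但同樣會走到mysql_lock_tables）: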
#0 mysql_lock_tables (thd=0x1c0b5e0, tables=0x1b62ca0, count=1, flags=2048) at /home/percona-server-5.6.22-72.0/sql/lock.cc:296
#1 0x00000000007910c9 in lock_tables (thd=0x1c0b5e0, tables=0x7fffffffdae0, count=1, flags=2048) at /home/percona-server-5.6.22-72.0/sql/sql_base.cc:6125
#2 0x000000000079086f in open_and_lock_tables (thd=0x1c0b5e0, tables=0x7fffffffdae0, derived=false, flags=2048, prelocking_strategy=0x7fffffffda90)
at /home/percona-server-5.6.22-72.0/sql/sql_base.cc:5889
#3 0x0000000000781ed2 in open_and_lock_tables (thd=0x1c0b5e0, tables=0x7fffffffdae0, derived=false, flags=2048)
at /home/percona-server-5.6.22-72.0/sql/sql_base.h:477
#4 0x0000000000a26d20 in Event_db_repository::check_system_tables (thd=0x1c0b5e0) at /home/percona-server-5.6.22-72.0/sql/event_db_repository.cc:1202
#5 0x00000000008ff5fb in Events::init (opt_noacl_or_bootstrap=0 '\000') at /home/percona-server-5.6.22-72.0/sql/events.cc:858
#6 0x000000000063e21d in mysqld_main (argc=83, argv=0x18f4c58) at /home/percona-server-5.6.22-72.0/sql/mysqld.cc:5784
#7 0x0000000000632634 in main (argc=11, argv=0x7fffffffe398) at /home/percona-server-5.6.22-72.0/sql/main.cc:25

跟蹤lock_tables_check函式發現如下

198 if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) && !t->s->tmp_table)
(gdb) n
200 if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
(gdb) n
204 opt_super_readonly ? "--read-only (super)" : "--read-only");
(gdb) n
205 DBUG_RETURN(1);
上面由於這段程式碼：
200 if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
201 enforce_ro && opt_readonly && !thd->slave_thread)
202 {
203 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
204 opt_super_readonly ? "--read-only (super)" : "--read-only");
205 DBUG_RETURN(1);
206 }

if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && enforce_ro && opt_readonly && !thd->slave_thread)
條件滿足返回了 DBUG_RETURN(1); 造成整個報錯
而這裡
enforce_ro 起到了關鍵作用，也是問題的根源，相信和這裡有關
if (!opt_super_readonly)
enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);

(gdb) p t->alias
$12 = 0x1c5b4d0 "event"
(gdb) list
200 if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
201 enforce_ro && opt_readonly && !thd->slave_thread)
202 {
203 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
204 opt_super_readonly ? "--read-only (super)" : "--read-only");
205 DBUG_RETURN(1);
206 }
207 }
208 }
209
(gdb) p enforce_ro
$13 = true
(gdb) p t->alias
$15 = 0x1c5b4d0 "event"
(gdb) p thd->security_ctx->master_access
$16 = 32768

我們研究下
lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
{
uint system_count= 0, i= 0;
bool enforce_ro= true;
/*
Identifies if the executed sql command can updated either a log
or rpl info table.
*/
bool log_table_write_query= false;

DBUG_ENTER("lock_tables_check");

if (!opt_super_readonly)
enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);
後面省略

這句是問題的根源，預設enforce_ro= true;
如果opt_super_readonly開啟則
enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);不執行
那麼enforce_ro= true

如果opt_super_readonly不開啟則
enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);執行
SUPER_ACL是一個宏#define SUPER_ACL (1L << 15)
其值為32768，低16位的二進位制表示即
1000 0000 0000 0000
而thd->security_ctx->master_access是32768即
1000 0000 0000 0000
顯然他們按位與得到32768（非0，即true），然後!true
所以enforce_ro= false;
如果為false則
200 if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
201 enforce_ro && opt_readonly && !thd->slave_thread)
202 {
203 my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
204 opt_super_readonly ? "--read-only (super)" : "--read-only");
205 DBUG_RETURN(1);
206 }
不會執行，則不會再次報錯

所以規避這個問題（或者說BUG）的辦法，就是不要開啟opt_super_readonly，
也就是不要設定super_read_only引數。

在5.7.14版本中，我粗略檢視lock_tables_check程式碼，有改動。5.7.14沒遇到這個問題

if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
check_readonly(thd, true))
DBUG_RETURN(1);

可以看到 enforce_ro已經不再作為判斷的標準

而5.6.22這個版本確實有這個問題，但是這個問題不是每個版本都有。如果遇到可以參考。

附帶5.6.22，5.7.14程式碼：
5.6.22

點選(此處)摺疊或開啟

/**
  Validate a set of tables before they are locked (5.6.22 version).

  Each table is checked in turn; the first violation raises an error through
  my_error() and aborts the check.

  @param thd     Thread context; its security context, lex->sql_command,
                 mdl_context and slave_thread flag are consulted.
  @param tables  Array of tables about to be locked.
  @param count   Number of entries in tables.
  @param flags   Lock flags; MYSQL_LOCK_LOG_TABLE and
                 MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY are tested here.

  @retval 0  All checks passed.
  @retval 1  A check failed; one of ER_CANT_LOCK_LOG_TABLE,
             ER_OPEN_AS_READONLY, ER_OPTION_PREVENTS_STATEMENT or
             ER_WRONG_LOCK_OF_SYSTEM_TABLE has been raised.
*/
static int
lock_tables_check(THD *thd, TABLE **tables, uint count, uint flags)
{
uint system_count= 0, i= 0;
bool enforce_ro= true;
/*
Identifies if the executed sql command can updated either a log
or rpl info table.
*/
bool log_table_write_query= false;
DBUG_ENTER("lock_tables_check");
/*
  When opt_super_readonly is off, a context holding SUPER_ACL is exempt from
  read-only enforcement. When it is on, this assignment is skipped and
  enforce_ro keeps its initial value of true, so SUPER_ACL gives no exemption.
*/
if (!opt_super_readonly)
enforce_ro= !(thd->security_ctx->master_access & SUPER_ACL);
log_table_write_query=
is_log_table_write_query(thd->lex->sql_command);
for (i=0 ; i<count; i++)
{
TABLE *t= tables[i];
/* Protect against 'fake' partially initialized TABLE_SHARE */
DBUG_ASSERT(t->s->table_category != TABLE_UNKNOWN_CATEGORY);
/*
Table I/O to performance schema tables is performed
only internally by the server implementation.
When a user is requesting a lock, the following
constraints are enforced:
*/
if (t->s->table_category == TABLE_CATEGORY_LOG &&
(flags & MYSQL_LOCK_LOG_TABLE) == 0 &&
!log_table_write_query)
{
/*
A user should not be able to prevent writes,
or hold any type of lock in a session,
since this would be a DOS attack.
*/
if (t->reginfo.lock_type >= TL_READ_NO_INSERT ||
thd->lex->sql_command == SQLCOM_LOCK_TABLES)
{
my_error(ER_CANT_LOCK_LOG_TABLE, MYF(0));
DBUG_RETURN(1);
}
}
/*
  For write-type locks: count system-category tables (used by the mixed-lock
  check after the loop) and refuse tables that were opened read-only.
*/
if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE)
{
if (t->s->table_category == TABLE_CATEGORY_SYSTEM)
system_count++;
if (t->db_stat & HA_READ_ONLY)
{
my_error(ER_OPEN_AS_READONLY, MYF(0), t->alias);
DBUG_RETURN(1);
}
}
/*
If we are going to lock a non-temporary table we must own metadata
lock of appropriate type on it (I.e. for table to be locked for
write we must own metadata lock of MDL_SHARED_WRITE or stronger
type. For table to be locked for read we must own metadata lock
of MDL_SHARED_READ or stronger type).
The only exception are HANDLER statements which are allowed to
lock table for read while having only MDL_SHARED lock on it.
*/
DBUG_ASSERT(t->s->tmp_table ||
thd->mdl_context.is_lock_owner(MDL_key::TABLE,
t->s->db.str, t->s->table_name.str,
t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE ?
MDL_SHARED_WRITE : MDL_SHARED_READ) ||
(t->open_by_handler &&
thd->mdl_context.is_lock_owner(MDL_key::TABLE,
t->s->db.str, t->s->table_name.str,
MDL_SHARED)));
/*
Prevent modifications to base tables if READ_ONLY is activated.
In any case, read only does not apply to temporary tables.
*/
if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) && !t->s->tmp_table)
{
/*
  A write lock is refused when enforce_ro and opt_readonly are both set and
  this is not a slave thread. The error text names the super variant when
  opt_super_readonly is set.
*/
if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
enforce_ro && opt_readonly && !thd->slave_thread)
{
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0),
opt_super_readonly ? "--read-only (super)" : "--read-only");
DBUG_RETURN(1);
}
}
}
/*
Locking of system tables is restricted:
locking a mix of system and non-system tables in the same lock
is prohibited, to prevent contention.
*/
if ((system_count > 0) && (system_count < count))
{
my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0));
DBUG_RETURN(1);
}
DBUG_RETURN(0);
}

5.7.14

點選(此處)摺疊或開啟

/**
  Validate a set of tables before they are locked (5.7.14 version).

  Each table is checked in turn; the first violation aborts the check with
  return value 1.

  @param thd     Thread context; its lex->sql_command and mdl_context are
                 consulted, and it is passed on to check_readonly().
  @param tables  Array of tables about to be locked.
  @param count   Number of entries in tables.
  @param flags   Lock flags; MYSQL_LOCK_LOG_TABLE and
                 MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY are tested here.

  @retval 0  All checks passed.
  @retval 1  A check failed. ER_CANT_LOCK_LOG_TABLE, ER_OPEN_AS_READONLY and
             ER_WRONG_LOCK_OF_SYSTEM_TABLE are raised here; for the read-only
             case, error reporting is presumably done inside check_readonly()
             (not visible in this excerpt).
*/
static int
lock_tables_check(THD *thd, TABLE **tables, size_t count, uint flags)
{
uint system_count= 0, i= 0;
/*
Identifies if the executed sql command can updated either a log
or rpl info table.
*/
bool log_table_write_query= false;
DBUG_ENTER("lock_tables_check");
log_table_write_query=
is_log_table_write_query(thd->lex->sql_command);
for (i=0 ; i<count; i++)
{
TABLE *t= tables[i];
/* Protect against 'fake' partially initialized TABLE_SHARE */
DBUG_ASSERT(t->s->table_category != TABLE_UNKNOWN_CATEGORY);
/*
Table I/O to performance schema tables is performed
only internally by the server implementation.
When a user is requesting a lock, the following
constraints are enforced:
*/
if (t->s->table_category == TABLE_CATEGORY_LOG &&
(flags & MYSQL_LOCK_LOG_TABLE) == 0 &&
!log_table_write_query)
{
/*
A user should not be able to prevent writes,
or hold any type of lock in a session,
since this would be a DOS attack.
*/
if (t->reginfo.lock_type >= TL_READ_NO_INSERT ||
thd->lex->sql_command == SQLCOM_LOCK_TABLES)
{
my_error(ER_CANT_LOCK_LOG_TABLE, MYF(0));
DBUG_RETURN(1);
}
}
/*
  For write-type locks: count system-category tables (used by the mixed-lock
  check after the loop) and refuse tables that were opened read-only.
*/
if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE)
{
if (t->s->table_category == TABLE_CATEGORY_SYSTEM)
system_count++;
if (t->db_stat & HA_READ_ONLY)
{
my_error(ER_OPEN_AS_READONLY, MYF(0), t->alias);
DBUG_RETURN(1);
}
}
/*
If we are going to lock a non-temporary table we must own metadata
lock of appropriate type on it (I.e. for table to be locked for
write we must own metadata lock of MDL_SHARED_WRITE or stronger
type. For table to be locked for read we must own metadata lock
of MDL_SHARED_READ or stronger type).
*/
DBUG_ASSERT(t->s->tmp_table ||
thd->mdl_context.owns_equal_or_stronger_lock(MDL_key::TABLE,
t->s->db.str, t->s->table_name.str,
t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE ?
MDL_SHARED_WRITE : MDL_SHARED_READ));
/*
Prevent modifications to base tables if READ_ONLY is activated.
In any case, read only does not apply to temporary tables.
*/
if (!(flags & MYSQL_LOCK_IGNORE_GLOBAL_READ_ONLY) && !t->s->tmp_table)
{
/*
  The read-only decision for write locks is delegated to check_readonly();
  this function no longer keeps a local enforce_ro flag.
*/
if (t->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE &&
check_readonly(thd, true))
DBUG_RETURN(1);
}
}
/*
Locking of system tables is restricted:
locking a mix of system and non-system tables in the same lock
is prohibited, to prevent contention.
*/
if ((system_count > 0) && (system_count < count))
{
my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0));
DBUG_RETURN(1);
}
DBUG_RETURN(0);
}

作者微信：

關於MYSQL 5.6 super_read_only和Event Scheduler衝突導致啟動失敗

相關文章