MySQL启动过程中 GTID 的处理:
1. 核心模块启动函数 init_server_components() 会调用 gtid_server_init() 来初始化 gtid server
2. 调用 init_server_auto_options() 初始化并获取数据库的 server_uuid,从 auto.cnf 文件中读取,如果没有则重新生成
3. 调用 gtid_state->init() 将 server_uuid 添加到 sid_map 中。
4. 调用 gtid_state->read_gtid_executed_from_table() 读取 mysql.gtid_executed 表,用表中的值初始化内存中的 executed_gtids
5. 接下来,要从 mysql.gtid_executed 表和 binlog 文件中初始化 gtid_executed 和 gtid_purged 的值
6. 将 binlog 中存在但 mysql.gtid_executed 表中缺失的 GTID 补写入该表,并加入内存中的 executed_gtids;随后更新 gtids_only_in_table、lost_gtids(即 gtid_purged)和 previous_gtids_logged 的值
7. 将 previous_gtids_logged 写入最新的binlog文件
涉及到的源码如下:
// 在init_server_components() 函数中会调用 gtid_server_init(), 创建 global_sid_lock & global_mode_lock & global_sid_map & gtid_state & gtid_table_persistor if (init_server_components()) unireg_abort(MYSQLD_ABORT_EXIT); /* Each server should have one UUID. We will create it automatically, if it does not exist. 初始化并获取数据库的 server_uuid, 从 auto.cnf 文件中获取,如果没有则重新生成。 */ if (init_server_auto_options()) { sql_print_error("Initialization of the server's UUID failed because it could" " not be read from the auto.cnf file. If this is a new" " server, the initialization failed because it was not" " possible to generate a new UUID."); unireg_abort(MYSQLD_ABORT_EXIT); } /* Add server_uuid to the sid_map. This must be done after server_uuid has been initialized in init_server_auto_options and after the binary log (and sid_map file) has been initialized in init_server_components(). No error message is needed: init_sid_map() prints a message. Strictly speaking, this is not currently needed when opt_bin_log==0, since the variables that gtid_state->init initializes are not currently used in that case. But we call it regardless to avoid possible future bugs if gtid_state ever needs to do anything else. */ global_sid_lock->wrlock(); // 将 server_uuid 添加到 sid_map 中 int gtid_ret = gtid_state->init(); global_sid_lock->unlock(); if (gtid_ret) unireg_abort(MYSQLD_ABORT_EXIT); // Initialize executed_gtids from mysql.gtid_executed table. if (gtid_state->read_gtid_executed_from_table() == -1) unireg_abort(1); if (opt_bin_log) { /* Initialize GLOBAL.GTID_EXECUTED and GLOBAL.GTID_PURGED from gtid_executed table and binlog files during server startup. 
*/ // 获取 gtid_state.executed_gtids, gtid_state.lost_gtids, gtid_state.gtids_only_in_table, gtid_state.previous_gtids_logged的指针 Gtid_set *executed_gtids = const_cast<Gtid_set *>(gtid_state->get_executed_gtids()); Gtid_set *lost_gtids = const_cast<Gtid_set *>(gtid_state->get_lost_gtids()); Gtid_set *gtids_only_in_table = const_cast<Gtid_set *>(gtid_state->get_gtids_only_in_table()); Gtid_set *previous_gtids_logged = const_cast<Gtid_set *>(gtid_state->get_previous_gtids_logged()); // 定义中间变量, 包括 binlog中purge掉的 gtids, binlog中包含的 gtids, binlog中包含但是不在表中的 gtids Gtid_set purged_gtids_from_binlog(global_sid_map, global_sid_lock); Gtid_set gtids_in_binlog(global_sid_map, global_sid_lock); Gtid_set gtids_in_binlog_not_in_table(global_sid_map, global_sid_lock); // 从 binlog 文件中读取 gtids_in_binlog 和 purged_gtids_from_binlog if (mysql_bin_log.init_gtid_sets(>ids_in_binlog, &purged_gtids_from_binlog, opt_master_verify_checksum, true /*true=need lock*/, NULL /*trx_parser*/, NULL /*gtid_partial_trx*/, true /*is_server_starting*/)) unireg_abort(MYSQLD_ABORT_EXIT); global_sid_lock->wrlock(); purged_gtids_from_binlog.dbug_print("purged_gtids_from_binlog"); gtids_in_binlog.dbug_print("gtids_in_binlog"); // 如果 gtids_in_binlog 不是空的, 并且从表中读取的 executed_gtids 是 gtids_in_binlog 的子集 if (!gtids_in_binlog.is_empty() && !gtids_in_binlog.is_subset(executed_gtids)) { gtids_in_binlog_not_in_table.add_gtid_set(>ids_in_binlog); if (!executed_gtids->is_empty()) // 更新 gtids_in_binlog_not_in_table gtids_in_binlog_not_in_table.remove_gtid_set(executed_gtids); /* Save unsaved GTIDs into gtid_executed table, in the following four cases: 1. the upgrade case. 2. the case that a slave is provisioned from a backup of the master and the slave is cleaned by RESET MASTER and RESET SLAVE before this. 3. the case that no binlog rotation happened from the last RESET MASTER on the server before it crashes. 4. 
The set of GTIDs of the last binlog is not saved into the gtid_executed table if server crashes, so we save it into gtid_executed table and executed_gtids during recovery from the crash. */ // 将 gtids_in_binlog_not_in_table 存储到 mysql.gtid_executed 表中 if (gtid_state->save(>ids_in_binlog_not_in_table) == -1) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } // 在内存中的 executed_gtids 中加入 gtids_in_binlog_not_in_table gtid 集合 executed_gtids->add_gtid_set(>ids_in_binlog_not_in_table); } /* gtids_only_in_table= executed_gtids - gtids_in_binlog */ if (gtids_only_in_table->add_gtid_set(executed_gtids) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } // gtids_only_in_table gtids_only_in_table->remove_gtid_set(>ids_in_binlog); /* lost_gtids = executed_gtids - (gtids_in_binlog - purged_gtids_from_binlog) = gtids_only_in_table + purged_gtids_from_binlog; */ assert(lost_gtids->is_empty()); // 获取 lost_gtids, 也就是 gtid_purged 的值 if (lost_gtids->add_gtid_set(gtids_only_in_table) != RETURN_STATUS_OK || lost_gtids->add_gtid_set(&purged_gtids_from_binlog) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } /* Prepare previous_gtids_logged for next binlog previous_gtids_logged */ if (previous_gtids_logged->add_gtid_set(>ids_in_binlog) != RETURN_STATUS_OK) { global_sid_lock->unlock(); unireg_abort(MYSQLD_ABORT_EXIT); } /* Write the previous set of gtids at this point because during the creation of the binary log this is not done as we cannot move the init_gtid_sets() to a place before openning the binary log. This requires some investigation. 
将 previous_gtids_logged 写入最新的 binlog。 /Alfranio */ Previous_gtids_log_event prev_gtids_ev(>ids_in_binlog); global_sid_lock->unlock(); (prev_gtids_ev.common_footer)->checksum_alg = static_cast<enum_binlog_checksum_alg>(binlog_checksum_options); if (prev_gtids_ev.write(mysql_bin_log.get_log_file())) unireg_abort(MYSQLD_ABORT_EXIT); mysql_bin_log.add_bytes_written( prev_gtids_ev.common_header->data_written); if (flush_io_cache(mysql_bin_log.get_log_file()) || mysql_file_sync(mysql_bin_log.get_log_file()->file, MYF(MY_WME))) unireg_abort(MYSQLD_ABORT_EXIT); mysql_bin_log.update_binlog_end_pos(); #ifdef HAVE_REPLICATION if (opt_bin_log && expire_logs_days) { time_t purge_time = server_start_time - expire_logs_days * 24 * 60 * 60; DBUG_EXECUTE_IF("expire_logs_always_at_start", { purge_time = my_time(0); }); if (purge_time >= 0) mysql_bin_log.purge_logs_before_date(purge_time, true); } #endif (void)RUN_HOOK(server_state, after_engine_recovery, (NULL)); }
从 binlog 文件中读取 gtids_in_binlog 和 purged_gtids_from_binlog的init_gtid_sets() 函数代码解析如下:
bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids, bool verify_checksum, bool need_lock, Transaction_boundary_parser *trx_parser, Gtid *gtid_partial_trx, bool is_server_starting) { /* Acquires the necessary locks to ensure that logs are not either removed or updated when we are reading from it. */ if (need_lock) { // We don't need LOCK_log if we are only going to read the initial // Prevoius_gtids_log_event and ignore the Gtid_log_events. if (all_gtids != NULL) mysql_mutex_lock(&LOCK_log); mysql_mutex_lock(&LOCK_index); global_sid_lock->wrlock(); } else { if (all_gtids != NULL) mysql_mutex_assert_owner(&LOCK_log); mysql_mutex_assert_owner(&LOCK_index); global_sid_lock->assert_some_wrlock(); } // Gather the set of files to be accessed. list<string> filename_list; LOG_INFO linfo; int error; list<string>::iterator it; list<string>::reverse_iterator rit; bool reached_first_file = false; /* Initialize the sid_map to be used in read_gtids_from_binlog */ Sid_map *sid_map = NULL; if (all_gtids) sid_map = all_gtids->get_sid_map(); else if (lost_gtids) sid_map = lost_gtids->get_sid_map(); // 将当前的 binlog name加入到 filename_list 中 for (error = find_log_pos(&linfo, NULL, false /*need_lock_index=false*/); !error; error = find_next_log(&linfo, false /*need_lock_index=false*/)) { DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name)); filename_list.push_back(string(linfo.log_file_name)); } if (error != LOG_INFO_EOF) { DBUG_PRINT("error", ("Error reading %s index", is_relay_log ? "relaylog" : "binlog")); goto end; } /* On server starting, one new empty binlog file is created and its file name is put into index file before initializing GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the last binlog file before the server restarts, so we remove its file name from filename_list. 
在 server 启动时, 会创建一个新的空 binlog 文件, 并将他的文件名加入 binlog index 文件, 因为这个文件不是 mysql restart之前的最后一个binlog文件, 所以需要从 filename list中移除。 */ if (is_server_starting && !is_relay_log && !filename_list.empty()) filename_list.pop_back(); error = 0; // 数据库启动时不会是 NULL, 但是 purge binary logs等删除 binlog 文件的命令时是 NULL if (all_gtids != NULL) { DBUG_PRINT("info", ("Iterating backwards through %s logs, " "looking for the last %s log that contains " "a Previous_gtids_log_event.", is_relay_log ? "relay" : "binary", is_relay_log ? "relay" : "binary")); // Iterate over all files in reverse order until we find one that // contains a Previous_gtids_log_event. // 反向迭代指向 filename list的尾部。 rit = filename_list.rbegin(); bool can_stop_reading = false; // 如果只有一个 binlog 文件则为 true reached_first_file = (rit == filename_list.rend()); // DBUG_PRINT("info", ("filename='%s' reached_first_file=%d", reached_first_file ? "" : rit->c_str(), reached_first_file)); // 通过反向循环扫描来获取 gtids_in_binlog 集合。 while (!can_stop_reading && !reached_first_file) { // 获取文件名 const char *filename = rit->c_str(); assert(rit != filename_list.rend()); rit++; // 如果到达第一个文件则为 true, 表示扫描完成 reached_first_file = (rit == filename_list.rend()); DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d " "reached_first_file=%d, ", filename, can_stop_reading, reached_first_file)); // 通过 read_gtids_from_binlog 来读取 binlog 文件 switch (read_gtids_from_binlog(filename, all_gtids, reached_first_file ? lost_gtids : NULL, NULL /* first_gtid */, sid_map, verify_checksum, is_relay_log)) { case ERROR: { error = 1; goto end; } case GOT_GTIDS: { //如果扫描本binlog有PREVIOUS GTID EVENT和GTID EVENT 则跳出循环直达end can_stop_reading = true; break; } case GOT_PREVIOUS_GTIDS: { // 如果扫描本binlog有PREVIOUS GTID EVENT, 但是没有 GTID EVENT /* If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS. If this is a relaylog file, we need to find at least one GTID to start parsing the relay log to add GTID of transactions that might have spanned in distinct relaylog files. 
如果这是一个 binlog 文件, 那么存在 PREVIOUS GTID EVENT 就足够了。 如果这是 relay log文件, 我们需要找到至少一个 GTID 来解析 relay log, 以添加到 可能跨越不同 relay log文件的事务中。 */ if (!is_relay_log) can_stop_reading = true; break; } case NO_GTIDS: { // 这里如果binlog不包含GTID EVENT和PREVIOUS GTID EVENT /* Mysql server iterates backwards through binary logs, looking for the last binary log that contains a Previous_gtids_log_event for gathering the set of gtid_executed on server start. This may take very long time if it has many binary logs and almost all of them are out of filesystem cache. So if the binlog_gtid_simple_recovery is enabled, and the last binary log does not contain any GTID event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and GLOBAL.GTID_PURGED should be empty in the case. 如果 binlog_gtid_simple_recovery 是 on, 并且最后一个 binary log 文件中没有 任何 GTID event, 那么不需要再读取binlog 文件。global.gtid_executed 和 gtid_purged 都应该是空的。 */ if (binlog_gtid_simple_recovery && is_server_starting && !is_relay_log) { assert(all_gtids->is_empty()); assert(lost_gtids->is_empty()); goto end; } /*FALLTHROUGH*/ } case TRUNCATED: { break; } } } /* If we use GTIDs and have partial transactions on the relay log, must check if it ends on next relay log files. We also need to feed the boundary parser with the rest of the relay log to put it in the correct state before receiving new events from the master in the case of GTID auto positioning be disabled. 如果我们使用 GTID 并且在 relay log中存在部分事务, 那么必须检查事务是否在下一个relay log 文件中结束。 */ if (is_relay_log && filename_list.size() > 0) { /* Suppose the following relaylog: rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004 ---------------+---------------+---------------+--------------- PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS (empty) | (UUID:1) | (UUID:1) | (UUID:1) ---------------+---------------+---------------+--------------- GTID(UUID:1) | QUERY(INSERT) | QUERY(INSERT) | XID ---------------+---------------+---------------+--------------- QUERY(CREATE | TABLE t1 ...) 
| ---------------+ GTID(UUID:2) | ---------------+ QUERY(BEGIN) | ---------------+ As it is impossible to determine the current Retrieved_Gtid_Set by only looking to the PREVIOUS_GTIDS on the last relay log file, and scanning events on it, we tried to find a relay log file that contains at least one GTID event during the backwards search. In the example, we will find a GTID only in rl-bin.000001, as the UUID:2 transaction was spanned across 4 relay log files. The transaction spanning can be caused by "FLUSH RELAY LOGS" commands on slave while it is queuing the transaction. So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need to parse the relay log starting on the file we found the last GTID queued to know if the transaction was fully retrieved or not. */ /* Adjust the reverse iterator to point to the relaylog file we need to start parsing, as it was incremented after generating the relay log file name. */ assert(rit != filename_list.rbegin()); rit--; assert(rit != filename_list.rend()); /* Reset the transaction parser before feeding it with events */ trx_parser->reset(); gtid_partial_trx->clear(); DBUG_PRINT("info", ("Iterating forwards through relay logs, " "updating the Retrieved_Gtid_Set and updating " "IO thread trx parser before start.")); for (it = find(filename_list.begin(), filename_list.end(), *rit); it != filename_list.end(); it++) { const char *filename = it->c_str(); DBUG_PRINT("info", ("filename='%s'", filename)); if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids, true, trx_parser, gtid_partial_trx)) { error = 1; goto end; } } } } /** 正向循环查找 purged_gtids_from_binlog */ // 如果前面的反向循环没有迭代到第一个 binlog 文件 if (lost_gtids != NULL && !reached_first_file) { /* This branch is only reacheable by a binary log. The relay log don't need to get lost_gtids information. A 5.6 server sets GTID_PURGED by rotating the binary log. 
A 5.6 server that had recently enabled GTIDs and set GTID_PURGED would have a sequence of binary logs like: master-bin.N : No PREVIOUS_GTIDS (GTID wasn't enabled) master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE (GTID was enabled on startup) master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a SET @@GLOBAL.GTID_PURGED + has GTIDs of some transactions. If this 5.6 server be upgraded to 5.7 keeping its binary log files, this routine will have to find the first binary log that contains a PREVIOUS_GTIDS + a GTID event to ensure that the content of the GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery is not enabled). */ DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for " "the first binary log that contains both a " "Previous_gtids_log_event and a Gtid_log_event.")); assert(!is_relay_log); // 正向循环查找 for (it = filename_list.begin(); it != filename_list.end(); it++) { /* We should pass a first_gtid to read_gtids_from_binlog when binlog_gtid_simple_recovery is disabled, or else it will return right after reading the PREVIOUS_GTIDS event to avoid stall on reading the whole binary log. */ Gtid first_gtid = {0, 0}; const char *filename = it->c_str(); DBUG_PRINT("info", ("filename='%s'", filename)); // 通过函数 read_gtids_from_binlog 读取 binlog 文件 switch (read_gtids_from_binlog(filename, NULL, lost_gtids, binlog_gtid_simple_recovery ? NULL : &first_gtid, sid_map, verify_checksum, is_relay_log)) { case ERROR: { error = 1; /*FALLTHROUGH*/ } case GOT_GTIDS: { // 读取 binlog 文件扫描到了 PREVIOUS GTID EVENT和GTID EVENT, 跳出循环 goto end; } case NO_GTIDS: // 没有找到 PREVIOUS GTID EVENT 和 GTID EVENT则和 GOT_PREVIOUS_GTIDS的处理方式一致。 case GOT_PREVIOUS_GTIDS: { /* Mysql server iterates forwards through binary logs, looking for the first binary log that contains both Previous_gtids_log_event and gtid_log_event for gathering the set of gtid_purged on server start. 
It also iterates forwards through binary logs, looking for the first binary log that contains both Previous_gtids_log_event and gtid_log_event for gathering the set of gtid_purged when purging binary logs. This may take very long time if it has many binary logs and almost all of them are out of filesystem cache. So if the binlog_gtid_simple_recovery is enabled, we just initialize GLOBAL.GTID_PURGED from the first binary log, do not read any more binary logs. 如果只有 PREVIOUS GTID EVENT 并且设置了 binlog_gtid_simple_recovery, 只读取第一个 binlog 文件来 初始化 GLOBAL.GTID_PURGED。 */ if (binlog_gtid_simple_recovery) goto end; /*FALLTHROUGH*/ } case TRUNCATED: { break; } } } } end: if (all_gtids) all_gtids->dbug_print("all_gtids"); if (lost_gtids) lost_gtids->dbug_print("lost_gtids"); if (need_lock) { global_sid_lock->unlock(); mysql_mutex_unlock(&LOCK_index); if (all_gtids != NULL) mysql_mutex_unlock(&LOCK_log); } filename_list.clear(); DBUG_PRINT("info", ("returning %d", error)); DBUG_RETURN(error != 0 ? true : false); }