InnoDB Redo的结构漫谈(1) 作者:@明天会更好(群号:196380905) 1.Redo 日志: redo日志的结构是物理到页,逻辑到记录;采用的是物理逻辑日志。innodb redo日志是基于mini-transaction实现的。 每个重做日志块是512个字节,也就意味着log buffer中也是以512个字节的块组织的。每个mini-transaction 会有一个mtr的结构,这个mtr的结构封装了未被写入到log buffer的日志,mtr结构有log成员,该成员就是日志的结构。log_t 是代表log buffer的结构,mysqld运行实例中只维护一个log_sys实例(log_t* log_sys)。 log_sys维护了由innodb_log_buffer_size指定大小的redo buffer,这块内存区域由log_sys->buf指向。 redo buffer 中的每个log block由如下三个部分组成: |----------------| | block header | |----------------| | log records | |----------------| | block trailer | |----------------| (log block 结构) |----------|----------|----------|----------| |log block |log block |log block |log block | |----------|----------|----------|----------| (log buffer 组织方式) log block 的头部和尾部在源码中的定义: /* Offsets of a log block header */ #define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and is allowed to wrap around at 2G; the highest bit is set to 1 if this is the first log block in a log flush write segment */ #define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL /* mask used to get the highest bit in the preceding field */ #define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to this block */ #define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an mtr log record group in this log block, 0 if none; if the value is the same as LOG_BLOCK_HDR_DATA_LEN, it means that the first rec group has not yet been catenated to this log block, but if it will, it will start at this offset; an archive recovery can start parsing the log records starting from this offset in this log block, if value not 0 */ #define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of log_sys->next_checkpoint_no when the log block was last written to: if the block has not yet been written full, this value is only updated before a log buffer flush */ #define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in bytes */ /* Offsets of a log block trailer from the end of the block */ #define LOG_BLOCK_CHECKSUM 4 /* 4 byte 
checksum of the log block contents; in InnoDB versions < 3.23.52 this did not contain the checksum but the same value as .._HDR_NO */ #define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */ mtr是mini-transaction的缩写,mtr会把一组log record集中起来,然后写入到log buffer。 /* Mini-transaction handle and buffer */ struct mtr_t{ #ifdef UNIV_DEBUG ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ #endif dyn_array_t memo; /*!< memo stack for locks etc. */ dyn_array_t log; /*!< mini-transaction log */ unsigned inside_ibuf:1; /*!< TRUE if inside ibuf changes */ unsigned modifications:1; /*!< TRUE if the mini-transaction modified buffer pool pages */ unsigned made_dirty:1; /*!< TRUE if mtr has made at least one buffer pool page dirty */ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ ulint n_freed_pages; /* number of pages that have been freed in this mini-transaction */ ulint log_mode; /* specifies which operations should be logged; default value MTR_LOG_ALL */ lsn_t start_lsn;/* start lsn of the possible log entry for this mtr */ lsn_t end_lsn;/* end lsn of the possible log entry for this mtr */ #ifdef UNIV_DEBUG ulint magic_n; #endif /* UNIV_DEBUG */ }; mtr_t中memo和log成员的类型是dyn_array_t,它是dyn_block_t的别名;dyn_block_t的定义: /** A block in a dynamically allocated array */ struct dyn_block_t; /** Dynamically allocated array */ typedef dyn_block_t dyn_array_t; /** This is the initial 'payload' size of a dynamic array; this must be > MLOG_BUF_MARGIN + 30! */ #define DYN_ARRAY_DATA_SIZE 512 /*#################################################################*/ /** @brief A block in a dynamically allocated array. NOTE! Do not access the fields of the struct directly: the definition appears here only for the compiler to know its size! 
*/ struct dyn_block_t{ mem_heap_t* heap; /*!< in the first block this is != NULL if dynamic allocation has been needed */ ulint used; /*!< number of data bytes used in this block; DYN_BLOCK_FULL_FLAG is set when the block becomes full */ byte data[DYN_ARRAY_DATA_SIZE]; //真实的日志数据 /*!< storage for array elements */ UT_LIST_BASE_NODE_T(dyn_block_t) base; /*!< linear list of dyn blocks: this node is used only in the first block */ UT_LIST_NODE_T(dyn_block_t) list; /*!< linear list node: used in all blocks */ #ifdef UNIV_DEBUG ulint buf_end;/*!< only in the debug version: if dyn array is opened, this is the buffer end offset, else this is 0 */ ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */ #endif }; 2.mini-transaction log的调用方法的逻辑: mtr_commit ->mtr_log_reserve_and_write ->log_write_low //该函数是把mtr->log写入到空闲的log buffer中 /************************************************************//** Writes the contents of a mini-transaction log, if any, to the database log. */ static void mtr_log_reserve_and_write(mtr_t* mtr) /*!< in/out: mtr */ ---> /************************************************************//** Writes to the log the string given. It is assumed that the caller holds the log mutex. */ UNIV_INTERN void log_write_low(byte* str/*!< in: string */, ulint str_len/*!< in: string length */)