xv6——文件系统：磁盘的LOG日志机制

WAL机制
xv6的log机制原理图
数据结构
- 磁盘上的log header
- 内存中的log结构体
函数实现

作者：殷某人
更新时间：2022/07/03

WAL机制

xv6操作系统中的log机制，目的是为了在系统crash的时候，可以达到：

数据的一致性，达到事务的原子性目的：要么写入全部成功，要么写入全部失败
提供发生故障时，数据恢复机制。

本质原理就是遵守 Write Ahead Log 原则，即系统首先将所有写操作以及写的内容记录在log中，然后再写入到文件的实际位置。

xv6的log机制原理图

数据结构

磁盘上的log header

log header，位于磁盘LOG区的第一个BLOCK块上，它里面记录了当前保存在LOG区的block块的数目以及block块的索引值。

// Log header. It is stored in the first block of the log area
// (read_head/write_head access it at block log.start) and mirrored
// in memory as log.lh.
struct logheader {
  int n;                // number of blocks currently recorded in the log
  int block[LOGSIZE];   // block[i] is the home block number of the i-th logged block
};

内存中的log结构体

它的功能为：在内存中存储磁盘上log区的属性信息和磁盘log header的缓存映射，以及操作过程中的控制信息。

// In-memory state of the logging layer: where the log area lives on
// disk, bookkeeping for in-flight operations, and a copy of the log header.
struct log {
  struct spinlock lock;
  int start;       // location of the log on disk (block number of its first block)
  int size;        // total number of blocks in the log area
  int outstanding; // number of file-system calls currently using the log (used to avoid exceeding log capacity)
  int committing;  // non-zero while a commit is in progress, i.e. the log is being written to disk
  int dev;         // device number given to initlog()
  struct logheader lh;  // in-memory copy of the on-disk log header
};

函数实现

初始化log系统 `initlog`

读磁盘上的超级块，载入log区的信息，包含大小、起始block、logheader信息
如果存在log缓存，说明发生过crash，进行数据恢复。

// Initialize the logging layer for device `dev`.
// Panics if struct logheader does not fit in a single block, then
// initializes the log lock, loads the log's start block and size from
// the superblock, and finally runs recovery for any logged transaction.
void
initlog(int dev)
{
  struct superblock sb;

  // The header is read and written as one block (see read_head/write_head).
  if (BSIZE <= sizeof(struct logheader))
    panic("initlog: too big logheader");

  initlock(&log.lock, "log");

  readsb(dev, &sb);
  log.dev = dev;
  log.size = sb.nlog;
  log.start = sb.logstart;

  recover_from_log();
}

读写log header

// Load the log header from block log.start on disk into the
// in-memory copy log.lh (the count plus the first n block numbers).
static void
read_head(void)
{
  struct buf *hbuf = bread(log.dev, log.start);
  struct logheader *ondisk = (struct logheader *) hbuf->data;
  int k = 0;

  log.lh.n = ondisk->n;
  while (k < log.lh.n) {
    log.lh.block[k] = ondisk->block[k];
    k++;
  }
  brelse(hbuf);
}

// Copy the in-memory header log.lh into the header block (block
// log.start) and write that block out with bwrite(). commit() relies
// on this write as the point at which a transaction becomes committed.
static void
write_head(void)
{
  struct buf *hbuf = bread(log.dev, log.start);
  struct logheader *ondisk = (struct logheader *) hbuf->data;
  int k = 0;

  ondisk->n = log.lh.n;
  while (k < log.lh.n) {
    ondisk->block[k] = log.lh.block[k];
    k++;
  }
  bwrite(hbuf);
  brelse(hbuf);
}

数据从log区转换到实际位置区 `install_trans(void)`

xv6系统中，把一次log的commit称作transaction，即事务，所以函数名为：install_trans。
读log头，获取需要转换的log总数目以及目的block块，进行数据转移。

// Copy every logged block from the log area to its home location.
// Log entry i lives at block log.start + i + 1 (the header occupies
// block log.start); its destination is log.lh.block[i].
static void
install_trans(void)
{
  int idx = 0;

  while (idx < log.lh.n) {
    struct buf *src = bread(log.dev, log.start + idx + 1); // logged copy
    struct buf *dst = bread(log.dev, log.lh.block[idx]);   // home block
    memmove(dst->data, src->data, BSIZE);
    bwrite(dst);  // write the home block out
    brelse(src);
    brelse(dst);
    idx++;
  }
}

数据恢复处理函数`recover_from_log()`

// Recovery, run from initlog(): install whatever transaction the
// on-disk header describes, then clear the log. If the header's count
// is 0, install_trans() copies nothing and the header is rewritten as empty.
static void
recover_from_log(void)
{
  read_head();      // load the on-disk header into log.lh
  install_trans();  // copy the log.lh.n logged blocks to their home locations
  log.lh.n = 0;
  write_head();     // write the now-empty header back to the log's first block
}

把数据从内存的cache区写入到磁盘的log区

log区的第一个block块保存的是logheader数据，所以从第二个block块开始保存数据。

// Copy each block listed in log.lh.block[] into the log area.
// Entry i is written to block log.start + i + 1, because the first
// block of the log area holds the header.
static void
write_log(void)
{
  int idx = 0;

  while (idx < log.lh.n) {
    struct buf *logblk = bread(log.dev, log.start + idx + 1); // slot in the log
    struct buf *cached = bread(log.dev, log.lh.block[idx]);   // modified block
    memmove(logblk->data, cached->data, BSIZE);
    bwrite(logblk);  // write the log slot out
    brelse(cached);
    brelse(logblk);
    idx++;
  }
}

log由内存写入到磁盘的事务级处理`void commit()`

这个过程是事务级处理，要么全部写入磁盘成功，要么全部写入失败。它的操作步骤为：

首先把数据输入到log暂存区
刷新log header信息
把数据从log暂存区转移到数据真正的位置上
重置log header信息


// Commit the current transaction, if it recorded any blocks.
// Steps, in order: copy the modified blocks into the log area, write
// the header (the commit point), install the blocks at their home
// locations, then write an empty header to erase the transaction.
static void
commit()
{
  if (log.lh.n <= 0)
    return;          // nothing was logged

  write_log();       // modified blocks -> log area
  write_head();      // header to disk: the real commit
  install_trans();   // log area -> home locations
  log.lh.n = 0;
  write_head();      // erase the transaction from the log
}

使用文件系统调用前, 执行`begin_op()`

判断当前log是否在commit过程中，如果是，睡眠等待。
判断当前的log剩余空间是否不足，如果是，睡眠等待。
都满足条件，增加计数。

// Called at the start of each file-system call.
// Sleeps on &log while a commit is in progress, or while counting this
// operation (MAXOPBLOCKS blocks per outstanding operation, on top of the
// log.lh.n blocks already recorded) could exceed LOGSIZE. Once neither
// holds, registers the operation by incrementing log.outstanding.
void
begin_op(void)
{
  acquire(&log.lock);
  // The space check is evaluated only when no commit is running.
  while(log.committing ||
        log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){
    sleep(&log, &log.lock);
  }
  log.outstanding += 1;
  release(&log.lock);
}

带log机制的写buffer操作——对外提供的写操作的接口

当在内存中修改了一个磁盘数据后，如果想把它从缓存真正写入到磁盘上，可以使用bwrite()函数。但是，xv6支持了log机制，使用log_write()代替了bwrite()函数来完成这个工作。该函数其实只完成了两件事：

在logheader中记录要写入的block块
标记记录的buffer为dirty状态，即表示待写入磁盘上

真正写磁盘的动作在end_op()函数中来完成。

// Record that buffer `b` has been modified as part of the current
// transaction; used in place of bwrite() by file-system code.
//
// Adds b->blockno to the in-memory log header unless it is already
// listed (a block modified several times occupies a single log slot),
// and marks the buffer B_DIRTY. Nothing is written to disk here.
//
// Panics if the log header is full or if no operation is outstanding
// (i.e. the caller did not go through begin_op()).
void
log_write(struct buf *b)
{
  int i;

  // Take the lock before looking at log.lh.n / log.outstanding: both are
  // updated under log.lock by other callers, so checking them unlocked
  // would race with concurrent log_write()/begin_op()/end_op().
  acquire(&log.lock);
  if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1)
    panic("too big a transaction");
  if (log.outstanding < 1)
    panic("log_write outside of trans");

  for (i = 0; i < log.lh.n; i++) {
    if (log.lh.block[i] == b->blockno)   // log absorption
      break;
  }
  log.lh.block[i] = b->blockno;
  if (i == log.lh.n)
    log.lh.n++;          // block was not in the log yet: claim a new slot
  b->flags |= B_DIRTY; // prevent eviction
  release(&log.lock);
}

使用文件系统调用后，执行`end_op()`

它真正的执行写磁盘的操作，即对应函数体中的commit动作。

// Called at the end of each file-system call.
// Decrements log.outstanding. If this was the last outstanding
// operation, marks the log as committing and performs the commit;
// otherwise just wakes sleepers on &log.
void
end_op(void)
{
  int last_op;

  acquire(&log.lock);
  log.outstanding -= 1;
  if(log.committing)
    panic("log.committing");
  last_op = (log.outstanding == 0);
  if(last_op){
    log.committing = 1;
  } else {
    // begin_op() may be waiting for log space, and the decrement
    // above has reduced the amount of reserved space.
    wakeup(&log);
  }
  release(&log.lock);

  if(!last_op)
    return;

  // commit() runs without log.lock held, since sleeping with
  // locks held is not allowed.
  commit();
  acquire(&log.lock);
  log.committing = 0;
  wakeup(&log);
  release(&log.lock);
}

相关阅读:
二货Mysql中设置字段的默认值问题
Mongodb第一步资料
时间时间
嵌入式linux应用程序移植方法总结
capwap DTSL 加密分析
capwap协议重点分析
一点论文写作心得
live555+ffmpeg如何提取关键帧（I帧，P帧，B帧）
“以图搜图”引擎及网站合集
常见的希腊字母的读法

原文地址：https://www.cnblogs.com/yinheyi/p/16464423.html