• leveldb 学习记录(七) SSTable构造


    使用TableBuilder构造一个Table

     1 struct TableBuilder::Rep {                       // Internal state of TableBuilder; tracks progress of the table being built
     2         Options options;
     3         Options index_block_options;
     4         WritableFile* file;                             // Output file the table is appended to (the .sst file)
     5         uint64_t offset;
     6         Status status;
     7         BlockBuilder data_block;                        // Data Block
     8         BlockBuilder index_block;                       // Index Block
     9         std::string last_key;                           // Last key added; used to check that keys arrive in sorted order and,
    10                                                         //+ when a Data Block is written, as the key of its Index Block entry (last_key+offset+size)
    11         int64_t num_entries;                            // Number of key/value pairs added so far
    12         bool closed;                                 // Either Finish() or Abandon() has been called.
    13  
    14        // In Add(), the index entry (key+offset+size) for a finished block is written only when the
    15        // first key/value of the next block arrives: FindShortestSeparator uses that key to pick a
    16        // shorter key separating the two blocks, which reduces the space the index needs;
    17        // only Finish() derives the entry from the last key of the last block (FindShortSuccessor).
    18         // We do not emit the index entry for a block until we have seen the
    19         // first key for the next data block.  This allows us to use shorter
    20         // keys in the index block.  For example, consider a block boundary
    21         // between the keys "the quick brown fox" and "the who".  We can use
    22         // "the r" as the key for the index block entry since it is >= all
    23         // entries in the first block and < all entries in subsequent
    24         // blocks.
    25         //
    26         // Invariant: r->pending_index_entry is true only if data_block is empty.
    27         bool pending_index_entry;                       // True right after a Data Block has been written; tells Add()/Finish()
    28                                                         //+ to append one index entry (last_key+offset+size) to the Index Block
    29         BlockHandle pending_handle;  // Handle to add to index block
    30  
    31         std::string compressed_output;                  // Scratch buffer holding compressed block data
    32  
    33         Rep(const Options& opt, WritableFile* f)        // Constructor
    34             : options(opt),
    35             index_block_options(opt),
    36             file(f),
    37             offset(0),
    38             data_block(&options),
    39             index_block(&index_block_options),
    40             num_entries(0),
    41             closed(false),
    42             pending_index_entry(false) 
    43         {
    44             index_block_options.block_restart_interval = 1; // Restart interval of 1 for the Index Block (per the original note: one record per restart, for easier lookup)
    45         }
    46     };// struct TableBuilder::Rep ;

    TableBuilder头文件

     1 class TableBuilder {
     2  public:
     3   // Create a builder that will store the contents of the table it is
     4   // building in *file.  Does not close the file.  It is up to the
     5   // caller to close the file after calling Finish().
     6 // Creates a builder that stores the table in *file. The file must stay open while the builder is in use; the caller closes it after Finish().
     7   TableBuilder(const Options& options, WritableFile* file);
     8 
     9   // REQUIRES: Either Finish() or Abandon() has been called.
    10   ~TableBuilder();
    11 
    12   // Change the options used by this builder.  Note: only some of the
    13   // option fields can be changed after construction.  If a field is
    14   // not allowed to change dynamically and its value in the structure
    15   // passed to the constructor is different from its value in the
    16   // structure passed to this method, this method will return an error
    17   // without changing any fields.
    18   Status ChangeOptions(const Options& options);
    19 
    20   // Add key,value to the table being constructed.
    21   // REQUIRES: key is after any previously added key according to comparator.
    22   // REQUIRES: Finish(), Abandon() have not been called
    23   // Adds a key/value pair; the implementation is walked through further below.
    24   void Add(const Slice& key, const Slice& value);
    25 
    26   // Advanced operation: flush any buffered key/value pairs to file.
    27   // Can be used to ensure that two adjacent entries never live in
    28   // the same data block.  Most clients should not need to use this method.
    29   // REQUIRES: Finish(), Abandon() have not been called
    30   void Flush();
    31 
    32   // Return non-ok iff some error has been detected.
    33   Status status() const;
    34 
    35   // Finish building the table.  Stops using the file passed to the
    36   // constructor after this function returns.
    37   // REQUIRES: Finish(), Abandon() have not been called
    38 
    39   Status Finish();
    40 
    41   // Indicate that the contents of this builder should be abandoned.  Stops
    42   // using the file passed to the constructor after this function returns.
    43   // If the caller is not going to call Finish(), it must call Abandon()
    44   // before destroying this builder.
    45   // REQUIRES: Finish(), Abandon() have not been called
    46   void Abandon();
    47 
    48   // Number of calls to Add() so far.
    49   uint64_t NumEntries() const;
    50 
    51   // Size of the file generated so far.  If invoked after a successful
    52   // Finish() call, returns the size of the final generated file.
    53   uint64_t FileSize() const;
    54 
    55  private:
    56   bool ok() const { return status().ok(); }
    57   void WriteBlock(BlockBuilder* block, BlockHandle* handle);
    58 
    59   struct Rep;
    60   Rep* rep_;
    61 
    62   // No copying allowed
    63   TableBuilder(const TableBuilder&);
    64   void operator=(const TableBuilder&);
    65 };

    TableBuilder的实现如下,主要是按照SSTable的文件格式依次填充各部分内容,这里做了简单的注释:

    // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style license that can be
    // found in the LICENSE file. See the AUTHORS file for names of contributors.
    
    #include "leveldb/table_builder.h"
    
    #include <assert.h>
    #include <stdio.h>
    #include "leveldb/comparator.h"
    #include "leveldb/env.h"
    #include "table/block_builder.h"
    #include "table/format.h"
    #include "util/coding.h"
    #include "util/crc32c.h"
    #include "util/logging.h"
    
    namespace leveldb {
    
    // Private state of a TableBuilder: the destination file, the block builders
    // for the data block in progress and for the index block, and bookkeeping
    // about what has been written so far.
    struct TableBuilder::Rep {
      // Copies `opt` into both option sets, then forces the index block's
      // restart interval to 1. Members are initialized in declaration order,
      // so both option sets exist before the block builders receive pointers
      // to them.
      Rep(const Options& opt, WritableFile* f)
          : options(opt),
            index_block_options(opt),
            file(f),
            offset(0),
            data_block(&options),
            index_block(&index_block_options),
            num_entries(0),
            closed(false),
            pending_index_entry(false) {
        index_block_options.block_restart_interval = 1;
      }

      Options options;              // Options handed to the data block builder
      Options index_block_options;  // Same options, but with restart interval 1
      WritableFile* file;           // File that blocks and the footer are appended to
      uint64_t offset;              // Bytes successfully appended to `file` so far
      Status status;                // Result of the most recent file operation
      BlockBuilder data_block;      // Data block currently being filled
      BlockBuilder index_block;     // Index block: one entry per written data block
      std::string last_key;         // Most recently added key
      int64_t num_entries;          // Count of Add() calls that stored an entry
      bool closed;                  // Set once Finish() or Abandon() has run

      // The index entry for a data block is held back until the first key of
      // the following data block is known, so that a shorter key can serve as
      // the index key.  With a block boundary between "the quick brown fox"
      // and "the who", for instance, "the r" works as the index key: it is >=
      // every entry in the first block and < every entry in later blocks.
      //
      // Invariant: r->pending_index_entry is true only if data_block is empty.
      bool pending_index_entry;
      BlockHandle pending_handle;  // Handle that the pending index entry will store

      std::string compressed_output;  // Scratch space for compressed block bytes
    };
    
    // Builds the private state around `options` and `file`; the Rep is owned by
    // this builder and released in the destructor.
    TableBuilder::TableBuilder(const Options& options, WritableFile* file)
        : rep_(new Rep(options, file)) {}
    
    // Releases the private state. In debug builds, asserts that the caller
    // already ended the build through Finish() or Abandon().
    TableBuilder::~TableBuilder() {
      Rep* const state = rep_;
      assert(state->closed);  // Trips when the caller forgot Finish()/Abandon()
      delete state;
    }
    
    // Replaces the options used for the rest of the build.
    //
    // Returns InvalidArgument, leaving every field untouched, when `options`
    // names a different comparator than the one the builder was created with;
    // returns OK otherwise.
    Status TableBuilder::ChangeOptions(const Options& options) {
      Rep* const state = rep_;

      // Note: when new fields are added to Options, extend this check so that
      // fields which must stay fixed during a build are rejected here.
      const bool comparator_changed =
          (options.comparator != state->options.comparator);
      if (comparator_changed) {
        return Status::InvalidArgument("changing comparator while building table");
      }

      // The live BlockBuilders hold pointers to these two Options objects, so
      // assigning in place makes them see the new values without any rewiring.
      state->options = options;
      state->index_block_options = options;
      state->index_block_options.block_restart_interval = 1;
      return Status::OK();
    }
    
    void TableBuilder::Add(const Slice& key, const Slice& value) {
      Rep* r = rep_;
      assert(!r->closed);
      if (!ok()) return;    //确保Rep没有关闭 并且状态正常
    
      //如果不是添加的table本身的属性  添加的key 必然是有序的的 否则报错
      if (r->num_entries > 0) {
        assert(r->options.comparator->Compare(key, Slice(r->last_key)) > 0);
      }
    
      //pending_index_entry标记是否是新创建的一个block 
      //当新创建一个block时 才可能确认上一个block和新block之间的key的一个分割字符串 记录在lastkey和index_block 方便以后查找key 定位
    
      if (r->pending_index_entry) {
        assert(r->data_block.empty());
        //comparator 中有 FindShortestSeparator() / FindShortSuccessor()两个接口,
        //FindShortestSeparator(start, limit)是获得大于 start 但小于 limit 的最小值。
        //FindShortSuccessor(start)是获得比 start 大的最小值。比较都基于 user - commparator,二者会被
        //用来确定 sstable 中 block 的 end - key。
        r->options.comparator->FindShortestSeparator(&r->last_key, key);
        std::string handle_encoding;
        r->pending_handle.EncodeTo(&handle_encoding);
        r->index_block.Add(r->last_key, Slice(handle_encoding));
        r->pending_index_entry = false;
      }
      //更新lastkey 跟新记录计数 添加data block
      r->last_key.assign(key.data(), key.size());
      r->num_entries++;
      r->data_block.Add(key, value);
    
      //data block 大于指定size 进行flush操作
      const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
      if (estimated_block_size >= r->options.block_size) {
        Flush();
      }
    }
    
    //block flush落盘
    void TableBuilder::Flush() {
      Rep* r = rep_;
      assert(!r->closed);
      if (!ok()) return;
      if (r->data_block.empty()) return;
      assert(!r->pending_index_entry);
      WriteBlock(&r->data_block, &r->pending_handle);
      if (ok()) {
        r->pending_index_entry = true;
        r->status = r->file->Flush();
      }
    }
    
    // Finishes `block`, compresses it when configured and worthwhile, and
    // appends the contents plus a trailer (1-byte type, 4-byte masked crc) to
    // the file.
    //
    // `*handle` receives the offset and size of the stored contents (trailer
    // not included). The file offset advances only when both appends succeed;
    // any failure is kept in the builder's status. `block` is reset and the
    // compression scratch buffer cleared before returning, success or not.
    void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
      // Every block in the file is laid out as:
      //    block_data: uint8[n]
      //    type: uint8
      //    crc: uint32
      assert(ok());
      Rep* const state = rep_;
      const Slice raw = block->Finish();

      Slice block_contents;
      CompressionType type = state->options.compression;
      // TODO(postrelease): Support more compression options: zlib?
      switch (type) {
        case kNoCompression:
          block_contents = raw;
          break;

        case kSnappyCompression: {
          std::string* const out = &state->compressed_output;
          const bool compressed_ok =
              port::Snappy_Compress(raw.data(), raw.size(), out);
          // Keep the compressed form only when it saves more than 12.5%.
          const bool worthwhile =
              compressed_ok && out->size() < raw.size() - (raw.size() / 8u);
          if (worthwhile) {
            block_contents = *out;
          } else {
            // Snappy unavailable or the gain is too small: store the raw bytes.
            block_contents = raw;
            type = kNoCompression;
          }
          break;
        }
      }

      handle->set_offset(state->offset);
      handle->set_size(block_contents.size());
      state->status = state->file->Append(block_contents);
      if (state->status.ok()) {
        char trailer[kBlockTrailerSize];
        trailer[0] = type;
        // The checksum covers the block contents followed by the type byte.
        const uint32_t contents_crc =
            crc32c::Value(block_contents.data(), block_contents.size());
        const uint32_t checksum = crc32c::Extend(contents_crc, trailer, 1);
        EncodeFixed32(trailer + 1, crc32c::Mask(checksum));
        state->status = state->file->Append(Slice(trailer, kBlockTrailerSize));
        if (state->status.ok()) {
          state->offset += block_contents.size() + kBlockTrailerSize;
        }
      }
      state->compressed_output.clear();
      block->Reset();
    }
    
    // Returns a copy of the builder's current status.
    Status TableBuilder::status() const {
      const Rep* const state = rep_;
      return state->status;
    }
    
    // Completes the table: flushes the last data block, then writes the
    // metaindex block, the index block and the footer, in that order. Each step
    // runs only if the builder is still error-free. Marks the builder closed
    // and returns the final status.
    Status TableBuilder::Finish() {
      Rep* const state = rep_;
      Flush();
      assert(!state->closed);
      state->closed = true;

      BlockHandle metaindex_handle;
      BlockHandle index_handle;

      // Metaindex block: written empty for now.
      if (ok()) {
        BlockBuilder metaindex_builder(&state->options);
        // TODO(postrelease): Add stats and other meta blocks
        WriteBlock(&metaindex_builder, &metaindex_handle);
      }

      // Index block: first emit the entry still owed for the final data block.
      // No next key exists to separate from, so last_key is shortened with
      // FindShortSuccessor instead.
      if (ok()) {
        if (state->pending_index_entry) {
          state->options.comparator->FindShortSuccessor(&state->last_key);
          std::string encoded_handle;
          state->pending_handle.EncodeTo(&encoded_handle);
          state->index_block.Add(state->last_key, Slice(encoded_handle));
          state->pending_index_entry = false;
        }
        WriteBlock(&state->index_block, &index_handle);
      }

      // Footer: records the handles of the metaindex and index blocks.
      if (ok()) {
        Footer footer;
        footer.set_metaindex_handle(metaindex_handle);
        footer.set_index_handle(index_handle);
        std::string encoded_footer;
        footer.EncodeTo(&encoded_footer);
        state->status = state->file->Append(encoded_footer);
        if (state->status.ok()) {
          state->offset += encoded_footer.size();
        }
      }
      return state->status;
    }
    
    void TableBuilder::Abandon() {
      Rep* r = rep_;
      assert(!r->closed);
      r->closed = true;
    }
    
    uint64_t TableBuilder::NumEntries() const {
      return rep_->num_entries;
    }
    
    uint64_t TableBuilder::FileSize() const {
      return rep_->offset;
    }
    
    }

    参考 

    https://blog.csdn.net/tankles/article/details/7663918

    《leveldb实现解析》淘宝 那岩

    作 者: itdef
    欢迎转帖 请保持文本完整并注明出处
    技术博客 http://www.cnblogs.com/itdef/
    B站算法视频题解
    https://space.bilibili.com/18508846
    qq 151435887
    gitee https://gitee.com/def/
    欢迎c c++ 算法爱好者 windows驱动爱好者 服务器程序员沟通交流
    如果觉得不错,欢迎点赞,你的鼓励就是我的动力
    阿里打赏 微信打赏
  • 相关阅读:
    ACM TJU 1556
    HDU 1890 Robotie Sort
    Android学习笔记
    HDU 2795
    HDU 1542
    HDU 1698
    POJ 2185
    学习笔记
    HDU 3336
    HDU 3746
  • 原文地址:https://www.cnblogs.com/itdef/p/9808734.html
Copyright © 2020-2023  润新知