• leveldb学习:Versionedit和Versionset


    VersionEdit:

    compact过程中会有一系列改变当前Version的操作(FileNumber增加,删除input的sstable,添加输出的sstable)。为了缩小version切换的时间点,将这些操作封装成versionedit,compact完成时,将versionedit中的操作一次性应用到当前version,就可以得到最新状态的version。


    versionedit的成员变量:

     private:
      // VersionSet accesses these private fields directly
      // (for example VersionSet::LogAndApply reads has_log_number_ and
      // log_number_).
      friend class VersionSet;
      // Set of (level, file number) pairs naming the files to remove.
      typedef std::set< std::pair<int, uint64_t> > DeletedFileSet;
      // Scalar fields of the edit. Each one is only meaningful when the
      // matching has_*_ flag below is true; EncodeTo() skips unset fields.
      std::string comparator_;
      uint64_t log_number_;
      uint64_t prev_log_number_;
      uint64_t next_file_number_;
      SequenceNumber last_sequence_;
      // Presence flags for the scalar fields above.
      bool has_comparator_;
      bool has_log_number_;
      bool has_prev_log_number_;
      bool has_next_file_number_;
      bool has_last_sequence_;
      // (level, key) pairs, serialized by EncodeTo() as kCompactPointer records.
      std::vector< std::pair<int, InternalKey> > compact_pointers_;
      // Input sstables of the compaction, i.e. files to be dropped.
      DeletedFileSet deleted_files_;
      // (level, metadata) of the compaction's output sstables, i.e. files added.
      std::vector< std::pair<int, FileMetaData> > new_files_;

    deleted_files_和new_files_记录的是compact过程的input sstable和output sstable。
    每一次compact之后都会将相应的versionedit encode进manifest文件,参考函数EncodeTo。

    void VersionEdit::EncodeTo(std::string* dst) const {
      if (has_comparator_) {
        PutVarint32(dst, kComparator);
        PutLengthPrefixedSlice(dst, comparator_);
      }
      if (has_log_number_) {
        PutVarint32(dst, kLogNumber);
        PutVarint64(dst, log_number_);
      }
      if (has_prev_log_number_) {
        PutVarint32(dst, kPrevLogNumber);
        PutVarint64(dst, prev_log_number_);
      }
      if (has_next_file_number_) {
        PutVarint32(dst, kNextFileNumber);
        PutVarint64(dst, next_file_number_);
      }
      if (has_last_sequence_) {
        PutVarint32(dst, kLastSequence);
        PutVarint64(dst, last_sequence_);
      }
      for (size_t i = 0; i < compact_pointers_.size(); i++) {
        PutVarint32(dst, kCompactPointer);
        PutVarint32(dst, compact_pointers_[i].first);  // level
        PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode());
      }
      for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
           iter != deleted_files_.end();
           ++iter) {
        PutVarint32(dst, kDeletedFile);
        PutVarint32(dst, iter->first);   // level
        PutVarint64(dst, iter->second);  // file number
      }
      for (size_t i = 0; i < new_files_.size(); i++) {
        const FileMetaData& f = new_files_[i].second;
        PutVarint32(dst, kNewFile);
        PutVarint32(dst, new_files_[i].first);  // level
        PutVarint64(dst, f.number);
        PutVarint64(dst, f.file_size);
        PutLengthPrefixedSlice(dst, f.smallest.Encode());
        PutLengthPrefixedSlice(dst, f.largest.Encode());
      }
    }

    VersionSet::Builder

    将VersionEdit应用到VersionSet上的过程封装成VersionSet::Builder。主要是更新Version::files_[]。

    class VersionSet::Builder {
     private:
      // Helper to sort by v->files_[file_number].smallest
      // Orders the FileMetaData entries kept for Version::files_[i]:
      // primarily by smallest key, with the file number as tie-breaker.
      struct BySmallestKey {
        const InternalKeyComparator* internal_comparator;
        bool operator()(FileMetaData* f1, FileMetaData* f2) const {
          int r = internal_comparator->Compare(f1->smallest, f2->smallest);
          if (r != 0) {
            return (r < 0);
          } else {
            // Break ties by file number
            return (f1->number < f2->number);
          }
        }
      };
    
      // Set of sstables kept sorted with BySmallestKey.
      typedef std::set<FileMetaData*, BySmallestKey> FileSet;
      // Per-level sets of sstables to delete and to add.
      struct LevelState {
        std::set<uint64_t> deleted_files;
        FileSet* added_files;
      };
      // The VersionSet being updated.
      VersionSet* vset_;
      // The version used as the starting point; after a compaction the
      // caller passes current_ as the base (see VersionSet::LogAndApply).
      Version* base_;
      // Pending file changes for every level.
      LevelState levels_[config::kNumLevels];

    事实上也就是VersionEdit存放sstable的修改信息(主要为deleted_files_和new_files_),Builder依据这些信息完成files_[]的修改(参见函数Apply、SaveTo和MaybeAddFile)。

    VersionSet

    在version的博客里,我们说versionset是DB中的version的集合。整个DB的当前状态被VersionSet管理着,其中有当前最新的Version以及其它正在服务的Version组成的链表、全局的SequenceNumber、当前的manifest_file_number、封装sstable的TableCache(详细含义可见“leveldb学习开篇”中转载的一篇博客)、每一个level中下一次compact要选取的start_key等等。

    private:  
      Env* const env_;  // Environment used for disk reads and writes
      const std::string dbname_;
      const Options* const options_;
      TableCache* const table_cache_;  // Table cache used to access sstables
      const InternalKeyComparator icmp_;
      uint64_t next_file_number_;  // Next available file number
      uint64_t manifest_file_number_;  // File number of the manifest file
      uint64_t last_sequence_;  // Last SequenceNumber that was used
      uint64_t log_number_;  // File number of the log file
      uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted
      // Opened lazily
      WritableFile* descriptor_file_;
      log::Writer* descriptor_log_;
      Version dummy_versions_;  // Head of circular doubly-linked list of versions.
      Version* current_;        // Latest version; == dummy_versions_.prev_
      // Per-level key at which the next compaction at that level should start.
      // Either an empty string, or a valid InternalKey.
      std::string compact_pointer_[config::kNumLevels];

    为了尽量均匀地compact每一个level,会将这一次compact的end_key作为下一次compact的start_key,compact_pointer_就保存着每一个level下一次compact的start_key。

    以下来看成员函数,versionset的细节在函数中解说:

    // Installs "v" as the current version and links it in at the tail of the
    // circular version list (immediately before the dummy head).
    //
    // "v" must be a fresh version: not yet referenced and not already current.
    void VersionSet::AppendVersion(Version* v) {
      assert(v->refs_ == 0);
      assert(v != current_);

      // Swap the "current" reference from the old version to the new one.
      if (current_) {
        current_->Unref();
      }
      current_ = v;
      v->Ref();

      // Splice v between the present tail and the list head.
      Version* const head = &dummy_versions_;
      Version* const tail = dummy_versions_.prev_;
      v->prev_ = tail;
      v->next_ = head;
      tail->next_ = v;
      head->prev_ = v;
    }

    加入version,全部的version是用一个环状链表保存指针的,加入version也就是更新dummy_versions_的前后指针,而且设置V为当前version(current_)

    // Applies *edit to the current version to build a new version, records the
    // edit in the MANIFEST, and on success installs the new version as current.
    //
    // edit: the changes to apply. Fields it does not carry (log number,
    //       previous log number) are filled in from this VersionSet, and its
    //       next-file number and last sequence are always overwritten.
    // mu:   released while the MANIFEST is written and re-acquired before the
    //       new version is installed, so the caller is expected to hold it.
    //
    // Returns the status of the MANIFEST/CURRENT update. On failure the new
    // version is discarded, and a manifest file created by this call is closed
    // and deleted.
    Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
      if (edit->has_log_number_) {
        assert(edit->log_number_ >= log_number_);
        assert(edit->log_number_ < next_file_number_);
      } else {
        edit->SetLogNumber(log_number_);
      }
      if (!edit->has_prev_log_number_) {
        edit->SetPrevLogNumber(prev_log_number_);
      }
      edit->SetNextFile(next_file_number_);
      edit->SetLastSequence(last_sequence_);

      // Build the new version from current_ plus the edit.
      Version* v = new Version(this);
      {
        Builder builder(this, current_);
        builder.Apply(edit);
        builder.SaveTo(v);
      }
      Finalize(v);

      // Initialize new descriptor log file if necessary by creating
      // a temporary file that contains a snapshot of the current version.
      std::string new_manifest_file;
      Status s;
      if (descriptor_log_ == NULL) {
        // No reason to unlock *mu here since we only hit this path in the
        // first call to LogAndApply (when opening the database).
        assert(descriptor_file_ == NULL);
        new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
        edit->SetNextFile(next_file_number_);
        s = env_->NewWritableFile(new_manifest_file, &descriptor_file_);
        if (s.ok()) {
          descriptor_log_ = new log::Writer(descriptor_file_);
          s = WriteSnapshot(descriptor_log_);
        }
      }

      // Unlock during expensive MANIFEST log write
      {
        mu->Unlock();

        // Write new record to MANIFEST log
        if (s.ok()) {
          std::string record;
          edit->EncodeTo(&record);
          s = descriptor_log_->AddRecord(record);
          if (s.ok()) {
            s = descriptor_file_->Sync();
          }
          if (!s.ok()) {
            // The format string must end with an escaped newline; a raw line
            // break inside the literal does not compile.
            Log(options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str());
          }
        }

        // If we just created a new descriptor file, install it by writing a
        // new CURRENT file that points to it.
        if (s.ok() && !new_manifest_file.empty()) {
          s = SetCurrentFile(env_, dbname_, manifest_file_number_);
        }

        mu->Lock();
      }

      // Install the new version
      if (s.ok()) {
        AppendVersion(v);
        log_number_ = edit->log_number_;
        prev_log_number_ = edit->prev_log_number_;
      } else {
        delete v;
        // Only tear down the descriptor if this call created it.
        if (!new_manifest_file.empty()) {
          delete descriptor_log_;
          delete descriptor_file_;
          descriptor_log_ = NULL;
          descriptor_file_ = NULL;
          env_->DeleteFile(new_manifest_file);
        }
      }
      return s;
    }

    产生新版本号并更新当中files_[]:
    首先输入參数VersionEdit记录了leveldb每次更新的信息,包含新加入的sstable以及要删除的sstable,则仅仅需利用VersionSet::Builder更新新产生的version对象,然后将其加入version的链表中。并指定为current_。
    然后还要向manifest文件写入新version的信息(为了重新启动db后能够恢复退出前的状态,须要将db中的状态保存下来,这些信息就保存在manifest文件里)。

    void VersionSet::Finalize(Version* v)

    对version中的每一级sstable做一个评估,选择score最高的level作为须要compact的level(compaction_level_)。评估依据的是每一个level上文件的总大小(level n,n>0)或文件数量(level 0)。

    // Returns the approximate offset of "ikey" within the data of version "v".
    //
    // Every file that lies entirely before "ikey" contributes its full size;
    // a file whose key range contains "ikey" contributes the approximate
    // offset of "ikey" inside that table; files entirely after "ikey"
    // contribute nothing.
    uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
      uint64_t offset = 0;
      for (int level = 0; level < config::kNumLevels; ++level) {
        const std::vector<FileMetaData*>& level_files = v->files_[level];
        for (std::vector<FileMetaData*>::const_iterator it = level_files.begin();
             it != level_files.end(); ++it) {
          const FileMetaData* const file = *it;

          // Whole file precedes "ikey": count all of it.
          if (icmp_.Compare(file->largest, ikey) <= 0) {
            offset += file->file_size;
            continue;
          }

          // Whole file follows "ikey": nothing to add.
          if (icmp_.Compare(file->smallest, ikey) > 0) {
            // Files other than level 0 are sorted by meta->smallest, so no
            // later file in this level can contain data for "ikey".
            if (level > 0) {
              break;
            }
            continue;
          }

          // "ikey" lies inside this table's range: ask the table itself.
          Table* table;
          Iterator* table_iter = table_cache_->NewIterator(
              ReadOptions(), file->number, file->file_size, &table);
          if (table != NULL) {
            offset += table->ApproximateOffsetOf(ikey.Encode());
          }
          delete table_iter;
        }
      }
      return offset;
    }

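    估算ikey在该version全部sstable数据中的近似偏移量。对于level>0,同一level内各sstable的key范围互不重复且按smallest有序,所以可以通过比较ikey与每个文件的smallest/largest key,决定是累加整个文件的大小、提前结束该level的扫描,还是进入文件内部估算;level 0的文件可能相互重叠,因此必须遍历完该level的全部文件。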

    void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
      for (Version* v = dummy_versions_.next_;
           v != &dummy_versions_;
           v = v->next_) {
        for (int level = 0; level < config::kNumLevels; level++) {
          const std::vector<FileMetaData*>& files = v->files_[level];
          for (size_t i = 0; i < files.size(); i++) {
            live->insert(files[i]->number);
          }
        }
      }
    }

    求得全部version中所有sstable的文件编号(file number),放入live集合中。

    // Stores in *smallest and *largest the minimal key range that covers all
    // files in "inputs", as ordered by icmp_.
    //
    // "inputs" must not be empty.
    void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
                              InternalKey* smallest,
                              InternalKey* largest) {
      assert(!inputs.empty());
      smallest->Clear();
      largest->Clear();

      bool seeded = false;
      for (std::vector<FileMetaData*>::const_iterator it = inputs.begin();
           it != inputs.end(); ++it) {
        const FileMetaData* const file = *it;

        // The first file seeds both bounds.
        if (!seeded) {
          *smallest = file->smallest;
          *largest = file->largest;
          seeded = true;
          continue;
        }

        // Later files can only widen the range.
        if (icmp_.Compare(file->smallest, *smallest) < 0) {
          *smallest = file->smallest;
        }
        if (icmp_.Compare(file->largest, *largest) > 0) {
          *largest = file->largest;
        }
      }
    }

    获得inputs中全部文件所覆盖的key范围,保存在smallest和largest内。相似的还有GetRange2函数,不同的是GetRange2获取的是两组输入文件合起来的key范围:先将两个文件容器合并,再调用GetRange。

    好了。加上上篇的关于version的介绍。事实上整个version、versionset的内容我仅仅写了一部分,但对于理解leveldb的版本号控制我想应该有了一个较为清楚的认识,version是有关版本号信息的类,主要成员变量是files_[kNumLevels]的容器,记录这个版本号的全部sstable信息,versionset就是全部历史versions的集合。而versionedit和versionset::builder都是为了更新version的sstable信息。
    如有错误,欢迎大家指正。

  • 相关阅读:
    CloudStack 实现VM高可用特性
    cloudstack基础知识
    cloudstack4.5私有云集群规划与安装
    小心了,这个设置会导致你的vm重启时被强制重装系统!
    CloudStack名词解释
    javatoexe之exe4j和innosetup打包jar
    oracle之partition by与group by的区别
    Android中传递对象的三种方法
    设计模式之mvp设计模式
    正则表达式之环视(lookaround)
  • 原文地址:https://www.cnblogs.com/zhchoutai/p/7253724.html
Copyright © 2020-2023  润新知