• [Caffe I/O] The data reading layers: annotated source code


    The DataParameter message in caffe.proto

    message DataParameter {
      // The type of DB used for the input data.
      enum DB {
        LEVELDB = 0;  // use LevelDB
        LMDB = 1;     // use LMDB
      }
      // Specify the data source: the path to the database.
      optional string source = 1;
      // Specify the batch size: the number of images in one batch.
      optional uint32 batch_size = 4;
      // The rand_skip variable is for the data layer to skip a few data points
      // to avoid all asynchronous sgd clients starting at the same point. The
      // skip point would be set as rand_skip * rand(0,1). Note that rand_skip
      // should not be larger than the number of keys in the database.
      // DEPRECATED. Each solver accesses a different subset of the database.
      optional uint32 rand_skip = 7 [default = 0];
      // The DB backend used for the input data; LEVELDB by default.
      optional DB backend = 8 [default = LEVELDB];
      // DEPRECATED. See TransformationParameter. For data pre-processing, we can
      // do simple scaling and subtracting the data mean, if provided. Note that
      // the mean subtraction is always carried out before scaling.
      optional float scale = 2 [default = 1];
      optional string mean_file = 3;
      // DEPRECATED. See TransformationParameter. Specify if we would like to
      // randomly crop an image.
      optional uint32 crop_size = 5 [default = 0];
      // DEPRECATED. See TransformationParameter. Specify if we want to randomly
      // mirror data.
      optional bool mirror = 6 [default = false];
      // Force the encoded image to have 3 color channels.
      optional bool force_encoded_color = 9 [default = false];
      // Prefetch queue: the number of batches loaded into host memory ahead of
      // time (increase if data feeding bandwidth varies, within the limit of
      // device memory for GPU training). Defaults to 4 batches.
      optional uint32 prefetch = 10 [default = 4];
    }
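
    For reference, a minimal sketch of how these fields are typically used in a
    layer definition inside a network prototxt (the source path and the values
    below are illustrative assumptions, not taken from the original post):

      layer {
        name: "data"
        type: "Data"
        top: "data"    # the image blob
        top: "label"   # a second top enables label output
        include { phase: TRAIN }
        transform_param {
          scale: 0.00390625  # 1/256; see TransformationParameter
        }
        data_param {
          source: "examples/mnist/mnist_train_lmdb"  # assumed example path
          backend: LMDB
          batch_size: 64
          prefetch: 4
        }
      }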

     include/caffe/layers/base_data_layer.hpp

    #ifndef CAFFE_DATA_LAYERS_HPP_
    #define CAFFE_DATA_LAYERS_HPP_

    #include <vector>

    #include "caffe/blob.hpp"
    #include "caffe/data_transformer.hpp"
    #include "caffe/internal_thread.hpp"
    #include "caffe/layer.hpp"
    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/blocking_queue.hpp"

    namespace caffe {

    /**
     * @brief Provides base for data layers that feed blobs to the Net.
     *
     * TODO(dox): thorough documentation for Forward and proto params.
     */
    // The base data layer, derived from Layer.
    template <typename Dtype>
    class BaseDataLayer : public Layer<Dtype> {
     public:
      explicit BaseDataLayer(const LayerParameter& param);
      // LayerSetUp: implements common data layer setup functionality, and calls
      // DataLayerSetUp to do special data layer setup for individual layer types.
      // This method may not be overridden except by the BasePrefetchingDataLayer.
      virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top);
      virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {}
      // Data layers have no bottoms, so reshaping is trivial.
      virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {}
      // Backward passes do nothing: there is no bottom to propagate to.
      virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
          const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
      virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
          const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

     protected:
      TransformationParameter transform_param_;  // parameters of the data transformer
      shared_ptr<DataTransformer<Dtype> > data_transformer_;  // the preprocessing transformer
      bool output_labels_;  // whether labels are output as a second top blob
    };

    // One batch of data, holding the output of a data layer.
    template <typename Dtype>
    class Batch {
     public:
      Blob<Dtype> data_, label_;  // two Blobs storing the image data and the labels
    };

    // Data layer with prefetching, derived from BaseDataLayer and InternalThread.
    template <typename Dtype>
    class BasePrefetchingDataLayer :
        public BaseDataLayer<Dtype>, public InternalThread {
     public:
      explicit BasePrefetchingDataLayer(const LayerParameter& param);
      // LayerSetUp: implements common data layer setup functionality, and calls
      // DataLayerSetUp to do special data layer setup for individual layer types.
      // This method may not be overridden.
      void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top);

      // Forward passes: hand a prefetched batch to the top blobs.
      virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top);
      virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top);

     protected:
      virtual void InternalThreadEntry();  // entry point of the prefetch thread
      virtual void load_batch(Batch<Dtype>* batch) = 0;  // load one batch; pure virtual

      vector<shared_ptr<Batch<Dtype> > > prefetch_;  // the owned prefetch buffers
      BlockingQueue<Batch<Dtype>*> prefetch_free_;   // queue of free (empty) batches
      BlockingQueue<Batch<Dtype>*> prefetch_full_;   // queue of loaded (full) batches
      Batch<Dtype>* prefetch_current_;               // batch currently consumed by Forward

      Blob<Dtype> transformed_data_;  // scratch blob for the transformed data
    };

    }  // namespace caffe

    #endif  // CAFFE_DATA_LAYERS_HPP_
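
    The core of the design above is a producer/consumer pipeline built from the
    two BlockingQueue members: prefetch_free_ holds empty batches, prefetch_full_
    holds loaded ones, and batches circulate between the prefetch thread and the
    main thread. The following standalone sketch (plain C++11, not Caffe code;
    the simplified BlockingQueue here only mimics caffe::BlockingQueue) shows
    the pattern in isolation:

      #include <condition_variable>
      #include <iostream>
      #include <mutex>
      #include <queue>
      #include <thread>
      #include <vector>

      // A minimal blocking queue: pop() waits until an element is available.
      template <typename T>
      class BlockingQueue {
       public:
        void push(const T& t) {
          { std::lock_guard<std::mutex> lock(mutex_); queue_.push(t); }
          condition_.notify_one();
        }
        T pop() {
          std::unique_lock<std::mutex> lock(mutex_);
          condition_.wait(lock, [this] { return !queue_.empty(); });
          T t = queue_.front();
          queue_.pop();
          return t;
        }
       private:
        std::queue<T> queue_;
        std::mutex mutex_;
        std::condition_variable condition_;
      };

      struct Batch { std::vector<float> data; };

      int main() {
        const int kPrefetch = 4, kIters = 8;
        std::vector<Batch> batches(kPrefetch);
        BlockingQueue<Batch*> free_queue, full_queue;
        for (int i = 0; i < kPrefetch; ++i) {
          free_queue.push(&batches[i]);  // every batch starts out free
        }

        // Producer: plays the role of InternalThreadEntry()/load_batch().
        std::thread producer([&] {
          for (int i = 0; i < kIters; ++i) {
            Batch* batch = free_queue.pop();   // wait for an empty batch
            batch->data.assign(3, float(i));   // "load" it
            full_queue.push(batch);            // hand it to the consumer
          }
        });

        // Consumer: plays the role of Forward_cpu() on the main thread.
        for (int i = 0; i < kIters; ++i) {
          Batch* batch = full_queue.pop();     // wait for a loaded batch
          std::cout << "consumed batch " << batch->data[0] << std::endl;
          free_queue.push(batch);              // recycle it
        }
        producer.join();
        return 0;
      }

    One difference from this sketch: the real Forward_cpu recycles a batch one
    call later (via prefetch_current_), so the top blobs can keep pointing at
    that batch's memory for the whole current iteration.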

     src/caffe/layers/base_data_layer.cpp

    #include <boost/thread.hpp>
    #include <vector>

    #include "caffe/blob.hpp"
    #include "caffe/data_transformer.hpp"
    #include "caffe/internal_thread.hpp"
    #include "caffe/layer.hpp"
    #include "caffe/layers/base_data_layer.hpp"
    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/blocking_queue.hpp"

    namespace caffe {

    // Constructor: initializes the Layer parameters and the transformer parameters.
    template <typename Dtype>
    BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
        : Layer<Dtype>(param),
          transform_param_(param.transform_param()) {
    }

    // BaseDataLayer setup.
    template <typename Dtype>
    void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {
      // Check the number of top blobs: 1 means data only, 2 means data and label.
      if (top.size() == 1) {
        output_labels_ = false;
      } else {
        output_labels_ = true;
      }
      // Create and initialize the data transformer.
      data_transformer_.reset(
          new DataTransformer<Dtype>(transform_param_, this->phase_));
      data_transformer_->InitRand();
      // The subclasses should set up the shapes of the top blobs.
      DataLayerSetUp(bottom, top);
    }

    // BasePrefetchingDataLayer constructor.
    template <typename Dtype>
    BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
        const LayerParameter& param)
        : BaseDataLayer<Dtype>(param),
          prefetch_(param.data_param().prefetch()),
          prefetch_free_(), prefetch_full_(), prefetch_current_() {
      for (int i = 0; i < prefetch_.size(); ++i) {
        prefetch_[i].reset(new Batch<Dtype>());
        prefetch_free_.push(prefetch_[i].get());  // every batch starts in the free queue
      }
    }

    // BasePrefetchingDataLayer setup.
    template <typename Dtype>
    void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
      BaseDataLayer<Dtype>::LayerSetUp(bottom, top);

      // Before starting the prefetch thread, we make cpu_data and gpu_data
      // calls so that the prefetch thread does not accidentally make simultaneous
      // cudaMalloc calls while the main thread is running. On some GPUs this
      // seems to cause failures if we do not do so.
      for (int i = 0; i < prefetch_.size(); ++i) {
        prefetch_[i]->data_.mutable_cpu_data();
        if (this->output_labels_) {
          prefetch_[i]->label_.mutable_cpu_data();
        }
      }
      // Same for GPU memory.
    #ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        for (int i = 0; i < prefetch_.size(); ++i) {
          prefetch_[i]->data_.mutable_gpu_data();
          if (this->output_labels_) {
            prefetch_[i]->label_.mutable_gpu_data();
          }
        }
      }
    #endif
      DLOG(INFO) << "Initializing prefetch";
      this->data_transformer_->InitRand();
      StartInternalThread();  // start the internal prefetch thread
      DLOG(INFO) << "Prefetch initialized.";
    }

    // Entry point of the internal prefetch thread.
    template <typename Dtype>
    void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
    // Create a non-blocking CUDA stream for the async host-to-device copies.
    #ifndef CPU_ONLY
      cudaStream_t stream;
      if (Caffe::mode() == Caffe::GPU) {
        CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
      }
    #endif

      try {
        while (!must_stop()) {  // keep loading batches until asked to stop
          Batch<Dtype>* batch = prefetch_free_.pop();  // take an empty batch
          load_batch(batch);  // fill it with data (implemented by the subclass)
    #ifndef CPU_ONLY
          if (Caffe::mode() == Caffe::GPU) {
            batch->data_.data().get()->async_gpu_push(stream);
            if (this->output_labels_) {
              batch->label_.data().get()->async_gpu_push(stream);
            }
            CUDA_CHECK(cudaStreamSynchronize(stream));  // wait for the copy to the GPU
          }
    #endif
          prefetch_full_.push(batch);  // hand the loaded batch to the full queue
        }
      } catch (boost::thread_interrupted&) {
        // Interrupted exception is expected on shutdown; exit the loop.
      }
    #ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        CUDA_CHECK(cudaStreamDestroy(stream));  // destroy the CUDA stream
      }
    #endif
    }

    // Forward pass: consume one prefetched batch.
    template <typename Dtype>
    void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
      if (prefetch_current_) {
        prefetch_free_.push(prefetch_current_);  // recycle the previously consumed batch
      }
      // Take a loaded batch from the full queue (blocks until one is ready).
      prefetch_current_ = prefetch_full_.pop("Waiting for data");
      // Reshape to loaded data: the top blob takes the shape of the batch.
      top[0]->ReshapeLike(prefetch_current_->data_);
      top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
      if (this->output_labels_) {  // if label data is also output
        // Reshape to loaded labels.
        top[1]->ReshapeLike(prefetch_current_->label_);
        top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
      }
    }

    #ifdef CPU_ONLY
    STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
    #endif

    INSTANTIATE_CLASS(BaseDataLayer);
    INSTANTIATE_CLASS(BasePrefetchingDataLayer);

    }  // namespace caffe
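
    To make the load_batch() contract concrete, here is a schematic sketch of a
    minimal subclass. It follows the general shape of caffe::DataLayer but is
    simplified: NextDatum() is a hypothetical helper standing in for the real
    LMDB/LevelDB cursor logic, and the batch blobs are assumed to have been
    shaped during DataLayerSetUp (the real DataLayer also reshapes per batch):

      #include "caffe/layers/base_data_layer.hpp"
      #include "caffe/proto/caffe.pb.h"

      namespace caffe {

      template <typename Dtype>
      class ToyDataLayer : public BasePrefetchingDataLayer<Dtype> {
       public:
        explicit ToyDataLayer(const LayerParameter& param)
            : BasePrefetchingDataLayer<Dtype>(param) {}
        virtual inline const char* type() const { return "ToyData"; }

       protected:
        // Runs on the prefetch thread: fill one Batch with batch_size items.
        virtual void load_batch(Batch<Dtype>* batch) {
          const int batch_size = this->layer_param_.data_param().batch_size();
          Dtype* top_data = batch->data_.mutable_cpu_data();
          Dtype* top_label = this->output_labels_
              ? batch->label_.mutable_cpu_data() : NULL;
          for (int item_id = 0; item_id < batch_size; ++item_id) {
            Datum datum = NextDatum();  // hypothetical: read one serialized record
            // Point transformed_data_ at this item's slot in the batch, then
            // let the DataTransformer write the preprocessed image into it.
            const int offset = batch->data_.offset(item_id);
            this->transformed_data_.set_cpu_data(top_data + offset);
            this->data_transformer_->Transform(datum, &(this->transformed_data_));
            if (top_label) { top_label[item_id] = datum.label(); }
          }
        }

        Datum NextDatum();  // hypothetical; declared but not defined here
      };

      }  // namespace caffe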

    Excerpted with reference to Zhao Yongke's book 《21天实战caffe》 (21 Days of Hands-On Caffe).

  • Original post: https://www.cnblogs.com/xiangfeidemengzhu/p/7125257.html