• RoIPooling


     

    代码:

    /**
     * @brief CPU forward pass of ROI max pooling.
     *
     * For every ROI in bottom[1] (five values each:
     * [batch_index, x1, y1, x2, y2]) the box is scaled by spatial_scale_
     * (presumably mapping input-image coordinates onto the feature map), split
     * into a pooled_height_ x pooled_width_ grid of bins, and each bin is
     * max-pooled independently for every channel of the selected batch item.
     *
     * @param bottom bottom[0] is the feature-map blob, bottom[1] the ROI blob.
     * @param top    top[0] receives the pooled values; it is walked one
     *               channel plane per (ROI, channel) pair, so it is assumed to
     *               be laid out as num_rois x channels_ x pooled_height_ x
     *               pooled_width_ -- TODO confirm against Reshape().
     *
     * Side effects: max_idx_ is filled, in step with top[0], with the index
     * (h * width_ + w, relative to the channel plane) of the element that
     * produced each maximum, or -1 for an empty bin.
     */
    template <typename Dtype>
    void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {
      // The input consists of two parts: the feature data and the ROIs.
      const Dtype* bottom_data = bottom[0]->cpu_data();
      const Dtype* bottom_rois = bottom[1]->cpu_data();
      // Number of ROIs
      const int num_rois = bottom[1]->num();
      const int batch_size = bottom[0]->num();
      const int top_count = top[0]->count();
      // Start every output at the lowest possible value so that any real input
      // value wins the first comparison; argmax starts at -1 ("no element").
      Dtype* top_data = top[0]->mutable_cpu_data();
      caffe_set(top_count, Dtype(-FLT_MAX), top_data);
      int* argmax_data = max_idx_.mutable_cpu_data();
      caffe_set(top_count, -1, argmax_data);

      // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
      for (int n = 0; n < num_rois; ++n) {
        const int roi_batch_ind = static_cast<int>(bottom_rois[0]);
        // Reject ROIs that refer to a batch item that does not exist; without
        // this the pooling loops below would read outside bottom[0].
        CHECK_GE(roi_batch_ind, 0) << "ROI batch index must be non-negative";
        CHECK_LT(roi_batch_ind, batch_size)
            << "ROI batch index exceeds the batch size of bottom[0]";
        // Scale the ROI corners onto the feature map.
        const int roi_start_w =
            static_cast<int>(round(bottom_rois[1] * spatial_scale_));
        const int roi_start_h =
            static_cast<int>(round(bottom_rois[2] * spatial_scale_));
        const int roi_end_w =
            static_cast<int>(round(bottom_rois[3] * spatial_scale_));
        const int roi_end_h =
            static_cast<int>(round(bottom_rois[4] * spatial_scale_));
        // Size of the ROI on the feature map; forced to be at least 1x1.
        const int roi_height = max(roi_end_h - roi_start_h + 1, 1);
        const int roi_width = max(roi_end_w - roi_start_w + 1, 1);
        // Extent on the feature map covered by a single pooled output cell.
        // ROIs differ in size, so this has to be recomputed for every ROI.
        const Dtype bin_size_h = static_cast<Dtype>(roi_height)
                                 / static_cast<Dtype>(pooled_height_);
        const Dtype bin_size_w = static_cast<Dtype>(roi_width)
                                 / static_cast<Dtype>(pooled_width_);
        // Feature maps of the batch item this ROI belongs to (with a batch
        // size of 1, roi_batch_ind is always 0).
        const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind);
        // Pooling is done independently for every channel.
        for (int c = 0; c < channels_; ++c) {
          // Visit every cell of the pooled output and compute its value.
          for (int ph = 0; ph < pooled_height_; ++ph) {
            for (int pw = 0; pw < pooled_width_; ++pw) {
              // Compute pooling region for this output unit:
              //  start (included) = floor(ph * roi_height / pooled_height_)
              //  end (excluded) = ceil((ph + 1) * roi_height / pooled_height_)
              // i.e. the bin [hstart, hend) x [wstart, wend), still relative
              // to the ROI's top-left corner at this point.
              int hstart = static_cast<int>(floor(static_cast<Dtype>(ph)
                                                  * bin_size_h));
              int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1)
                                               * bin_size_h));
              int wstart = static_cast<int>(floor(static_cast<Dtype>(pw)
                                                  * bin_size_w));
              int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1)
                                               * bin_size_w));
              // Translate the bin to its position on the feature map and clamp
              // it to [0, height_] x [0, width_].
              hstart = min(max(hstart + roi_start_h, 0), height_);
              hend = min(max(hend + roi_start_h, 0), height_);
              wstart = min(max(wstart + roi_start_w, 0), width_);
              wend = min(max(wend + roi_start_w, 0), width_);
              // After clamping the bin may contain no elements at all.
              const bool is_empty = (hend <= hstart) || (wend <= wstart);
              // Position of this output cell inside the current channel plane.
              const int pool_index = ph * pooled_width_ + pw;
              // An empty bin outputs 0 and records -1 as its argmax; the loops
              // below then run zero iterations and leave both values alone.
              if (is_empty) {
                top_data[pool_index] = 0;
                argmax_data[pool_index] = -1;
              }
              // Scan the input elements covered by this bin.
              for (int h = hstart; h < hend; ++h) {
                for (int w = wstart; w < wend; ++w) {
                  // Position inside the current input channel plane.
                  const int index = h * width_ + w;
                  // Keep the running maximum and remember where it came from.
                  if (batch_data[index] > top_data[pool_index]) {
                    top_data[pool_index] = batch_data[index];
                    argmax_data[pool_index] = index;
                  }
                }
              }
            }
          }
          // Increment all data pointers by one channel
          batch_data += bottom[0]->offset(0, 1);
          top_data += top[0]->offset(0, 1);
          argmax_data += max_idx_.offset(0, 1);
        }
        // Increment ROI data pointer
        bottom_rois += bottom[1]->offset(1);
      }
    }
    

      

  • 相关阅读:
    【转】java线程池ThreadPoolExecutor使用介绍
    java的类加载机制
    java面试问题分类
    ConcurrentHashMap总结
    ffmpeg对视频封装和分离
    SSM的整合
    单例模式的七种写法
    SecureCRT的快捷键
    linux下mysql常用命令
    maven操作
  • 原文地址:https://www.cnblogs.com/fanhaha/p/7815151.html
Copyright © 2020-2023  润新知