• prior_box层


    https://www.jianshu.com/p/5195165bbd06

    1.step_w、step_h其实就相当于faster中的feat_stride,也就是把这些点从feature map映射回原图,同时也可以看出min_size、max_size这些都是直接在针对原图来讲的

    2.以mobileNet-ssd为例子:https://github.com/chuanqi305/MobileNet-SSD/blob/master/train.prototxt

    # PriorBox layer attached to conv11. Only min_size is given (no max_size)
    # and there is a single aspect_ratio with flip enabled, so each feature-map
    # location gets 1 (min_size square) + 1 * 2 (ratio 2.0 and its reciprocal)
    # = 3 priors.
    layer {
      name: "conv11_mbox_priorbox"
      type: "PriorBox"
      bottom: "conv11"
      bottom: "data"
      top: "conv11_mbox_priorbox"
      prior_box_param {
        min_size: 60.0
        aspect_ratio: 2.0
        flip: true
        clip: false
        # Four variances, one per box coordinate (xmin, ymin, xmax, ymax).
        variance: 0.1
        variance: 0.1
        variance: 0.2
        variance: 0.2
        # Added to the cell index before scaling by the step, i.e. 0.5 puts the
        # prior centre in the middle of the cell.
        offset: 0.5
      }
    }
    # PriorBox layer attached to conv13. Both min_size and max_size are given
    # and there are two aspect_ratios with flip enabled, so each feature-map
    # location gets 1 (min_size square) + 1 (sqrt(min_size * max_size) square)
    # + 2 * 2 (ratios 2.0, 3.0 and their reciprocals) = 6 priors.
    layer {
      name: "conv13_mbox_priorbox"
      type: "PriorBox"
      bottom: "conv13"
      bottom: "data"
      top: "conv13_mbox_priorbox"
      prior_box_param {
        min_size: 105.0
        max_size: 150.0
        aspect_ratio: 2.0
        aspect_ratio: 3.0
        flip: true
        clip: false
        # Four variances, one per box coordinate (xmin, ymin, xmax, ymax).
        variance: 0.1
        variance: 0.1
        variance: 0.2
        variance: 0.2
        # Added to the cell index before scaling by the step, i.e. 0.5 puts the
        # prior centre in the middle of the cell.
        offset: 0.5
      }
    }

    只有conv11的anchor个数是3,其他5层都是6,原因是conv11只有min_size,没有max_size,并且aspect_ratio只有1个,其他5层都是两个,也就是说conv11是1+1*2=3,其他5层是1+1+2*2=6

    prior_box_layer.cpp里,aspect_ratios_根据这层的param存储相应的aspect ratio.如果flip为true,param里一个aspect ratio就要存储他本身和他的倒数两个值

      // Build the list of aspect ratios used by this layer. 1.0 is always the
      // first entry; every distinct ratio from the layer parameters is appended,
      // followed by its reciprocal when flip is enabled.
      aspect_ratios_.clear();
      aspect_ratios_.push_back(1.);
      flip_ = prior_box_param.flip();
      for (int i = 0; i < prior_box_param.aspect_ratio_size(); ++i) {
        float ar = prior_box_param.aspect_ratio(i);
        bool already_exist = false;
        for (int j = 0; j < aspect_ratios_.size(); ++j) {     // check whether this ratio is already stored (within 1e-6)
          if (fabs(ar - aspect_ratios_[j]) < 1e-6) {
            already_exist = true;
            break;
          }
        }
        if (!already_exist) {
          aspect_ratios_.push_back(ar);              // if flip is true, store the ratio and its reciprocal; otherwise store only the ratio itself
          if (flip_) {
            aspect_ratios_.push_back(1./ar);
          }
        }
    }

    对于每个点,先计算以min_size为边长的正方形这个anchor;然后如果有max_size,再计算以sqrt(min_size_ * max_size_)为边长的正方形;最后遍历aspect_ratios_中所有不等于1的aspect ratio(等于1的情况已经由前面的正方形覆盖,代码里直接continue跳过),按box_width = min_size_ * sqrt(ar)和box_height = min_size_ / sqrt(ar)计算anchor.当flip为true时,prototxt的param里的一个ratio会在aspect_ratios_中存储它本身和它的倒数,这样一个ratio就对应两个anchor

      // For every feature-map cell, append the prior boxes to top_data as
      // (xmin, ymin, xmax, ymax), each divided by the image width/height.
      // Per cell and per min_size the write order is: the min_size square, the
      // optional sqrt(min_size * max_size) square, then one box for every
      // aspect ratio that is not 1. idx is the running write position and is
      // declared outside this excerpt.
      for (int h = 0; h < layer_height; ++h) {
        for (int w = 0; w < layer_width; ++w) {
          // Cell centre: cell index plus offset_, scaled by the step.
          float center_x = (w + offset_) * step_w;
          float center_y = (h + offset_) * step_h;
          float box_width, box_height;
          for (int s = 0; s < min_sizes_.size(); ++s) {
            // NOTE(review): declared int, so a fractional min_sizes_[s] would be
            // truncated here -- confirm the element type of min_sizes_.
            int min_size_ = min_sizes_[s];
            // first prior: aspect_ratio = 1, size = min_size
            box_width = box_height = min_size_;
            // xmin
            top_data[idx++] = (center_x - box_width / 2.) / img_width;
            // ymin
            top_data[idx++] = (center_y - box_height / 2.) / img_height;
            // xmax
            top_data[idx++] = (center_x + box_width / 2.) / img_width;
            // ymax
            top_data[idx++] = (center_y + box_height / 2.) / img_height;
    
            if (max_sizes_.size() > 0) {
              // max_sizes_ is indexed with the same s, so both lists must have
              // the same length.
              CHECK_EQ(min_sizes_.size(), max_sizes_.size());
              int max_size_ = max_sizes_[s];
              // second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
              box_width = box_height = sqrt(min_size_ * max_size_);
              // xmin
              top_data[idx++] = (center_x - box_width / 2.) / img_width;
              // ymin
              top_data[idx++] = (center_y - box_height / 2.) / img_height;
              // xmax
              top_data[idx++] = (center_x + box_width / 2.) / img_width;
              // ymax
              top_data[idx++] = (center_y + box_height / 2.) / img_height;
            }
    
            // rest of priors
            for (int r = 0; r < aspect_ratios_.size(); ++r) {
              float ar = aspect_ratios_[r];
              // Ratio 1 is skipped: the square priors above already cover it.
              if (fabs(ar - 1.) < 1e-6) {
                continue;
              }
              // Width is scaled by sqrt(ar) and height by 1/sqrt(ar), so
              // width * height stays min_size_ * min_size_.
              box_width = min_size_ * sqrt(ar);
              box_height = min_size_ / sqrt(ar);
              // xmin
              top_data[idx++] = (center_x - box_width / 2.) / img_width;
              // ymin
              top_data[idx++] = (center_y - box_height / 2.) / img_height;
              // xmax
              top_data[idx++] = (center_x + box_width / 2.) / img_width;
              // ymax
              top_data[idx++] = (center_y + box_height / 2.) / img_height;
            }
          }
        }
    }

    3.从reshape可以看出,输出的shape是(1,2,layer_width * layer_height * num_priors_ * 4).layer_width * layer_height * num_priors_ * 4是feature map上的点数乘以每个点的anchor数,再乘以每个anchor对应的4个坐标.比如整个blob中第一组4个值存储的就是feature map中第一个像素点的min_size对应的正方形anchor的4个坐标值,第二组4个值(在配置了max_size的情况下)就是第一个像素点的以sqrt(min_size * max_size)为边长的正方形anchor的4个坐标值

    // Shapes the single top blob as (1, 2, layer_width * layer_height *
    // num_priors_ * 4), where layer_width/layer_height are read from bottom[0].
    // num_priors_ is a member set outside this excerpt (presumably the number
    // of priors per feature-map location -- confirm in LayerSetUp).
    void PriorBoxLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
          const vector<Blob<Dtype>*>& top) {
      const int layer_width = bottom[0]->width();
      const int layer_height = bottom[0]->height();
      vector<int> top_shape(3, 1);
      // Since all images in a batch have the same height and width, we only need
      // to generate one set of priors which can be shared across all images.
      top_shape[0] = 1;
      // 2 channels. First channel stores the mean of each prior coordinate.
      // Second channel stores the variance of each prior coordinate.
      top_shape[1] = 2;
      // Four coordinates per prior, for every prior of every location.
      top_shape[2] = layer_width * layer_height * num_priors_ * 4;
      // Fail early if the feature map or the prior count is empty.
      CHECK_GT(top_shape[2], 0);
      top[0]->Reshape(top_shape);
    }

    注意到,输出是2个channel的:第一个channel存储的是每个anchor真实的4个坐标,第二个channel存储的是variance.variance_在layer_setup里面就初始化了4个值,这4个值来自于prototxt的param,分别对应4个坐标(xmin,ymin,xmax,ymax).对于每个anchor,都会有对应的这4个variance值,这些值存储在第二个channel,并且以4个值为一组不断重复

     // Fill the second channel of the top blob with the variances.
     // Advance the write pointer to channel 1 of item 0.
     top_data += top[0]->offset(0, 1);
      if (variance_.size() == 1) {
        // A single variance is used for every entry. dim is defined outside this
        // excerpt (presumably the number of elements in one channel -- confirm).
        caffe_set<Dtype>(dim, Dtype(variance_[0]), top_data);
      } else {
        // Otherwise write variance_[0..3] once per prior, so the four values
        // repeat for every prior of every feature-map location.
        int count = 0;
        for (int h = 0; h < layer_height; ++h) {
          for (int w = 0; w < layer_width; ++w) {
            for (int i = 0; i < num_priors_; ++i) {
              for (int j = 0; j < 4; ++j) {
                top_data[count] = variance_[j];
                ++count;
              }
            }
          }
        }
    }

     4.http://www.360doc.com/content/17/0810/10/10408243_678091430.shtml

    下面这两段代码都来自于bbox_util.cpp的DecodeBBox函数.prior_box层输出的prior_variance就是一个系数,这个系数乘以bounding box regression的回归值;在faster中,是直接在anchor的坐标上加bounding box regression,ssd这里可以对回归值乘以一个系数.当然DecodeBBox其实也可以使用faster那种方式,可以通过参数控制

    // Excerpt 1 (the matching `if` is outside this excerpt): each decoded
    // corner is the prior corner plus the predicted offset scaled only by the
    // corresponding prior variance.
    else {
          // variance is encoded in bbox, we need to scale the offset accordingly.
          decode_bbox->set_xmin(
              prior_bbox.xmin() + prior_variance[0] * bbox.xmin());
          decode_bbox->set_ymin(
              prior_bbox.ymin() + prior_variance[1] * bbox.ymin());
          decode_bbox->set_xmax(
              prior_bbox.xmax() + prior_variance[2] * bbox.xmax());
          decode_bbox->set_ymax(
              prior_bbox.ymax() + prior_variance[3] * bbox.ymax());
    }
    // Excerpt 2 (a separate branch; the matching `if` is outside this excerpt):
    // same as above, but each predicted offset is additionally multiplied by
    // prior_width (x coordinates) or prior_height (y coordinates).
    else {
          // variance is encoded in bbox, we need to scale the offset accordingly.
          decode_bbox->set_xmin(
              prior_bbox.xmin() + prior_variance[0] * bbox.xmin() * prior_width);
          decode_bbox->set_ymin(
              prior_bbox.ymin() + prior_variance[1] * bbox.ymin() * prior_height);
          decode_bbox->set_xmax(
              prior_bbox.xmax() + prior_variance[2] * bbox.xmax() * prior_width);
          decode_bbox->set_ymax(
              prior_bbox.ymax() + prior_variance[3] * bbox.ymax() * prior_height);
    }

     5.https://zhuanlan.zhihu.com/p/33544892 这个介绍了每层的prior如何确定min_size

    对于后面的特征图,先验框尺度按照上面公式线性增加,但是先将尺度比例扩大100倍,此时增长步长为 $\left\lfloor \frac{\lfloor s_{max}\times 100\rfloor - \lfloor s_{min}\times 100\rfloor}{m-1} \right\rfloor = 17$ (这里 $s_{min}=0.2$,$s_{max}=0.9$,$m=5$),这样各个特征图的 $s_k$ 为 20, 37, 54, 71, 88 ,将这些比例除以100,然后再乘以图片大小,可以得到各个特征图的尺度为 60, 111, 162, 213, 264 ,这种计算方式是参考SSD的Caffe源码。综上,可以得到各个特征图的先验框尺度 30, 60, 111, 162, 213, 264

  • 相关阅读:
    git学习笔记
    ExtJs自学教程(1):一切从API開始
    Floodlight 处理交换机增加/移除过程
    飘逸的python
    Mapreduce运行过程分析(基于Hadoop2.4)——(三)
    oracle中LAG()和LEAD()等分析统计函数的使用方法(统计月增长率)
    Linux学习笔记总结
    看完锤子手机公布会直播 有感
    iOS iOS8中 问题"registerForRemoteNotificationTypes: is not supported in iOS 8.0 and later" 解决方式
    读书笔记-HBase in Action-第二部分Advanced concepts-(3)非Javaclient
  • 原文地址:https://www.cnblogs.com/ymjyqsx/p/9230939.html
Copyright © 2020-2023  润新知