• caffe多标签输入和多任务学习


    拷贝convert_imageset,生成新工程convert_imageset_multi_label

    image

    修改源码

    // Parse the list file. Each record is "<image path> v1 v2 ... vN", where N
    // (label_count) comes from the fifth command-line argument.
    // NOTE(review): argv[5] is read here without a visible argument-count check;
    // confirm the enclosing code validates the number of arguments first.
    std::ifstream infile(argv[2]);
    CHECK(infile.is_open()) << "Failed to open list file '" << argv[2] << "'";
    std::vector<std::pair<std::string, std::vector<float> > > lines;
    std::string filename;
    std::string label_count_string = argv[5];
    int label_count = std::atoi(label_count_string.c_str());
    // atoi yields 0 for non-numeric text, and the first label is required later
    // on, so reject anything that is not a positive count.
    CHECK_GT(label_count, 0) << "Invalid label count '" << label_count_string
                             << "'";
    std::vector<float> label(label_count);
    while (infile >> filename) {
        for (int i = 0; i < label_count; i++) {
            infile >> label[i];
            // Without this check a short or malformed line would silently keep
            // the label values of the previous record.
            CHECK(!infile.fail()) << "Could not read label " << i << " of "
                                  << label_count << " for '" << filename << "'";
        }
        lines.push_back(std::make_pair(filename, label));
    }
    if (FLAGS_shuffle) {
        // randomly shuffle data
        LOG(INFO) << "Shuffling data";
        shuffle(lines.begin(), lines.end());
    }
    LOG(INFO) << "A total of " << lines.size() << " images.";
    
        if (encode_type.size() && !encoded)
            LOG(INFO) << "encode_type specified, assuming encoded=true.";
    
        int resize_height = std::max<int>(0, FLAGS_resize_height);
        int resize_width = std::max<int>(0, FLAGS_resize_width);
    
        // Create new DB
        scoped_ptr<db::DB> db_image(db::GetDB(FLAGS_backend));
        scoped_ptr<db::DB> db_label(db::GetDB(FLAGS_backend));
        db_image->Open(argv[3], db::NEW);
        db_label->Open(argv[4], db::NEW);
        scoped_ptr<db::Transaction> txn_image(db_image->NewTransaction());
        scoped_ptr<db::Transaction> txn_label(db_label->NewTransaction());
    
    //   // Create new DB
    //   scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
    //   db->Open(argv[3], db::NEW);
    //   scoped_ptr<db::Transaction> txn(db->NewTransaction());
    
      // Storing to db
      std::string root_folder(argv[1]);
      Datum datum_image;
      Datum datum_label;
      int count = 0;
      int data_size = 0;
      bool data_size_initialized = false;
    
      for (int line_id = 0; line_id < lines.size(); ++line_id) {
        bool status;
        std::string enc = encode_type;
        if (encoded && !enc.size()) {
          // Guess the encoding type from the file name
          string fn = lines[line_id].first;
          size_t p = fn.rfind('.');
          if ( p == fn.npos )
            LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
          enc = fn.substr(p);
          std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
        }
        status = ReadImageToDatum(root_folder + lines[line_id].first,
            lines[line_id].second[0], resize_height, resize_width, is_color,
            enc, &datum_image);
        if (status == false) continue;
        if (check_size) {
          if (!data_size_initialized) {
              data_size = datum_image.channels() * datum_image.height() * datum_image.width();
            data_size_initialized = true;
          } else {
              const std::string& data = datum_image.data();
            CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
                << data.size();
          }
        }
        // sequential
        string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;
    
        // Put in db
        string out;
        CHECK(datum_image.SerializeToString(&out));
        txn_image->Put(key_str, out);
        //////////////////////////////////////////////////////////////////////////    
        
        datum_label.set_channels(label_count);
        datum_label.set_height(1);
        datum_label.set_width(1);
        
        datum_label.clear_data();
        datum_label.clear_float_data();
        datum_label.set_encoded(false);
        std::vector<float> label_vec = lines[line_id].second;
        for (int i = 0; i < label_vec.size();i++)
        {
            datum_label.add_float_data(label_vec[i]);
        }
        string out_label;
        CHECK(datum_label.SerializeToString(&out_label));
        txn_label->Put(key_str, out_label);
        //////////////////////////////////////////////////////////////////////////
        if (++count % 1000 == 0) {
          // Commit db
          txn_image->Commit();
          txn_image.reset(db_image->NewTransaction());
    
          txn_label->Commit();
          txn_label.reset(db_label->NewTransaction());
    
          LOG(INFO) << "Processed " << count << " files.";
        }
      }
      // write the last batch
      if (count % 1000 != 0) {
        txn_image->Commit();
        txn_label->Commit();
        LOG(INFO) << "Processed " << count << " files.";
      }

    上述方式需要使用两个data层(图像和标签各对应一个数据库),编译之后,使用如下命令生成数据库:

    Build\x64\Release>convert_imageset_multi_label.exe ./ train.txt data/train_image_lmdb data/train_label_lmdb 4

    train.txt文件格式如下:

    data/00A03AF5-41C7-4966-8EF3-8B2C90DCF75C_cgfn.jpg 1 2 3 6
    data/00A15FBD-9637-44C5-B2E7-81611263C88C_tmph.jpg 2 5 6 4

    网络配置文件需要加入slice层将标签分割开来

    # Splits the multi-label "label" blob into one blob per task.
    # With three slice points along axis 1, four tops are produced; this matches
    # the 4 label values per image used in the example above.
    layer {
      name: "slice"
      type: "Slice"
      bottom: "label"
      top: "label_1"
      top: "label_2"
      top: "label_3"
      top: "label_4"
      slice_param {
        # axis 1 is the channel axis in Caffe's N x C x H x W blob layout
        axis: 1
        # cut after channel indices 1, 2 and 3 -> four single-channel outputs
        slice_point: 1
        slice_point: 2
        slice_point: 3
      }
    }

    image

    也可以通过python直接生成lmdb格式,其方式如下:

    # -*- coding: utf-8 -*-
    """
    Build an image LMDB and a matching multi-label LMDB from a list file.

    Each line of the list file is "<image file> l1 l2 l3 l4". The image and its
    label vector are stored under the same zero-padded key in two databases.

    Created on Sat Dec 24 20:57:28 2016
    
    @author: zhouly
    """
    
    import lmdb
    from skimage import io
    import numpy as np 
    import sys
    caffe_root = '../../'
    sys.path.insert(0, caffe_root + '/python')
    import caffe
    import cv2
    
    root = '../../'
    num_labels = 4  # number of label values expected after the image name
    
    in_image_db = lmdb.open(root + 'examples/99/train_image_lmdb', map_size=int(1e12))
    in_label_db = lmdb.open(root + 'examples/99/train_label_lmdb', map_size=int(1e12))
    in_image_txn = in_image_db.begin(write=True)
    in_label_txn = in_label_db.begin(write=True)
    
    # The with-block closes the list file even if an error interrupts the loop.
    with open(root + 'data/train.txt', 'r') as file_input:
        for in_idx, in_ in enumerate(file_input):
            content = in_.split()
            # Skip blank or short lines up front so that no image is written
            # without its labels.
            if len(content) < num_labels + 1:
                print('skipping malformed line {}: {!r}'.format(in_idx + 1, in_))
                continue
    
            im_file = root + 'data/verification/' + content[0]
            try:
                im = io.imread(im_file)
            except Exception:
                # Unreadable image: report it and move on. Catching Exception
                # (not a bare except) keeps Ctrl-C and SystemExit working.
                print('------------------------- {}'.format(im_file))
                continue
    
            # NOTE(review): index 3 selects the fourth channel (presumably the
            # alpha channel of RGBA images); this raises IndexError for
            # 3-channel images -- confirm the input format.
            im = im[:, :, 3]
            im = cv2.resize(im, (224, 224), interpolation=cv2.INTER_LINEAR)
            # Replicate the single channel into a 3 x 224 x 224 uint8 array.
            data = np.zeros((3, 224, 224), np.uint8)
            data[...] = im
    
            # Encoding the key keeps it a byte string on Python 2 and Python 3.
            key = '{:0>10d}'.format(in_idx).encode('ascii')
            im_dat = caffe.io.array_to_datum(data)
            in_image_txn.put(key, im_dat.SerializeToString())
            print('data train: {} [{}]'.format(content[0], in_idx + 1))
    
            # Labels are stored as a num_labels x 1 x 1 float array.
            target_label = np.zeros((num_labels, 1, 1))
            target_label[:, 0, 0] = [int(v) for v in content[1:num_labels + 1]]
            label_data = caffe.io.array_to_datum(target_label)
            in_label_txn.put(key, label_data.SerializeToString())
    
    in_image_txn.commit()
    in_label_txn.commit()
    in_image_db.close()
    in_label_db.close()
  • 相关阅读:
    Enum, Generic and Templates
    Writing A Threadpool in Rust
    A First Look at Rust Language
    Closures in OOC
    Complexity Behind Closure
    Introduction to OOC Programming Language
    OOC,泛型,糟糕的设计。
    Enlightenment笔记
    Machine Learning/Random Projection
    Machine Learning/Introducing Logistic Function
  • 原文地址:https://www.cnblogs.com/linyuanzhou/p/6370677.html
Copyright © 2020-2023  润新知