• convert_mnist_data.cpp


    // This script converts the MNIST dataset to a lmdb (default) or
    // leveldb (--backend=leveldb) format used by caffe to load data.
    // Usage:
    //    convert_mnist_data [FLAGS] input_image_file input_label_file
    //                        output_db_file
    // The MNIST dataset could be downloaded at
    //    http://yann.lecun.com/exdb/mnist/
    
    #include <gflags/gflags.h>
    #include <glog/logging.h>
    #include <google/protobuf/text_format.h>
    
    #if defined(USE_LEVELDB) && defined(USE_LMDB)
    #include <leveldb/db.h>
    #include <leveldb/write_batch.h>
    #include <lmdb.h>
    #endif
    
    #include <stdint.h>
    #include <sys/stat.h>
    
    #include <fstream>  // NOLINT(readability/streams)
    #include <string>
    
    #include "boost/scoped_ptr.hpp"
    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/db.hpp"
    #include "caffe/util/format.hpp"
    
    #if defined(USE_LEVELDB) && defined(USE_LMDB)
    
    // Names from the caffe namespace (e.g. Datum, db::DB) are used unqualified
    // in the rest of this file.
    using namespace caffe;  // NOLINT(build/namespaces)
    using boost::scoped_ptr;
    using std::string;
    
    // gflags command-line flag --backend (exposed as FLAGS_backend). It selects
    // the database backend for the output and defaults to "lmdb" when the flag
    // is not given on the command line.
    DEFINE_string(backend, "lmdb", "The backend for storing the result");
    
    // Reverses the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA).
    // Used to convert the big-endian header fields of the input files.
    uint32_t swap_endian(uint32_t val) {
        // Isolate each byte and move it to its mirrored position.
        const uint32_t lowest_to_top = (val & 0x000000FFu) << 24;
        const uint32_t low_to_high = (val & 0x0000FF00u) << 8;
        const uint32_t high_to_low = (val & 0x00FF0000u) >> 8;
        const uint32_t top_to_lowest = (val & 0xFF000000u) >> 24;
        return lowest_to_top | low_to_high | high_to_low | top_to_lowest;
    }
    
    void convert_dataset(const char* image_filename, const char* label_filename,
            const char* db_path, const string& db_backend) {
      // Open files
      std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
      std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
      CHECK(image_file) << "Unable to open file " << image_filename;
      CHECK(label_file) << "Unable to open file " << label_filename;
      // Read the magic and the meta data
      uint32_t magic;
      uint32_t num_items;
      uint32_t num_labels;
      uint32_t rows;
      uint32_t cols;
    
      image_file.read(reinterpret_cast<char*>(&magic), 4);
      magic = swap_endian(magic);
      CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
      label_file.read(reinterpret_cast<char*>(&magic), 4);
      magic = swap_endian(magic);
      CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
      image_file.read(reinterpret_cast<char*>(&num_items), 4);
      num_items = swap_endian(num_items);
      label_file.read(reinterpret_cast<char*>(&num_labels), 4);
      num_labels = swap_endian(num_labels);
      CHECK_EQ(num_items, num_labels);
      image_file.read(reinterpret_cast<char*>(&rows), 4);
      rows = swap_endian(rows);
      image_file.read(reinterpret_cast<char*>(&cols), 4);
      cols = swap_endian(cols);
    
    
      scoped_ptr<db::DB> db(db::GetDB(db_backend));
      db->Open(db_path, db::NEW);
      scoped_ptr<db::Transaction> txn(db->NewTransaction());
    
      // Storing to db
      char label;
      char* pixels = new char[rows * cols];
      int count = 0;
      string value;
    
      Datum datum;
      datum.set_channels(1);
      datum.set_height(rows);
      datum.set_width(cols);
      LOG(INFO) << "A total of " << num_items << " items.";
      LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
      for (int item_id = 0; item_id < num_items; ++item_id) {
        image_file.read(pixels, rows * cols);
        label_file.read(&label, 1);
        datum.set_data(pixels, rows*cols);
        datum.set_label(label);
        string key_str = caffe::format_int(item_id, 8);
        datum.SerializeToString(&value);
    
        txn->Put(key_str, value);
    
        if (++count % 1000 == 0) {
          txn->Commit();
        }
      }
      // write the last batch
      if (count % 1000 != 0) {
          txn->Commit();
      }
      LOG(INFO) << "Processed " << count << " files.";
      delete[] pixels;
      db->Close();
    }
    
    // Program entry point for builds with both USE_LEVELDB and USE_LMDB.
    //
    // Expects exactly three positional arguments after flag parsing:
    //   argv[1] input image file, argv[2] input label file, argv[3] output db.
    // With any other argument count the usage text is printed instead of
    // converting. Returns 0 in both cases.
    int main(int argc, char** argv) {
    #ifndef GFLAGS_GFLAGS_H_
      // Presumably for gflags releases that still use the `google` namespace;
      // verify against the gflags versions this must build with.
      namespace gflags = google;
    #endif

      // glog flag: also send log messages to stderr.
      FLAGS_alsologtostderr = 1;

      // Every line of the usage text ends with an explicit "\n" escape; a
      // string literal must not contain a raw line break.
      gflags::SetUsageMessage("This script converts the MNIST dataset to\n"
            "the lmdb/leveldb format used by Caffe to load data.\n"
            "Usage:\n"
            "    convert_mnist_data [FLAGS] input_image_file input_label_file "
            "output_db_file\n"
            "The MNIST dataset could be downloaded at\n"
            "    http://yann.lecun.com/exdb/mnist/\n"
            "You should gunzip them after downloading,"
            "or directly use data/mnist/get_mnist.sh\n");
      // The final `true` asks gflags to remove the parsed flags from argv, so
      // only the positional arguments remain.
      gflags::ParseCommandLineFlags(&argc, &argv, true);

      const string& db_backend = FLAGS_backend;

      if (argc != 4) {
        gflags::ShowUsageWithFlagsRestrict(argv[0],
            "examples/mnist/convert_mnist_data");
      } else {
        google::InitGoogleLogging(argv[0]);
        convert_dataset(argv[1], argv[2], argv[3], db_backend);
      }
      return 0;
    }
    #else
    // Fallback entry point, compiled when USE_LEVELDB and USE_LMDB are not both
    // defined: it only emits a fatal log message naming the missing options.
    int main(int argc, char** argv) {
      // Adjacent string literals are joined by the compiler into one message.
      LOG(FATAL) << "This example requires LevelDB and LMDB; "
                    "compile with USE_LEVELDB and USE_LMDB.";
    }
    #endif  // USE_LEVELDB and USE_LMDB
    

      代码中 DEFINE_string(backend, "lmdb", "The backend for storing the result") 这一句使用了 gflags 工具(Google 开源的命令行参数解析库)。它的作用是定义一个名为 backend 的 string 类型命令行参数,默认值为 "lmdb";如果执行程序时没有指定该参数,就使用这个默认值。类似地,还可以用 DEFINE_int64、DEFINE_uint64、DEFINE_bool、DEFINE_double 等宏定义其他类型的参数。

      代码中 std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); 这一句里,ifstream 是输入文件流类,image_file 是该类的一个对象;std::ios::in | std::ios::binary 表示以二进制方式打开文件用于读取,类似于 C 语言 fopen 中的 "rb" 模式。

      

  • 相关阅读:
    C#_WinForm接收命令行参数
    SQLite不支持的SQL语法总结
    Thirft框架介绍
    jquery获取复选框的值
    REST构架风格介绍:状态表述转移
    RESTful HTTP的实践infoQ
    C#如何在webBrowser1控件通过TagName,Name查找元素(没有ID时)遍历窗体元素
    Html TO Ubb and Ubb TO Html
    SQL 位运算
    Memcached真的过时了吗?【转帖】
  • 原文地址:https://www.cnblogs.com/winnter/p/6350204.html
Copyright © 2020-2023  润新知