在caffe根目录下,执行以下脚本
./data/mnist/get_mnist.sh
脚本内容如下
#!/usr/bin/env sh
# This script downloads the MNIST dataset and unzips it.
# Run from anywhere; it cd's into its own directory first.
set -e

# Resolve the directory containing this script and work there.
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"

echo "Downloading..."

# Four files are downloaded:
#   train-images-idx3-ubyte  (training images)
#   train-labels-idx1-ubyte  (training labels)
#   t10k-images-idx3-ubyte   (test images)
#   t10k-labels-idx1-ubyte   (test labels)
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    # Skip files that already exist so the script is re-runnable.
    if [ ! -e "$fname" ]; then
        # Download the gzipped file; --no-check-certificate skips
        # TLS certificate validation on the https mirror.
        wget --no-check-certificate "http://yann.lecun.com/exdb/mnist/${fname}.gz"
        # Unpack in place (removes the .gz file).
        gunzip "${fname}.gz"
    fi
done
下载完成
shell 命令解析
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
--no-check-certificate 选项使 wget 在请求 https 站点时不校验服务器证书,即使证书无效或无法验证也能完成下载
gunzip ${fname}.gz
gunzip 解压文件
2.数据类型转换
在caffe根目录下执行
./examples/mnist/create_mnist.sh
原始数据为二进制文件,需要转换为 LevelDB 或者 LMDB 格式才能被 caffe 识别
create_mnist.sh
#!/usr/bin/env sh # This script converts the mnist data into lmdb/leveldb format, # depending on the value assigned to $BACKEND. set -e EXAMPLE=examples/mnist DATA=data/mnist BUILD=build/examples/mnist # 设置后台Db为lmdb BACKEND="lmdb" #输出日志 echo "Creating ${BACKEND}..." rm -rf $EXAMPLE/mnist_train_${BACKEND} rm -rf $EXAMPLE/mnist_test_${BACKEND} # 调用 convert_mnist_data.bin对于数据类型进行转换 $BUILD/convert_mnist_data.bin $DATA/train-images-idx3-ubyte \ $DATA/train-labels-idx1-ubyte $EXAMPLE/mnist_train_${BACKEND} --backend=${BACKEND} $BUILD/convert_mnist_data.bin $DATA/t10k-images-idx3-ubyte \ $DATA/t10k-labels-idx1-ubyte $EXAMPLE/mnist_test_${BACKEND} --backend=${BACKEND} echo "Done."
convert_mnist_data.bin使用由convert_mnist_data.cpp生成 ,源码如下
// 该脚本将mnist数据集转换为lmdd(默认类型)或者leveldb(通过设置--backend=leveldb格式以便于caffe加载数据 // 使用方式: // convert_mnist_data [FLAGS] input_image_file input_label_file output_db_file // The MNIST dataset could be downloaded at // http://yann.lecun.com/exdb/mnist/ #include <gflags/gflags.h> //调用gflags用于处理命令行标记 #include <glog/logging.h> //glog轻量级日志,用于日志输出 #include <google/protobuf/text_format.h>//protobuf //用于序列化网络文件,在效率上以及储存数据大小方面是由于jason 以及XML #if defined(USE_LEVELDB) && defined(USE_LMDB) #include <leveldb/db.h> //使用leveldb key-val类型数据库,不属于网络类型数据库,用于实现under_map类型的查找擦 #include <leveldb/write_batch.h>// #include <lmdb.h>//使用lmdb key-val类型数据库,image-net 就是使用该类型的数据库,进行图像的label-bytes(原始数据的存储) #endif #include <stdint.h> //c语言中定义的整数类型 #include <sys/stat.h>//c语言中文件,用于获取文件的全部属性 #include <fstream> // NOLINT(readability/streams) c++ 模式对流文件进行读写 #include <string> #include "boost/scoped_ptr.hpp"//相当于share_ptr但是,但是不能转让 #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/format.hpp" #if defined(USE_LEVELDB) && defined(USE_LMDB) using namespace caffe; // NOLINT(build/namespaces) using boost::scoped_ptr; using std::string; DEFINE_string(backend, "lmdb", "The backend for storing the result");
//大小端转换 uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16); } void convert_dataset(const char* image_filename, const char* label_filename, const char* db_path, const string& db_backend) { // Open files std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); CHECK(image_file) << "Unable to open file " << image_filename; CHECK(label_file) << "Unable to open file " << label_filename; // Read the magic and the meta data uint32_t magic; uint32_t num_items; uint32_t num_labels; uint32_t rows; uint32_t cols; image_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 2051) << "Incorrect image file magic."; label_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 2049) << "Incorrect label file magic."; image_file.read(reinterpret_cast<char*>(&num_items), 4); num_items = swap_endian(num_items); label_file.read(reinterpret_cast<char*>(&num_labels), 4); num_labels = swap_endian(num_labels); CHECK_EQ(num_items, num_labels); image_file.read(reinterpret_cast<char*>(&rows), 4); rows = swap_endian(rows); image_file.read(reinterpret_cast<char*>(&cols), 4); cols = swap_endian(cols); scoped_ptr<db::DB> db(db::GetDB(db_backend)); db->Open(db_path, db::NEW); scoped_ptr<db::Transaction> txn(db->NewTransaction()); // Storing to db char label; char* pixels = new char[rows * cols]; int count = 0; string value; Datum datum; datum.set_channels(1); datum.set_height(rows); datum.set_width(cols); LOG(INFO) << "A total of " << num_items << " items."; LOG(INFO) << "Rows: " << rows << " Cols: " << cols; for (int item_id = 0; item_id < num_items; ++item_id) {
//读取一次数据以及label image_file.read(pixels, rows * cols); label_file.read(&label, 1);
//存储到Datum数据类型,传入数值 datum.set_data(pixels, rows*cols); datum.set_label(label);
//获取当前样本的it转为string string key_str = caffe::format_int(item_id, 8);
//将样本图像数据以及label序列化 datum.SerializeToString(&value); //txn 存储到数据库中 txn->Put(key_str, value); if (++count % 1000 == 0) {
//批量提交 txn->Commit(); } } // write the last batch
// if (count % 1000 != 0) { txn->Commit(); } LOG(INFO) << "Processed " << count << " files."; delete[] pixels; db->Close(); } int main(int argc, char** argv) { #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif FLAGS_alsologtostderr = 1; gflags::SetUsageMessage("This script converts the MNIST dataset to\n" "the lmdb/leveldb format used by Caffe to load data.\n" "Usage:\n" " convert_mnist_data [FLAGS] input_image_file input_label_file " "output_db_file\n" "The MNIST dataset could be downloaded at\n" " http://yann.lecun.com/exdb/mnist/\n" "You should gunzip them after downloading," "or directly use data/mnist/get_mnist.sh\n"); gflags::ParseCommandLineFlags(&argc, &argv, true); const string& db_backend = FLAGS_backend; if (argc != 4) { gflags::ShowUsageWithFlagsRestrict(argv[0], "examples/mnist/convert_mnist_data"); } else {
//初始化log库 google::InitGoogleLogging(argv[0]); convert_dataset(argv[1], argv[2], argv[3], db_backend); } return 0; } #else int main(int argc, char** argv) { LOG(FATAL) << "This example requires LevelDB and LMDB; " << "compile with USE_LEVELDB and USE_LMDB."; } #endif // USE_LEVELDB and USE_LMDB
3.lenet-5 模型解析
caffe例子中lenet-5与原版稍有不同
./examples/mnist/lenet_train_test.prototxt
# Network name: LeNet.
name: "LeNet"
# Training data layer: type "Data" reads from an LMDB; it produces two
# top blobs, "data" (images) and "label". Active only in the TRAIN phase.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TRAIN }
  # Scale factor applied to each pixel: 0.00390625 = 1/255.
  transform_param { scale: 0.00390625 }
  data_param {
    source: "examples/mnist/mnist_train_lmdb"  # LMDB path
    batch_size: 64                              # 64 images per batch
    backend: LMDB                               # backing store is LMDB
  }
}
# Test data layer: same as above but only active in the TEST phase,
# reading from the test LMDB with batch size 100.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TEST }
  transform_param { scale: 0.00390625 }
  data_param {
    source: "examples/mnist/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
# First convolution layer: input blob "data", output blob "conv1".
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 }  # weight LR multiplier (1 = same as global rate)
  param { lr_mult: 2 }  # bias LR multiplier
  convolution_param {
    num_output: 20      # 20 output feature maps
    kernel_size: 5      # 5x5 kernels
    stride: 1           # dense output, no skipping
    weight_filler { type: "xavier" }    # xavier weight initialization
    bias_filler { type: "constant" }    # constant bias init (default 0)
  }
}
# First pooling layer: conv1 -> pool1, 2x2 max pooling with stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# Second convolution layer, same structure as conv1 but 50 feature maps.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
# Second pooling layer, same structure as pool1.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# Fully-connected (inner product) layer with 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 500
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
# Non-linearity: in-place ReLU on ip1.
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
# Final fully-connected layer: 10 outputs, one per digit class.
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 10
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}
# Accuracy layer: TEST phase only; compares ip2 against label,
# outputs "accuracy".
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include { phase: TEST }
}
# Loss layer: softmax + multinomial logistic loss over ip2 vs label,
# outputs "loss".
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
模型可视化 工具http://ethereon.github.io/netscope/#/editor
网络解析:
数据源mnist负责从原始数据到lmdb数据库转化的图像数据data和标签数据label
图像数据后续被送入CNN结构中进行处理。
CNN结构由卷积层conv1、conv2与下采样层pool1、pool2交替形成的特征提取部分,以及全连接层ip1和ip2(类似多层感知机结构)组成。
对ip2的输出进一步同标签数据label对比,可计算分类准确性accuracy以及loss.
4.训练模型
模型训练脚本
examples/mnist/train_lenet.sh
脚本如下
./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt
之前编译完成了build/tools/caffe.bin 二进制文件,参数--solver=examples/mnist/lenet_solver.prototxt指定了训练超参数(hyper-parameter)
机器学习模型中一般有两类参数:一类需要从数据中学习和估计得到,称为模型参数(Parameter)---即模型本身的参数。比如,线性回归直线的加权系数(斜率)及其偏差项(截距)都是模型参数。还有一类则是机器学习算法中的调优参数(tuning parameters),需要人为设定,称为超参数,所谓调参数,是调节超参数,eg,梯度下降法中的学习速率α,迭代次数epoch,批量大小batch-size
lenet_solver.prototxt 内容如下
# The train/test net protocol buffer definition net: "examples/mnist/lenet_train_test.prototxt" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, //batch 大小为100,预测阶段迭代次数为100 # covering the full 10,000 testing images.//覆盖一万张图片 test_iter: 100 //预测阶段的迭代次数 # Carry out testing every 500 training iterations. //每训练500次进行一次预测 test_interval: 500 # The base learning rate, momentum and the weight decay of the network. //网络学习率,冲量以及权衰量 base_lr: 0.01 momentum: 0.9 weight_decay: 0.0005 # The learning rate policy //学习率的衰减策略 lr_policy: "inv" gamma: 0.0001 power: 0.75 # Display every 100 iterations //每进行100次迭代打印一次log display: 100 # The maximum number of iterations 最大迭代次数10000 max_iter: 10000 # snapshot intermediate results //每迭代5000次进行一次快照 snapshot: 5000 snapshot_prefix: "examples/mnist/lenet" # solver mode: CPU or GPU //求解模式为GPU solver_mode: GPU
进入caffe 根目录
cd caffe
运行脚本
./examples/mnist/train_lenet.sh
训练过程
1.从文件初始化train网络
分析需要反向传播的层
生成test网络
同样进行网络创建以及计算反向传播层(省略)
初始化第一次进行准确率的计算后开始进行训练操作
如下图所示,每100次迭代输入打印一次日志,loss值逐渐变小,学习率逐渐降低
在第10000次迭代时候输出对应的accuracy以及损失率
5.模型预测
./build/tools/caffe
以上为caffe.bin的命令行参数
./build/tools/caffe test -model examples/mnist/lenet_train_test.prototxt -weights examples/mnist/lenet_iter_10000.caffemodel -iterations 100
预测 模型路径 指定路径预先训练的权重值文件 指定迭代次数100