一.代码实现
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 9 15:33:39 2019

@author: zhen

Transfer-learning demo: a frozen, ImageNet-pretrained VGG16 convolutional
base with a new fully connected head, trained on a 1000-sample subset of
MNIST that has been resized to 48x48 and converted to 3-channel RGB.
"""

from keras.applications.vgg16 import VGG16

from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Model
from keras.optimizers import SGD

from keras.datasets import mnist

import cv2
import numpy as np

# The originally recommended setup needs a lot of memory (at least 24 GB
# according to the original author), so the smallest resolution is used
# here to lower the memory requirement.
model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(48, 48, 3))

# Freeze the pretrained convolutional base; only the new head is trained.
for layer in model_vgg.layers:
    layer.trainable = False

model = Flatten(name='flatten')(model_vgg.output)  # flatten the feature maps
model = Dense(4096, activation='relu', name='fc1')(model)
model = Dense(4096, activation='relu', name='fc2')(model)
model = Dropout(0.5)(model)
model = Dense(10, activation='softmax')(model)
model_vgg_mnist = Model(inputs=model_vgg.input, outputs=model, name='vgg16')

model_vgg_mnist.summary()

# Input size originally recommended for VGGNet.
# NOTE: this second model is only built and summarized for comparison; it is
# never compiled or trained below.
model_vgg = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
for layer in model_vgg.layers:
    layer.trainable = False

model = Flatten()(model_vgg.output)
model = Dense(4096, activation='relu', name='fc1')(model)
model = Dense(4096, activation='relu', name='fc2')(model)
model = Dropout(0.5)(model)
model = Dense(10, activation='softmax', name='prediction')(model)
model_vgg_mnist_pretrain = Model(model_vgg.input, model, name='vgg16_pretrain')

model_vgg_mnist_pretrain.summary()

sgd = SGD(lr=0.05, decay=1e-5)  # stochastic gradient descent
model_vgg_mnist.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

(x_train, y_train), (x_test, y_test) = mnist.load_data("../test_data_home")
# Keep only the first 1000 samples of each split to keep the run small.
x_train, y_train = x_train[:1000], y_train[:1000]
x_test, y_test = x_test[:1000], y_test[:1000]


def _to_rgb_batch(images, size=(48, 48)):
    """Resize single-channel images and stack them into one float32 RGB batch.

    Args:
        images: iterable of 2-D grayscale image arrays.
        size: target size passed to ``cv2.resize``.

    Returns:
        A float32 array with one leading batch axis followed by the
        per-image shape produced by the GRAY -> RGB conversion.
    """
    rgb_images = [
        cv2.cvtColor(cv2.resize(image, size), cv2.COLOR_GRAY2RGB)
        for image in images
    ]
    # np.stack adds the new leading axis directly, replacing the manual
    # "add np.newaxis to every image, then concatenate" construction.
    return np.stack(rgb_images).astype('float32')


# Convert single-channel grayscale images to three-channel RGB, since the
# VGG16 base above is built with a 3-channel input shape.
x_train = _to_rgb_batch(x_train)

x_test = _to_rgb_batch(x_test)

print(x_train.shape)
print(x_test.shape)

# Scale pixel values by 1/255.
# NOTE(review): keras.applications.vgg16 also provides preprocess_input for
# the ImageNet weights; plain /255 scaling is kept here to preserve the
# original behavior -- confirm which preprocessing is intended.
x_train = x_train / 255
x_test = x_test / 255


def tran_y(y, num_classes=10):
    """Return the one-hot encoding of class index ``y``.

    Args:
        y: integer class label used as an index into the output vector.
        num_classes: length of the one-hot vector (defaults to 10).

    Returns:
        A 1-D float array of length ``num_classes`` that is 1 at index
        ``y`` and 0 elsewhere.
    """
    y_ohe = np.zeros(num_classes)
    y_ohe[y] = 1
    return y_ohe


# Iterate over the labels directly instead of indexing via range(len(...)).
y_train_ohe = np.array([tran_y(label) for label in y_train])
y_test_ohe = np.array([tran_y(label) for label in y_test])

model_vgg_mnist.fit(x_train, y_train_ohe, validation_data=(x_test, y_test_ohe), epochs=20, batch_size=100)
二.结果
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_9 (InputLayer) (None, 48, 48, 3) 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, 48, 48, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, 48, 48, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, 24, 24, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, 24, 24, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, 24, 24, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, 12, 12, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, 12, 12, 256) 295168 _________________________________________________________________ block3_conv2 (Conv2D) (None, 12, 12, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, 12, 12, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, 6, 6, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, 6, 6, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, 6, 6, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, 6, 6, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, 3, 3, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, 3, 3, 512) 2359808 _________________________________________________________________ block5_conv2 
(Conv2D) (None, 3, 3, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, 3, 3, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, 1, 1, 512) 0 _________________________________________________________________ flatten (Flatten) (None, 512) 0 _________________________________________________________________ fc1 (Dense) (None, 4096) 2101248 _________________________________________________________________ fc2 (Dense) (None, 4096) 16781312 _________________________________________________________________ dropout_9 (Dropout) (None, 4096) 0 _________________________________________________________________ dense_5 (Dense) (None, 10) 40970 ================================================================= Total params: 33,638,218 Trainable params: 18,923,530 Non-trainable params: 14,714,688 _________________________________________________________________ _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_10 (InputLayer) (None, 224, 224, 3) 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, 224, 224, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, 224, 224, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, 112, 112, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, 112, 112, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, 112, 112, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, 56, 56, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, 56, 56, 256) 295168 
_________________________________________________________________ block3_conv2 (Conv2D) (None, 56, 56, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, 56, 56, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, 28, 28, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, 14, 14, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, 7, 7, 512) 0 _________________________________________________________________ flatten_5 (Flatten) (None, 25088) 0 _________________________________________________________________ fc1 (Dense) (None, 4096) 102764544 _________________________________________________________________ fc2 (Dense) (None, 4096) 16781312 _________________________________________________________________ dropout_10 (Dropout) (None, 4096) 0 _________________________________________________________________ prediction (Dense) (None, 10) 40970 ================================================================= Total params: 134,301,514 Trainable params: 119,586,826 Non-trainable params: 14,714,688 _________________________________________________________________ 
(1000, 48, 48, 3) (1000, 48, 48, 3) Train on 1000 samples, validate on 1000 samples Epoch 1/20 1000/1000 [==============================] - 175s 175ms/step - loss: 2.1289 - acc: 0.2350 - val_loss: 1.9100 - val_acc: 0.4230 Epoch 2/20 1000/1000 [==============================] - 190s 190ms/step - loss: 1.7685 - acc: 0.4420 - val_loss: 1.6503 - val_acc: 0.4930 Epoch 3/20 1000/1000 [==============================] - 265s 265ms/step - loss: 1.5582 - acc: 0.5140 - val_loss: 1.5005 - val_acc: 0.5440 Epoch 4/20 1000/1000 [==============================] - 373s 373ms/step - loss: 1.4210 - acc: 0.5710 - val_loss: 1.3019 - val_acc: 0.6160 Epoch 5/20 1000/1000 [==============================] - 295s 295ms/step - loss: 1.1946 - acc: 0.6490 - val_loss: 1.1182 - val_acc: 0.7280 Epoch 6/20 1000/1000 [==============================] - 277s 277ms/step - loss: 1.0291 - acc: 0.7330 - val_loss: 1.0279 - val_acc: 0.7430 Epoch 7/20 1000/1000 [==============================] - 177s 177ms/step - loss: 1.0065 - acc: 0.7060 - val_loss: 0.9229 - val_acc: 0.7690 Epoch 8/20 1000/1000 [==============================] - 169s 169ms/step - loss: 0.8438 - acc: 0.7810 - val_loss: 0.9716 - val_acc: 0.6670 Epoch 9/20 1000/1000 [==============================] - 169s 169ms/step - loss: 0.8898 - acc: 0.7230 - val_loss: 0.9710 - val_acc: 0.6660 Epoch 10/20 1000/1000 [==============================] - 166s 166ms/step - loss: 0.8258 - acc: 0.7460 - val_loss: 0.9026 - val_acc: 0.7130 Epoch 11/20 1000/1000 [==============================] - 169s 169ms/step - loss: 0.7592 - acc: 0.7640 - val_loss: 0.9691 - val_acc: 0.6730 Epoch 12/20 1000/1000 [==============================] - 165s 165ms/step - loss: 0.7793 - acc: 0.7520 - val_loss: 0.8350 - val_acc: 0.6800 Epoch 13/20 1000/1000 [==============================] - 164s 164ms/step - loss: 0.6677 - acc: 0.7780 - val_loss: 0.7203 - val_acc: 0.7730 Epoch 14/20 1000/1000 [==============================] - 164s 164ms/step - loss: 0.7018 - acc: 0.7630 - val_loss: 
0.6947 - val_acc: 0.7760 Epoch 15/20 1000/1000 [==============================] - 163s 163ms/step - loss: 0.6129 - acc: 0.8100 - val_loss: 0.7025 - val_acc: 0.7610 Epoch 16/20 1000/1000 [==============================] - 163s 163ms/step - loss: 0.6104 - acc: 0.8190 - val_loss: 0.6385 - val_acc: 0.8220 Epoch 17/20 1000/1000 [==============================] - 163s 163ms/step - loss: 0.5507 - acc: 0.8320 - val_loss: 0.6273 - val_acc: 0.8290 Epoch 18/20 1000/1000 [==============================] - 164s 164ms/step - loss: 0.5205 - acc: 0.8360 - val_loss: 0.8740 - val_acc: 0.6750 Epoch 19/20 1000/1000 [==============================] - 163s 163ms/step - loss: 0.5852 - acc: 0.8150 - val_loss: 0.6614 - val_acc: 0.7890 Epoch 20/20 1000/1000 [==============================] - 166s 166ms/step - loss: 0.5310 - acc: 0.8340 - val_loss: 0.5718 - val_acc: 0.8250
三.解析
VGGNet是牛津大学计算机视觉组(Visual Geometry Group)和Google DeepMind公司的研究员一起研发的深度卷积神经网络。VGG探索了卷积神经网络的深度与其性能之间的关系,通过反复堆叠3*3的小型卷积核和2*2的最大池化层,VGG成功构筑了16-19层深的卷积神经网络。
VGG在2014年ILSVRC(ImageNet大规模视觉识别挑战赛)中取得了分类项目第二名和定位项目第一名。同时,VGG拓展性很强,迁移到其他图片数据上的泛化性非常好。VGG的结构简洁,整个网络都使用了同样大小的卷积核尺寸3*3和池化层尺寸2*2。VGG现在也还经常被用来提取图像特征,可用来在图像分类任务上进行再训练,相当于提供了非常好的初始化权重。
VGG通过加深层次来提升性能,拥有5段卷积,每一段内有2-3个卷积层,同时每段尾部都会连接一个最大池化层来缩小特征图尺寸。每段内各卷积层的卷积核数量相同,越靠后的段卷积核数量越多,依次为64-128-256-512-512。