The network uses two convolutional (conv-pool) layers, one fully connected layer, and a softmax classifier, and reaches 99.22% accuracy on the MNIST test set. The code is based on neural-networks-and-deep-learning and targets Python 2 with the old Theano API (cPickle, conv.conv2d, downsample.max_pool_2d).
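The layer shapes in the listing below follow from valid convolutions with 5x5 kernels and 2x2 non-overlapping max-pooling: 28x28 inputs shrink to 24x24 after the first convolution and 12x12 after pooling, then 8x8 and 4x4 after the second conv-pool pair, which is why the fully connected layer takes n_in=40*4*4. A minimal sketch of that arithmetic (the helper name is mine, not part of the listing):

    def conv_pool_out(size, kernel=5, pool=2):
        # valid convolution (no padding) followed by non-overlapping max-pooling
        return (size - kernel + 1) // pool

    s1 = conv_pool_out(28)          # 28 -> 24 -> 12
    s2 = conv_pool_out(s1)          # 12 ->  8 ->  4
    print(s1, s2, 40 * s2 * s2)     # 12 4 640, i.e. n_in = 40*4*4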
#coding:utf8
import cPickle
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor.nnet import sigmoid
from theano.tensor import shared_randomstreams
from theano.tensor.signal import downsample

def ReLU(z): return T.maximum(0.0, z)


def load_data_shared():
    f = open('mnist.pkl', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]


class Network(object):
    def __init__(self, layers, mini_batch_size):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]  # all w, b
        self.x = T.matrix("x")
        self.y = T.ivector("y")  # 1-dimensional vector of labels
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]  # wire layer j-1 -> j
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self)+\
               0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)  # gradients come from T.grad; no hand-coded derivative (prime) functions needed
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
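                    # at the last mini-batch of each epoch, evaluate
                    # accuracy over the entire validation set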
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}, cost={2}".format(
                        epoch, validation_accuracy, cost_ij))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                            print('The corresponding test accuracy is {0:.2%}'.format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))


class ConvPoolLayer(object):  # convolution + max-pooling layer
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=ReLU):
        self.filter_shape = filter_shape  # e.g. (20, 1, 5, 5): twenty 5*5 kernels over 1 input channel
        self.image_shape = image_shape  # e.g. (10, 1, 28, 28): channel count must match filter_shape[1]
        self.poolsize = poolsize  # (2, 2)
        self.activation_fn = activation_fn  # e.g. ReLU or theano.tensor.nnet.sigmoid
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))  # 20*5*5/(2*2)=125
        self.w = theano.shared(  # (20, 1, 5, 5)
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(  # (20,)
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)  # (10, 1, 28, 28)
        conv_out = conv.conv2d(  # 28-5+1=24 -> (10, 20, 24, 24)
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(  # 24/2=12 -> (10, 20, 12, 12)
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(  # (10, 20, 12, 12) plus broadcast bias
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))  # bias broadcast as (1, 20, 1, 1)
        self.output_dropout = self.output  # no dropout in the convolutional layers


class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))


class SoftmaxLayer(object):
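    # output layer: softmax over the 10 digit classes; weights and biases
    # start at zero, which is fine here because the log-likelihood gradient
    # is nonzero even for all-zero parameters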
    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)  # theano.tensor.nnet.softmax
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)

    def cost(self, net):
        # negative log-likelihood of the correct class
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])

    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))


def size(data):  # number of examples in a shared dataset
    return len(data[0].get_value())


def dropout_layer(layer, p_dropout):  # randomly zero out a fraction p_dropout of the units
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)


if __name__ == '__main__':
    training_data, validation_data, test_data = load_data_shared()
    mini_batch_size = 10
    net = Network([
        ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                      filter_shape=(20, 1, 5, 5),
                      poolsize=(2, 2)),
        ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                      filter_shape=(40, 20, 5, 5),
                      poolsize=(2, 2)),
        FullyConnectedLayer(n_in=40*4*4, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    net.SGD(training_data, 30, mini_batch_size, 0.1,
            validation_data, test_data)

# Sigmoid ConvPoolLayer
# Epoch 29: validation accuracy 98.96%, cost=9.70275432337e-05
# This is the best validation accuracy to date.
# The corresponding test accuracy is 98.86%

# ReLU ConvPoolLayer
# Epoch 29: validation accuracy 99.06%, cost=4.11269593315e-06
# This is the best validation accuracy to date.
# The corresponding test accuracy is 99.22%
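To make the masking in dropout_layer concrete, here is a small NumPy-only sketch of the same idea (independent of Theano; the function name is mine):

    import numpy as np

    rng = np.random.RandomState(0)

    def dropout_mask(shape, p_dropout):
        # each unit is kept with probability 1 - p_dropout and zeroed otherwise,
        # mirroring srng.binomial(n=1, p=1-p_dropout, ...) in dropout_layer
        return rng.binomial(n=1, p=1 - p_dropout, size=shape)

    activations = np.ones((2, 5))
    print(activations * dropout_mask(activations.shape, 0.5))
    # at prediction time the code compensates by scaling the pre-activation
    # with (1 - p_dropout) instead, as in FullyConnectedLayer.set_inpt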