1 Matlab示例代码为两部分,分别对应不同的论文:
2
3 1. Reducing the Dimensionality of data with neural networks
4
5 mnistdeepauto.m backprop.m rbmhidlinear.m
6
7 2. A fast learning algorithm for deep belief nets
8
9 mnistclassify.m backpropclassify.m
10
11 其余部分代码通用。
12
13 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
14 mnistclassify.m
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
% mnistclassify  Greedy layer-wise RBM pretraining followed by fine-tuning.
%
% Trains three stacked RBMs (numdims-numhid, numhid-numpen, numpen-numpen2),
% saves the weights and biases of each layer to a .mat file, and then runs
% backpropclassify to fine-tune the whole stack as a classifier.
%
% This is a script: it communicates with the helper scripts converter,
% makebatches, rbm and backpropclassify through workspace variables.
%
% NOTE(review): in the pasted listing every fprintf literal was split across
% two physical lines (the escape sequence had been rendered as a real line
% break), which is a MATLAB syntax error. The breaks are restored as \n here.

clear all
close all

maxepoch=50;                           % number of pretraining epochs per RBM
numhid=500; numpen=500; numpen2=2000;  % sizes of the three hidden layers

fprintf(1,'Converting Raw files into Matlab format\n');
converter;

fprintf(1,'Pretraining a deep autoencoder.\n');
fprintf(1,'The Science paper used 50 epochs. This uses %3i\n', maxepoch);

makebatches;                                    % split the data into mini-batches
[numcases numdims numbatches]=size(batchdata);  % batchdata is cases x dims x batches
% numcases   - number of cases in each batch
% numdims    - dimensionality of each case
% numbatches - number of batches

% ---- Layer 1: input layer -> first hidden layer -------------------------
fprintf(1,'Pretraining Layer 1 with RBM: %d-%d\n',numdims,numhid);
restart=1;                  % ask rbm to (re)initialise its parameters
rbm;                        % train the RBM on batchdata
hidrecbiases=hidbiases;     % keep the hidden biases learned by this RBM
save mnistvhclassify vishid hidrecbiases visbiases;

% ---- Layer 2: first hidden layer -> second hidden layer -----------------
fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d\n',numhid,numpen);
batchdata=batchposhidprobs; % hidden probabilities of the previous RBM become the input
numhid=numpen;              % hidden size of this RBM; the input size comes from batchdata
restart=1;
rbm;
hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases;  % weights and biases of layer 2
save mnisthpclassify hidpen penrecbiases hidgenbiases;

% ---- Layer 3: second hidden layer -> third hidden layer -----------------
fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d\n',numpen,numpen2);
batchdata=batchposhidprobs;
numhid=numpen2;
restart=1;
rbm;
hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;  % weights and biases of layer 3
save mnisthp2classify hidpen2 penrecbiases2 hidgenbiases2;

% Fine-tune the pretrained stack as a classifier.
backpropclassify;
58
59
60
61
62 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
63 backpropclassify.m
64 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% backpropclassify  Discriminative fine-tuning of the pretrained stack.
%
% Loads the three pretrained RBM layers, adds a randomly initialised softmax
% layer on top, and minimises cross-entropy with conjugate gradient
% (minimize.m). For the first 5 epochs only the softmax weights are trained;
% afterwards all weights are trained jointly. At the start of every epoch
% the misclassification count and cross-entropy are measured on the training
% and the test batches, and weights/errors are saved at the end of the epoch.
%
% This is a script: it expects makebatches to provide batchdata,
% batchtargets, testbatchdata and testbatchtargets in the workspace.
%
% NOTE(review): in the pasted listing every fprintf literal was split across
% two physical lines (the escape sequence had been rendered as a real line
% break), which is a MATLAB syntax error. The breaks are restored as \n here;
% confirm against the upstream file whether any of them was meant to be \r.

maxepoch=200;
fprintf(1,'\nTraining discriminative model on MNIST by minimizing cross entropy error.\n');
fprintf(1,'60 batches of 1000 cases each.\n');

% Load the pretrained weights and biases of each layer.
load mnistvhclassify
load mnisthpclassify
load mnisthp2classify

makebatches;                                    % split the data into mini-batches
[numcases numdims numbatches]=size(batchdata);
N=numcases;                                     % number of cases per batch

%%%% PREINITIALIZE WEIGHTS OF THE DISCRIMINATIVE MODEL %%%%%%%%%%%%%%%%%%%%

l5=10;                        % number of units in the label (softmax) layer

% Each weight matrix carries the biases of its output layer as its last row.
w1=[vishid; hidrecbiases];
w2=[hidpen; penrecbiases];
w3=[hidpen2; penrecbiases2];
w_class = 0.1*randn(size(w3,2)+1,l5);  % random (top hidden units + bias) x labels

%%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Number of units feeding each weight matrix (bias row excluded).
l1=size(w1,1)-1;
l2=size(w2,1)-1;
l3=size(w3,1)-1;
l4=size(w_class,1)-1;         % size of the top hidden layer

% Reset all four error histories. The cross-entropy histories were
% previously left untouched, so values from an earlier run in the same
% workspace could leak into the saved error file.
test_err=[];
train_err=[];
test_crerr=[];
train_crerr=[];

for epoch = 1:maxepoch

  %%%%%%%%%%%%%%%% COMPUTE TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%
  err=0;
  err_cr=0;                   % accumulated cross-entropy over all batches
  counter=0;                  % number of correctly classified cases
  [numcases numdims numbatches]=size(batchdata);
  N=numcases;
  for batch = 1:numbatches
    data = batchdata(:,:,batch);
    target = batchtargets(:,:,batch);
    data = [data ones(N,1)];  % append a column of ones for the bias term

    % Forward pass through the three logistic layers.
    w1probs = 1./(1 + exp(-data*w1));    w1probs = [w1probs ones(N,1)];
    w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
    w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];

    % Softmax over the label units: each row is normalised to sum to 1.
    targetout = exp(w3probs*w_class);
    targetout = targetout./repmat(sum(targetout,2),1,l5);

    [I J]=max(targetout,[],2);    % J: predicted label index per case
    [I1 J1]=max(target,[],2);     % J1: true label index per case
    counter=counter+sum(J==J1);
    err_cr = err_cr - sum(sum( target.*log(targetout) ));
  end
  train_err(epoch)=(numcases*numbatches-counter);  % number of misclassified training cases
  train_crerr(epoch)=err_cr/numbatches;            % mean cross-entropy per batch

  %%%%%%%%%%%% END OF COMPUTING TRAINING MISCLASSIFICATION ERROR %%%%%%%%%%

  %%%%%%%%%%%%%%%% COMPUTE TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%%%%%%
  err=0;
  err_cr=0;
  counter=0;
  [testnumcases testnumdims testnumbatches]=size(testbatchdata);

  N=testnumcases;
  for batch = 1:testnumbatches
    data = testbatchdata(:,:,batch);
    target = testbatchtargets(:,:,batch);
    data = [data ones(N,1)];
    w1probs = 1./(1 + exp(-data*w1));    w1probs = [w1probs ones(N,1)];
    w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
    w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
    targetout = exp(w3probs*w_class);
    targetout = targetout./repmat(sum(targetout,2),1,l5);

    [I J]=max(targetout,[],2);
    [I1 J1]=max(target,[],2);
    counter=counter+sum(J==J1);
    err_cr = err_cr - sum(sum( target.*log(targetout) ));
  end
  test_err(epoch)=(testnumcases*testnumbatches-counter);
  test_crerr(epoch)=err_cr/testnumbatches;
  fprintf(1,'Before epoch %d Train # misclassified: %d (from %d). Test # misclassified: %d (from %d)\n',...
          epoch,train_err(epoch),numcases*numbatches,test_err(epoch),testnumcases*testnumbatches);

  %%%%%%%%%%%% END OF COMPUTING TEST MISCLASSIFICATION ERROR %%%%%%%%%%%%%%

  tt=0;
  for batch = 1:numbatches/10
    fprintf(1,'epoch %d batch %d\n',epoch,batch);

    %%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%
    % Conjugate gradient is run on the stacked cases of 10 consecutive
    % mini-batches.
    tt=tt+1;
    data=[];
    targets=[];
    for kk=1:10
      data=[data
            batchdata(:,:,(tt-1)*10+kk)];
      targets=[targets
               batchtargets(:,:,(tt-1)*10+kk)];
    end

    %%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%
    max_iter=3;               % number of line searches per minimize call

    if epoch<6  % First update top-level weights holding other weights fixed.
      N = size(data,1);
      XX = [data ones(N,1)];  % append a column of ones for the bias term
      w1probs = 1./(1 + exp(-XX*w1));      w1probs = [w1probs ones(N,1)];
      w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
      w3probs = 1./(1 + exp(-w2probs*w3)); % bias column is appended inside CG_CLASSIFY_INIT

      VV = w_class(:);        % minimize expects the parameters as a D-by-1 column
      Dim = [l4; l5];         % sizes of the top hidden layer and the label layer
      % minimize is Carl Rasmussen's "minimize" code. Per its header
      % documentation (confirm against the minimize.m actually in use):
      %   X        - the parameter vector found by the optimiser
      %   fX       - vector of function values recording progress
      %   max_iter - if positive, the maximum number of line searches;
      %              if negative, the maximum number of function evaluations
      [X, fX] = minimize(VV,'CG_CLASSIFY_INIT',max_iter,Dim,w3probs,targets);
      w_class = reshape(X,l4+1,l5);  % restore the matrix shape

    else        % Fine-tune all layers jointly.
      VV = [w1(:)' w2(:)' w3(:)' w_class(:)']';  % all weights in one column
      Dim = [l1; l2; l3; l4; l5];                % layer sizes
      [X, fX] = minimize(VV,'CG_CLASSIFY',max_iter,Dim,data,targets);

      % Unpack X into the four weight matrices; xxx is the running offset.
      w1 = reshape(X(1:(l1+1)*l2),l1+1,l2);
      xxx = (l1+1)*l2;
      w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
      xxx = xxx+(l2+1)*l3;
      w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
      xxx = xxx+(l3+1)*l4;
      w_class = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);

    end
    %%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%

  end

  save mnistclassify_weights w1 w2 w3 w_class
  save mnistclassify_error test_err test_crerr train_err train_crerr;

end
221
222
223
224
225 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
226 rbm.m
227 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% rbm  Train one RBM with one-step contrastive divergence.
%
% This is a script; it reads these workspace variables:
%   batchdata - training data, cases x dims x batches
%   numhid    - number of hidden units
%   maxepoch  - last epoch to run
%   restart   - set to 1 to initialise all parameters and start at epoch 1
% and leaves these behind:
%   vishid, hidbiases, visbiases - learned weights and biases
%   batchposhidprobs             - hidden probabilities for every batch
%
% NOTE(review): in the pasted listing every fprintf literal was split across
% two physical lines (the escape sequence had been rendered as a real line
% break), which is a MATLAB syntax error. The breaks are restored as \n here;
% confirm against the upstream file whether any of them was meant to be \r.

epsilonw      = 0.1;   % Learning rate for weights
epsilonvb     = 0.1;   % Learning rate for biases of visible units
epsilonhb     = 0.1;   % Learning rate for biases of hidden units
weightcost  = 0.0002;  % weight-decay coefficient
initialmomentum  = 0.5;
finalmomentum    = 0.9;

[numcases numdims numbatches]=size(batchdata);
% numcases   - number of cases in each batch
% numdims    - dimensionality of each case
% numbatches - number of batches

if restart ==1,
  restart=0;
  epoch=1;

  % Initializing symmetric weights and biases.
  vishid     = 0.1*randn(numdims, numhid);  % visible-to-hidden weights
  hidbiases  = zeros(1,numhid);             % hidden unit biases
  visbiases  = zeros(1,numdims);            % visible unit biases

  poshidprobs = zeros(numcases,numhid);     % hidden probabilities, positive phase
  neghidprobs = zeros(numcases,numhid);     % hidden probabilities, negative phase
  posprods    = zeros(numdims,numhid);      % visible-hidden products, positive phase
  negprods    = zeros(numdims,numhid);      % visible-hidden products, negative phase
  vishidinc  = zeros(numdims,numhid);       % weight increment (carries momentum)
  hidbiasinc = zeros(1,numhid);             % hidden bias increment
  visbiasinc = zeros(1,numdims);            % visible bias increment
  % Hidden probabilities of every batch; the caller feeds them to the next RBM.
  batchposhidprobs=zeros(numcases,numhid,numbatches);
end

for epoch = epoch:maxepoch,
  fprintf(1,'epoch %d\n',epoch);
  errsum=0;                                 % squared reconstruction error, summed over batches
  for batch = 1:numbatches,
    fprintf(1,'epoch %d batch %d\n',epoch,batch);

    %%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    data = batchdata(:,:,batch);
    % Hidden probabilities given the data.
    poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));
    % Overwritten every epoch, so the values from the last epoch remain.
    batchposhidprobs(:,:,batch)=poshidprobs;
    posprods    = data' * poshidprobs;      % data-driven statistics <v_i h_j>
    poshidact   = sum(poshidprobs);         % per-unit sum of hidden probabilities
    posvisact = sum(data);                  % per-unit sum of visible values

    %%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Sample binary hidden states from their probabilities.
    poshidstates = poshidprobs > rand(numcases,numhid);

    %%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Reconstruct the visibles from the sampled hidden states, then
    % recompute the hidden probabilities from the reconstruction.
    negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
    neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
    negprods  = negdata'*neghidprobs;       % reconstruction-driven statistics <v_i h_j>
    neghidact = sum(neghidprobs);
    negvisact = sum(negdata);

    %%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    err= sum(sum( (data-negdata).^2 ));     % squared reconstruction error of this batch
    errsum = err + errsum;

    % Use the lower momentum for the first 5 epochs, the higher one after.
    if epoch>5,
      momentum=finalmomentum;
    else
      momentum=initialmomentum;
    end;

    %%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    vishidinc = momentum*vishidinc + ...
                epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
    visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
    hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);

    vishid = vishid + vishidinc;
    visbiases = visbiases + visbiasinc;
    hidbiases = hidbiases + hidbiasinc;

    %%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  end
  fprintf(1, 'epoch %4i error %6.1f\n', epoch, errsum);
end;
311
312
313
314
315 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
316 CG_CLASSIFY_INIT.M
317 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [f, df] = CG_CLASSIFY_INIT(VV,Dim,w3probs,target)
% CG_CLASSIFY_INIT  Cross-entropy and its gradient for the softmax layer only.
%
%   [f, df] = CG_CLASSIFY_INIT(VV, Dim, w3probs, target)
%
%   Objective function for conjugate-gradient training of the top-level
%   (softmax) weights while the layers below are held fixed.
%
%   Inputs:
%     VV      - softmax weights unrolled into a (Dim(1)+1)*Dim(2) vector;
%               the extra row holds the biases
%     Dim     - [number of top hidden units; number of label units]
%     w3probs - N x Dim(1) activations of the top hidden layer, without a
%               bias column (one is appended here)
%     target  - N x Dim(2) target matrix
%
%   Outputs:
%     f  - cross-entropy, -sum(sum(target .* log(softmax output)))
%     df - gradient of f with respect to VV, as a column vector

numtop    = Dim(1);
numlabels = Dim(2);
N = size(w3probs,1);

% Restore the weight matrix and append the bias input.
w_class = reshape(VV,numtop+1,numlabels);
w3probs = [w3probs ones(N,1)];

% Softmax over the label units. Subtracting each row's maximum leaves the
% result mathematically unchanged but keeps exp() from overflowing to Inf,
% which would turn the normalised output into NaN. The width comes from Dim
% rather than a hard-coded 10, so any number of labels works.
logits = w3probs*w_class;
logits = logits - repmat(max(logits,[],2),1,numlabels);
targetout = exp(logits);
targetout = targetout./repmat(sum(targetout,2),1,numlabels);

f = -sum(sum( target.*log(targetout) ));

% For softmax with cross-entropy the gradient with respect to the logits is
% (output - target); no sigmoid derivative is involved.
Ix_class = targetout - target;
dw_class = w3probs'*Ix_class;

df = dw_class(:);
335
336
337
338
339
340 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
341 CG_CLASSIFY.M
342 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
343 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
344 % 该段代码对所有权重进行整体微调
345 % 各部分过程见 CG_CLASSIFY_INIT.m注解
346 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
347 function [f, df] = CG_CLASSIFY(VV,Dim,XX,target);
348
349
350 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
351 rbmhidlinear.m
352 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
353 %与rbm.m相比,只有隐藏层单元的取值改用线性单元,其余过程完全相同
354 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
355
356 复制代码