The code below is adapted from yusugomori's prototype. A DBN is essentially a stack of RBMs, so the implementation has to keep track of which RBM it is currently working on: the hidden layer of each RBM serves as the input (visible) layer of the next RBM. Fine-tuning then adjusts the model using the available labeled data together with the output layer built on top of the last hidden layer. (Notes from a beginner.)
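Before the code, it may help to write down the standard RBM formulas the implementation relies on (this summary is mine, not part of the original post; b and c below correspond to hbias and vbias in the code, and the code additionally divides each per-sample update by the number of training samples N):

\[
P(h_j = 1 \mid v) = \sigma\Big(\sum_i W_{ji} v_i + b_j\Big), \qquad
P(v_i = 1 \mid h) = \sigma\Big(\sum_j W_{ji} h_j + c_i\Big)
\]

CD-k approximates the log-likelihood gradient using a short Gibbs chain started at the data \(v^{(0)}\):

\[
\Delta W_{ji} \approx \eta\,\big( P(h_j = 1 \mid v^{(0)})\, v_i^{(0)} - P(h_j = 1 \mid v^{(k)})\, v_i^{(k)} \big)
\]

Fine-tuning is an ordinary softmax-regression gradient step on top of the last hidden layer \(x\):

\[
\Delta W_{ij} = \eta\,(y_i - p(y_i \mid x))\, x_j, \qquad
\Delta b_i = \eta\,(y_i - p(y_i \mid x)).
\]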
DBN.h
class DBN {

public:
    int N;                          // number of training samples
    int n_ins;                      // dimensionality of the input layer
    int *hidden_layer_sizes;        // size of each hidden layer
    int n_outs;                     // dimensionality of the output layer
    int n_layers;                   // number of hidden layers (= number of stacked RBMs)
    HiddenLayer **sigmoid_layers;   // sigmoid hidden layers, one per RBM
    RBM **rbm_layers;               // the stacked RBMs (share W and hbias with the hidden layers)
    LogisticRegression *log_layer;  // output layer used for fine-tuning and prediction
    DBN(int, int, int*, int, int);
    ~DBN();
    void pretrain(int*, double, int, int);
    void finetune(int*, int*, double, int);
    void predict(int*, double*);
};
HiddenLayer.h
class HiddenLayer {

public:
    int N;
    int n_in;
    int n_out;
    double **W;
    double *b;
    HiddenLayer(int, int, int, double**, double*);
    ~HiddenLayer();
    double output(int*, double*, double);
    void sample_h_given_v(int*, int*);
};
LogisticRegression.h
class LogisticRegression {

public:
    int N;
    int n_in;
    int n_out;
    double **W;
    double *b;
    LogisticRegression(int, int, int);
    ~LogisticRegression();
    void train(int*, int*, double);
    void softmax(double*);
    void predict(int*, double*);
};
RBM.h
class RBM {

public:
    int N;
    int n_visible;
    int n_hidden;
    double **W;
    double *hbias;
    double *vbias;
    RBM(int, int, int, double**, double*, double*);
    ~RBM();
    void contrastive_divergence(int*, double, int);
    void sample_h_given_v(int*, double*, int*);
    void sample_v_given_h(int*, double*, int*);
    double propup(int*, double*, double);
    double propdown(int*, int, double);
    void gibbs_hvh(int*, double*, int*, double*, int*);
    void reconstruct(int*, double*);
};
DBN.cpp
#include <iostream>
#include <cmath>
#include "HiddenLayer.h"
#include <stdlib.h>
#include "RBM.h"
#include "LogisticRegression.h"
#include "DBN.h"
using namespace std;


double uniform(double min, double max)
{
    return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}

int binomial(int n, double p)
{
    if(p < 0 || p > 1) return 0;

    int c = 0;
    double r;

    for(int i=0; i<n; i++) {
        r = rand() / (RAND_MAX + 1.0);
        if (r < p) c++;
    }

    return c;
}

double sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}


// DBN
// Construct and initialize the whole network.
DBN::DBN(int size, int n_i, int *hls, int n_o, int n_l)
{
    int input_size;

    N = size;
    n_ins = n_i;
    hidden_layer_sizes = hls;
    n_outs = n_o;
    n_layers = n_l;

    sigmoid_layers = new HiddenLayer*[n_layers];
    rbm_layers = new RBM*[n_layers];

    // Build one hidden layer / RBM per layer; in this example n_layers == 2,
    // so the network consists of two stacked RBMs.
    for(int i=0; i<n_layers; i++)
    {
        if(i == 0)
        {
            input_size = n_ins;                     // the first RBM sees the raw input
        }
        else
        {
            input_size = hidden_layer_sizes[i-1];   // each later RBM sees the previous hidden layer
        }

        // construct the hidden layer
        sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);

        // construct the RBM, sharing W and the hidden bias with this hidden layer
        rbm_layers[i] = new RBM(N, input_size, hidden_layer_sizes[i], sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);
    }
    // The loop above yields a stack of two RBMs.
    // The logistic regression layer takes the last hidden layer as its input
    // and produces the output of the whole model.
    log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);
}

DBN::~DBN()
{
    delete log_layer;

    for(int i=0; i<n_layers; i++)
    {
        delete sigmoid_layers[i];
        delete rbm_layers[i];
    }
    delete[] sigmoid_layers;
    delete[] rbm_layers;
}

// Greedy layer-wise pre-training: the RBMs are trained one after another,
// and each sample has to be propagated up to the layer currently being trained.
void DBN::pretrain(int *input, double lr, int k, int epochs)
{
    int *layer_input;
    int prev_layer_input_size;
    int *prev_layer_input;

    int *train_X = new int[n_ins];

    for(int i=0; i<n_layers; i++)                   // one RBM at a time
    {
        for(int epoch=0; epoch<epochs; epoch++)     // pre-training epochs
        {
            for(int n=0; n<N; n++)                  // one training sample at a time
            {
                for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];

                // propagate the sample up to layer i (the input of the RBM being trained)
                for(int l=0; l<=i; l++)
                {
                    if(l == 0)
                    {
                        layer_input = new int[n_ins];
                        for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
                    }
                    else
                    {
                        if(l == 1) prev_layer_input_size = n_ins;
                        else prev_layer_input_size = hidden_layer_sizes[l-2];

                        prev_layer_input = new int[prev_layer_input_size];
                        for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
                        delete[] layer_input;

                        layer_input = new int[hidden_layer_sizes[l-1]];
                        sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
                        delete[] prev_layer_input;
                    }
                }

                rbm_layers[i]->contrastive_divergence(layer_input, lr, k);
                delete[] layer_input;   // free the per-sample buffer
            }
        }
    }
    delete[] train_X;
}

// Fine-tuning adjusts the top logistic regression layer from the labels,
// much like ordinary logistic regression training.
void DBN::finetune(int *input, int *label, double lr, int epochs)
{
    int *layer_input;
    int *prev_layer_input;

    int *train_X = new int[n_ins];
    int *train_Y = new int[n_outs];

    for(int epoch=0; epoch<epochs; epoch++)
    {
        for(int n=0; n<N; n++)
        {
            for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];
            for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];

            // propagate the sample through all hidden layers
            for(int i=0; i<n_layers; i++)
            {
                if(i == 0)
                {
                    prev_layer_input = new int[n_ins];
                    for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
                }
                else
                {
                    prev_layer_input = new int[hidden_layer_sizes[i-1]];
                    for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
                    delete[] layer_input;
                }
                layer_input = new int[hidden_layer_sizes[i]];
                sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
                delete[] prev_layer_input;
            }

            // train the logistic regression layer on (last hidden layer, label)
            log_layer->train(layer_input, train_Y, lr);
            delete[] layer_input;   // free the per-sample buffer
        }
    }
    delete[] train_X;
    delete[] train_Y;
}

void DBN::predict(int *x, double *y)
{
    double *layer_input;
    double *prev_layer_input;

    double linear_output;

    prev_layer_input = new double[n_ins];
    for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];

    // deterministic forward pass: use the sigmoid activations (means) instead of samples
    for(int i=0; i<n_layers; i++)
    {
        layer_input = new double[sigmoid_layers[i]->n_out];

        for(int k=0; k<sigmoid_layers[i]->n_out; k++)
        {
            linear_output = 0.0;
            for(int j=0; j<sigmoid_layers[i]->n_in; j++)
            {
                linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
            }
            linear_output += sigmoid_layers[i]->b[k];
            layer_input[k] = sigmoid(linear_output);
        }
        delete[] prev_layer_input;

        if(i < n_layers-1)
        {
            prev_layer_input = new double[sigmoid_layers[i]->n_out];
            for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
            delete[] layer_input;
        }
    }

    // softmax output layer
    for(int i=0; i<log_layer->n_out; i++)
    {
        y[i] = 0;
        for(int j=0; j<log_layer->n_in; j++)
        {
            y[i] += log_layer->W[i][j] * layer_input[j];
        }
        y[i] += log_layer->b[i];
    }
    log_layer->softmax(y);

    delete[] layer_input;
}


// HiddenLayer
HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp)
{
    N = size;
    n_in = in;
    n_out = out;

    if(w == NULL)
    {
        W = new double*[n_out];
        for(int i=0; i<n_out; i++) W[i] = new double[n_in];
        double a = 1.0 / n_in;

        for(int i=0; i<n_out; i++)
        {
            for(int j=0; j<n_in; j++)
            {
                W[i][j] = uniform(-a, a);
            }
        }
    }
    else
    {
        W = w;
    }

    if(bp == NULL)
    {
        b = new double[n_out];
        for(int i=0; i<n_out; i++) b[i] = 0;   // initialize the bias to zero
    }
    else
    {
        b = bp;
    }
}

HiddenLayer::~HiddenLayer()
{
    for(int i=0; i<n_out; i++) delete[] W[i];
    delete[] W;
    delete[] b;
}

double HiddenLayer::output(int *input, double *w, double b)
{
    double linear_output = 0.0;
    for(int j=0; j<n_in; j++)
    {
        linear_output += w[j] * input[j];
    }
    linear_output += b;
    return sigmoid(linear_output);
}

void HiddenLayer::sample_h_given_v(int *input, int *sample)
{
    for(int i=0; i<n_out; i++)
    {
        sample[i] = binomial(1, output(input, W[i], b[i]));
    }
}


// RBM
RBM::RBM(int size, int n_v, int n_h, double **w, double *hb, double *vb)
{
    N = size;
    n_visible = n_v;
    n_hidden = n_h;

    if(w == NULL)
    {
        W = new double*[n_hidden];
        for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
        double a = 1.0 / n_visible;

        for(int i=0; i<n_hidden; i++)
        {
            for(int j=0; j<n_visible; j++)
            {
                W[i][j] = uniform(-a, a);
            }
        }
    }
    else
    {
        W = w;
    }

    if(hb == NULL)
    {
        hbias = new double[n_hidden];
        for(int i=0; i<n_hidden; i++) hbias[i] = 0;
    }
    else
    {
        hbias = hb;
    }

    if(vb == NULL)
    {
        vbias = new double[n_visible];
        for(int i=0; i<n_visible; i++) vbias[i] = 0;
    }
    else
    {
        vbias = vb;
    }
}

RBM::~RBM()
{
    // W and hbias are shared with the corresponding HiddenLayer, which owns them;
    // only vbias is owned by the RBM.
    delete[] vbias;
}
void RBM::contrastive_divergence(int *input, double lr, int k)
{
    double *ph_mean = new double[n_hidden];
    int *ph_sample = new int[n_hidden];
    double *nv_means = new double[n_visible];
    int *nv_samples = new int[n_visible];
    double *nh_means = new double[n_hidden];
    int *nh_samples = new int[n_hidden];

    /* CD-k */
    sample_h_given_v(input, ph_mean, ph_sample);    // obtain h0 from the data v0

    for(int step=0; step<k; step++)
    {
        if(step == 0)
        {
            gibbs_hvh(ph_sample, nv_means, nv_samples, nh_means, nh_samples);   // obtain v1 and h1
        }
        else
        {
            gibbs_hvh(nh_samples, nv_means, nv_samples, nh_means, nh_samples);
        }
    }

    // Update the weights and both bias vectors. In Hinton's CD-k, v0 is the original data x,
    // h0 (ph_mean here) is approximately P(h=1|v0), v1 is the reconstruction of x,
    // approximately P(v=1|h0), and h1 follows in the same way. With just these quantities,
    // CD-k gives a good approximation of the gradient
    // (exactly why the approximation works so well, I honestly do not know).
    for(int i=0; i<n_hidden; i++)
    {
        for(int j=0; j<n_visible; j++)
        {
            // In the update formula, P(h_i=1|v0) is h0, while P(h_i=1|v_k) and v_k are h1 and v1.
            W[i][j] += lr * (ph_mean[i] * input[j] - nh_means[i] * nv_samples[j]) / N;
        }
        hbias[i] += lr * (ph_sample[i] - nh_means[i]) / N;
    }

    for(int i=0; i<n_visible; i++)
    {
        vbias[i] += lr * (input[i] - nv_samples[i]) / N;
    }

    delete[] ph_mean;
    delete[] ph_sample;
    delete[] nv_means;
    delete[] nv_samples;
    delete[] nh_means;
    delete[] nh_samples;
}

void RBM::sample_h_given_v(int *v0_sample, double *mean, int *sample)
{
    for(int i=0; i<n_hidden; i++)
    {
        mean[i] = propup(v0_sample, W[i], hbias[i]);
        sample[i] = binomial(1, mean[i]);
    }
}

void RBM::sample_v_given_h(int *h0_sample, double *mean, int *sample)
{
    for(int i=0; i<n_visible; i++)
    {
        mean[i] = propdown(h0_sample, i, vbias[i]);
        sample[i] = binomial(1, mean[i]);
    }
}

double RBM::propup(int *v, double *w, double b)
{
    double pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_visible; j++)
    {
        pre_sigmoid_activation += w[j] * v[j];
    }
    pre_sigmoid_activation += b;
    return sigmoid(pre_sigmoid_activation);
}

double RBM::propdown(int *h, int i, double b)
{
    double pre_sigmoid_activation = 0.0;
    for(int j=0; j<n_hidden; j++)
    {
        pre_sigmoid_activation += W[j][i] * h[j];
    }
    pre_sigmoid_activation += b;
    return sigmoid(pre_sigmoid_activation);
}

void RBM::gibbs_hvh(int *h0_sample, double *nv_means, int *nv_samples, double *nh_means, int *nh_samples)
{
    sample_v_given_h(h0_sample, nv_means, nv_samples);
    sample_h_given_v(nv_samples, nh_means, nh_samples);
}

void RBM::reconstruct(int *v, double *reconstructed_v)
{
    double *h = new double[n_hidden];
    double pre_sigmoid_activation;

    for(int i=0; i<n_hidden; i++)
    {
        h[i] = propup(v, W[i], hbias[i]);
    }

    for(int i=0; i<n_visible; i++)
    {
        pre_sigmoid_activation = 0.0;
        for(int j=0; j<n_hidden; j++)
        {
            pre_sigmoid_activation += W[j][i] * h[j];
        }
        pre_sigmoid_activation += vbias[i];
        reconstructed_v[i] = sigmoid(pre_sigmoid_activation);
    }
    delete[] h;
}


// LogisticRegression
LogisticRegression::LogisticRegression(int size, int in, int out)
{
    N = size;
    n_in = in;
    n_out = out;

    W = new double*[n_out];
    for(int i=0; i<n_out; i++) W[i] = new double[n_in];
    b = new double[n_out];

    for(int i=0; i<n_out; i++)
    {
        for(int j=0; j<n_in; j++)
        {
            W[i][j] = 0;
        }
        b[i] = 0;
    }
}
LogisticRegression::~LogisticRegression()
{
    for(int i=0; i<n_out; i++) delete[] W[i];
    delete[] W;
    delete[] b;
}


void LogisticRegression::train(int *x, int *y, double lr)
{
    double *p_y_given_x = new double[n_out];
    double *dy = new double[n_out];

    for(int i=0; i<n_out; i++)
    {
        p_y_given_x[i] = 0;
        for(int j=0; j<n_in; j++)
        {
            p_y_given_x[i] += W[i][j] * x[j];
        }
        p_y_given_x[i] += b[i];
    }
    softmax(p_y_given_x);

    // gradient step on the cross-entropy loss: dy = y - p(y|x)
    for(int i=0; i<n_out; i++)
    {
        dy[i] = y[i] - p_y_given_x[i];
        for(int j=0; j<n_in; j++)
        {
            W[i][j] += lr * dy[i] * x[j] / N;
        }
        b[i] += lr * dy[i] / N;
    }

    delete[] p_y_given_x;
    delete[] dy;
}

void LogisticRegression::softmax(double *x)
{
    double max = 0.0;
    double sum = 0.0;

    for(int i=0; i<n_out; i++) if(max < x[i]) max = x[i];
    for(int i=0; i<n_out; i++)
    {
        x[i] = exp(x[i] - max);
        sum += x[i];
    }

    for(int i=0; i<n_out; i++) x[i] /= sum;
}

void LogisticRegression::predict(int *x, double *y)
{
    for(int i=0; i<n_out; i++)
    {
        y[i] = 0;
        for(int j=0; j<n_in; j++)
        {
            y[i] += W[i][j] * x[j];
        }
        y[i] += b[i];
    }
    softmax(y);
}

void test_dbn()
{
    srand(0);

    double pretrain_lr = 0.1;
    int pretraining_epochs = 1000;
    int k = 1;
    double finetune_lr = 0.1;
    int finetune_epochs = 500;

    int train_N = 6;
    int test_N = 3;
    int n_ins = 6;
    int n_outs = 2;
    int hidden_layer_sizes[] = {3, 3};
    int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);

    // training data
    int train_X[6][6] = {
        {1, 1, 1, 0, 0, 0},
        {1, 0, 1, 0, 0, 0},
        {1, 1, 1, 0, 0, 0},
        {0, 0, 1, 1, 1, 0},
        {0, 0, 1, 1, 0, 0},
        {0, 0, 1, 1, 1, 0}
    };

    int train_Y[6][2] = {
        {1, 0},
        {1, 0},
        {1, 0},
        {0, 1},
        {0, 1},
        {0, 1}
    };

    // construct the DBN: hidden layers, RBMs, and the logistic regression layer
    DBN dbn(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);

    // layer-wise pre-training
    dbn.pretrain(*train_X, pretrain_lr, k, pretraining_epochs);

    // fine-tuning
    dbn.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);

    // test data
    int test_X[3][6] = {
        {1, 1, 0, 0, 0, 0},
        {0, 0, 0, 1, 1, 0},
        {1, 1, 1, 1, 1, 0}
    };
    double test_Y[3][2];

    // test
    for(int i=0; i<test_N; i++)
    {
        dbn.predict(test_X[i], test_Y[i]);
        for(int j=0; j<n_outs; j++)
        {
            cout << test_Y[i][j] << " ";
        }
        cout << endl;
    }
}

int main()
{
    test_dbn();
    return 0;
}
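A short note on running it: with the four headers placed next to DBN.cpp (all definitions, test_dbn and main live in that one file), the example builds with an ordinary C++ compiler, e.g. g++ DBN.cpp. Each line printed by test_dbn is the output of predict for one test row: after the softmax, the two numbers are the estimated probabilities of the two classes and sum to 1.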