user-knn
import csv
import math
from numpy import *

'''
ratings are stored from (1, 1) -> (user, item)
(user, 0) holds the mean rating of user u
(0, item) holds the mean rating of item i
'''

def toInt(arr):
    print('toInt() starting...')
    arr = mat(arr)
    m, n = shape(arr)
    nArr = zeros((m, n), dtype=int)  # integer dtype so user/item ids can be used as indices
    for i in range(m):
        for j in range(n):
            nArr[i, j] = int(arr[i, j])
    print('toInt() ending...')
    return nArr

def loadTrainData(path):
    print('loadTrainData starting...')
    l = []
    with open(path, 'r') as file:
        lines = csv.reader(file, delimiter='\t')  # ml-100k base/test files are tab-separated
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTrainData ending...')
    return toInt(l)

def loadTestData(path):
    print('loadTestData starting...')
    l = []
    with open(path) as file:
        lines = csv.reader(file, delimiter='\t')
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTestData ending...')
    return toInt(l)

def fillUIMatrix(uimatrix, train_data):
    print('fillUIMatrix starting...')
    train_data = mat(train_data)
    m, n = shape(train_data)
    for i in range(m):
        uimatrix[train_data[i, 0], train_data[i, 1]] = train_data[i, 2]
    print('fillUIMatrix ending...')

def calAverageRating(uimatrix):
    print('calAverageRating starting...')
    uimatrix = mat(uimatrix)
    m, n = shape(uimatrix)
    # column 0: mean rating of each user
    for i in range(1, m):
        rating = 0
        cnt = 0
        for j in range(1, n):
            rating += uimatrix[i, j]
            if uimatrix[i, j] != 0:
                cnt += 1
        if cnt == 0: uimatrix[i, 0] = 0
        else: uimatrix[i, 0] = rating / cnt
    # row 0: mean rating of each item
    for i in range(1, n):
        rating = 0
        cnt = 0
        for j in range(1, m):
            rating += uimatrix[j, i]
            if uimatrix[j, i] != 0:
                cnt += 1
        if cnt == 0: uimatrix[0, i] = 0
        else: uimatrix[0, i] = rating / cnt
    print('calAverageRating ending...')

def calPearson(l1, l2, rating1, rating2):
    # Pearson correlation between two users over their co-rated items
    r1 = 0.0; r2 = 0.0; r3 = 0.0
    for i in range(len(l1)):
        r1 += (l1[i] - rating1) * (l2[i] - rating2)
        r2 += (l1[i] - rating1) * (l1[i] - rating1)
        r3 += (l2[i] - rating2) * (l2[i] - rating2)
    r = r1 / (sqrt(r2) * sqrt(r3))
    return abs(r)

def rSort(r_list, index_list):
    # bubble sort both lists in descending order of similarity
    for i in range(len(r_list) - 1):
        for j in range(len(r_list) - 1 - i):
            if r_list[j] < r_list[j + 1]:
                r_list[j], r_list[j + 1] = r_list[j + 1], r_list[j]
                index_list[j], index_list[j + 1] = index_list[j + 1], index_list[j]

def calSim(uimatrix, index):
    print('calSim starting...')
    uimatrix = mat(uimatrix)
    m, n = shape(uimatrix)
    r_list = []      # similarity list
    index_list = []  # maps each similarity to the other user's index
    for i in range(1, m):
        l1 = []; l2 = []
        if i == index: continue
        for j in range(1, n):
            if uimatrix[i, j] != 0 and uimatrix[index, j] != 0:
                l1.append(uimatrix[index, j])
                l2.append(uimatrix[i, j])
        if l1 != []:
            rating1 = 0; rating2 = 0
            for j in range(len(l1)):
                rating1 += l1[j]
            for j in range(len(l2)):
                rating2 += l2[j]
            rating1 /= len(l1); rating2 /= len(l2)
            r = calPearson(l1, l2, rating1, rating2)
            if math.isnan(r): r = 0.0
            r_list.append(r)
            index_list.append(i)
    rSort(r_list, index_list)
    print('calSim ending...')
    return r_list, index_list

def calRMSE(uimatrix, test_data, users):
    print('calRMSE starting...')
    test_data = mat(test_data)
    m, n = shape(test_data)
    tmp1 = 0
    tmp2 = 0
    for k in range(1, users + 1):
        for i in range(m):
            if test_data[i, 0] == k:
                if uimatrix[k, test_data[i, 1]] == 0.0:
                    uimatrix[k, test_data[i, 1]] = uimatrix[k, 0]
                uimatrix[k, test_data[i, 1]] = round(uimatrix[k, test_data[i, 1]])
                tmp1 += (test_data[i, 2] - uimatrix[k, test_data[i, 1]]) ** 2
                tmp2 += 1
                print(test_data[i, 1], ' real rating:', test_data[i, 2], ' predict:', uimatrix[k, test_data[i, 1]])
    print('calRMSE ending...')
    return sqrt(tmp1 / tmp2)

select_top = 30
users = 943
items = 1682
user_item_matrix = zeros((users + 1, items + 1))
train_path = r'C:\Users\think\Desktop\data\u2.base'
test_path = r'C:\Users\think\Desktop\data\u2.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)

fillUIMatrix(user_item_matrix, train_data)
calAverageRating(user_item_matrix)
uimatrix = mat(user_item_matrix)

for i in range(1, users + 1):
    r_list, index_list = calSim(uimatrix, i)
    for j in range(1, items + 1):
        if uimatrix[i, j] == 0:
            tmp1 = 0.0; tmp2 = 0.0
            # weighted sum over the top-k most similar users who rated item j
            for k in range(min(select_top, len(r_list))):
                if not math.isnan(r_list[k]) and uimatrix[index_list[k], j] != 0:
                    tmp1 += r_list[k] * (uimatrix[index_list[k], j] - uimatrix[index_list[k], 0])
                    tmp2 += r_list[k]
            if tmp2 == 0: uimatrix[i, j] = uimatrix[i, 0]
            else: uimatrix[i, j] = uimatrix[i, 0] + tmp1 / tmp2

RMSE = calRMSE(uimatrix, test_data, users)
print(RMSE)
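For reference, the neighbourhood prediction that the filling loop implements can be written compactly. With I_uv the items co-rated by users u and v (the means inside the similarity are taken over those co-rated items, as in calPearson), r̄_u the user mean stored in column 0, and N_k(u) the select_top most similar neighbours of u who rated item j, the code corresponds to

\mathrm{sim}(u,v) = \left|\frac{\sum_{j\in I_{uv}}(r_{uj}-\bar r_u)(r_{vj}-\bar r_v)}{\sqrt{\sum_{j\in I_{uv}}(r_{uj}-\bar r_u)^2}\,\sqrt{\sum_{j\in I_{uv}}(r_{vj}-\bar r_v)^2}}\right|,
\qquad
\hat r_{uj} = \bar r_u + \frac{\sum_{v\in N_k(u)}\mathrm{sim}(u,v)\,(r_{vj}-\bar r_v)}{\sum_{v\in N_k(u)}\mathrm{sim}(u,v)},

falling back to \bar r_u when no neighbour has rated j (in the prediction, \bar r_v is the neighbour's overall mean from column 0).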
lfm1
from numpy import *
import csv
import time

def RMSE(estimation, truth):
    num = len(estimation)
    sse = sum(square(truth - estimation))
    return sqrt(divide(sse, num - 1.0))

class matrixFactorization():
    def __init__(self, num_user, num_item, num_feature, train_data, test_data, **params):
        self._num_user = num_user
        self._num_item = num_item
        self._num_feature = num_feature
        self._train_data = train_data
        self._test_data = test_data

        self.batch_size = int(params.get('batch_size', 1000000))

        self.epsilon = float(params.get('epsilon', 100.0))  # learning rate (scaled by batch_size)
        self.lam = float(params.get('lam', 0.00001))         # regularization strength

        self.max_rating = params.get('max_rating')
        self.min_rating = params.get('min_rating')

        if self.max_rating:
            self.max_rating = float(self.max_rating)
        if self.min_rating:
            self.min_rating = float(self.min_rating)

        self._mean_rating = mean(self._train_data[:, 2])

        # +1 row so the 1-based MovieLens ids can be used as indices directly
        self._user_feature = 0.3 * random.rand(num_user + 1, num_feature)
        self._item_feature = 0.3 * random.rand(num_item + 1, num_feature)

        self.train_errors = []
        self.test_errors = []

    def estimate(self, iterations=50, converge=1e-4):
        last_rmse = None
        for iteration in range(iterations):
            data = self._train_data
            # compute gradient
            u_features = self._user_feature[data[:, 0], :]
            i_features = self._item_feature[data[:, 1], :]
            ratings = data[:, 2] - self._mean_rating
            preds = sum(u_features * i_features, 1)
            errs = preds - ratings
            err_mat = tile(errs, (self._num_feature, 1)).T

            # d/dp_u = e * q_i + lam * p_u,  d/dq_i = e * p_u + lam * q_i
            u_grads = i_features * err_mat + self.lam * u_features
            i_grads = u_features * err_mat + self.lam * i_features

            u_feature_grads = zeros((self._num_user + 1, self._num_feature))
            i_feature_grads = zeros((self._num_item + 1, self._num_feature))

            for i in range(shape(data)[0]):
                user = data[i, 0]
                item = data[i, 1]
                u_feature_grads[user, :] += u_grads[i, :]
                i_feature_grads[item, :] += i_grads[i, :]

            self._user_feature = self._user_feature - (self.epsilon / self.batch_size) * u_feature_grads
            self._item_feature = self._item_feature - (self.epsilon / self.batch_size) * i_feature_grads

            train_preds = self.predict(self._train_data)
            train_rmse = RMSE(train_preds, float16(self._train_data[:, 2]))

            test_preds = self.predict(self._test_data)
            test_rmse = RMSE(test_preds, float16(self._test_data[:, 2]))

            self.train_errors.append(train_rmse)
            self.test_errors.append(test_rmse)

            print('iterations: %3d, train RMSE: %.6f, test RMSE: %.6f' % (iteration + 1, train_rmse, test_rmse))

            if last_rmse:
                if abs(train_rmse - last_rmse) < converge:
                    break
            last_rmse = train_rmse

    def predict(self, data):
        u_features = self._user_feature[data[:, 0], :]
        i_features = self._item_feature[data[:, 1], :]
        preds = sum(u_features * i_features, 1) + self._mean_rating

        if self.max_rating:
            preds[preds > self.max_rating] = self.max_rating
        if self.min_rating:
            preds[preds < self.min_rating] = self.min_rating
        return preds

def toInt(arr):
    print('toInt() starting...')
    arr = mat(arr)
    m, n = shape(arr)
    nArr = zeros((m, n), dtype=int)  # ids and timestamps overflow int8
    for i in range(m):
        for j in range(n):
            nArr[i, j] = int(arr[i, j])
    print('toInt() ending...')
    return nArr

def loadTrainData(path):
    print('loadTrainData starting...')
    l = []
    with open(path, 'r') as file:
        lines = csv.reader(file, delimiter='\t')  # ml-100k base/test files are tab-separated
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTrainData ending...')
    return toInt(l)

def loadTestData(path):
    print('loadTestData starting...')
    l = []
    with open(path) as file:
        lines = csv.reader(file, delimiter='\t')
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTestData ending...')
    return toInt(l)

train_path = r'C:\Users\think\Desktop\data\u1.base'
test_path = r'C:\Users\think\Desktop\data\u1.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)
num_feature = 15
max_iter = 20000
num_user = 943
num_item = 1682
rec = matrixFactorization(num_user, num_item, num_feature, train_data, test_data, max_rating=5, min_rating=1)
rec.estimate(max_iter)
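As a reading aid, each pass of estimate() is one full-batch gradient step on the regularized squared error of the mean-centred ratings. Writing p_u and q_i for the rows of _user_feature and _item_feature, μ for _mean_rating, ε for epsilon, B for batch_size, λ for lam and D for the training set (my notation, not names from the code):

e_{ui} = p_u^{\top} q_i - (r_{ui}-\mu),\qquad
p_u \leftarrow p_u - \frac{\varepsilon}{B}\sum_{i:(u,i)\in D}\bigl(e_{ui}\,q_i + \lambda p_u\bigr),\qquad
q_i \leftarrow q_i - \frac{\varepsilon}{B}\sum_{u:(u,i)\in D}\bigl(e_{ui}\,p_u + \lambda q_i\bigr).

Predictions add μ back and are clipped to [min_rating, max_rating].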
lfm2
import numpy as np
import scipy as sp
from numpy import *
from numpy.random import random
import csv

class SVD_C:
    def __init__(self, X, k=20):
        '''
        k is the length of the latent feature vectors
        '''
        self.X = np.array(X)
        self.k = k
        self.ave = np.mean(self.X[:, 2])
        print('the input data size is', self.X.shape)
        self.bi = {}          # item biases
        self.bu = {}          # user biases
        self.qi = {}          # item feature vectors
        self.pu = {}          # user feature vectors
        self.movie_user = {}
        self.user_movie = {}
        for i in range(self.X.shape[0]):
            uid = self.X[i][0]
            mid = self.X[i][1]
            rat = self.X[i][2]
            self.movie_user.setdefault(mid, {})
            self.user_movie.setdefault(uid, {})
            self.movie_user[mid][uid] = rat
            self.user_movie[uid][mid] = rat
            self.bi.setdefault(mid, 0)
            self.bu.setdefault(uid, 0)
            self.qi.setdefault(mid, random((self.k, 1)) / 10 * (np.sqrt(self.k)))
            self.pu.setdefault(uid, random((self.k, 1)) / 10 * (np.sqrt(self.k)))

    def pred(self, uid, mid):
        self.bi.setdefault(mid, 0)
        self.bu.setdefault(uid, 0)
        self.qi.setdefault(mid, np.zeros((self.k, 1)))
        self.pu.setdefault(uid, np.zeros((self.k, 1)))
        if self.qi[mid] is None:
            self.qi[mid] = np.zeros((self.k, 1))
        if self.pu[uid] is None:
            self.pu[uid] = np.zeros((self.k, 1))
        ans = self.ave + self.bi[mid] + self.bu[uid] + np.sum(self.qi[mid] * self.pu[uid])
        if ans > 5:
            return 5
        elif ans < 1:
            return 1
        return ans

    def train(self, steps=50, gamma=0.04, Lambda=0.15):
        for step in range(steps):
            print('the', step, '-th step is running')
            rmse_sum = 0.0
            kk = np.random.permutation(self.X.shape[0])  # visit the ratings in random order
            for j in range(self.X.shape[0]):
                i = kk[j]
                uid = self.X[i][0]
                mid = self.X[i][1]
                rat = self.X[i][2]
                eui = rat - self.pred(uid, mid)
                rmse_sum += eui ** 2
                self.bu[uid] += gamma * (eui - Lambda * self.bu[uid])
                self.bi[mid] += gamma * (eui - Lambda * self.bi[mid])
                temp = self.qi[mid].copy()  # keep the old item vector for the pu update
                self.qi[mid] += gamma * (eui * self.pu[uid] - Lambda * self.qi[mid])
                self.pu[uid] += gamma * (eui * temp - Lambda * self.pu[uid])
            gamma = gamma * 0.93  # decay the learning rate each step
            print('the rmse of this step on train data is', np.sqrt(rmse_sum / self.X.shape[0]))
            # self.test(test_data)

    def test(self, test_X):
        output = []
        sums = 0
        test_X = np.array(test_X)
        # print('the test data size is', test_X.shape)
        for i in range(test_X.shape[0]):
            pre = self.pred(test_X[i][0], test_X[i][1])
            output.append(pre)
            # print(pre, test_X[i][2])
            sums += (pre - test_X[i][2]) ** 2
        rmse = np.sqrt(sums / test_X.shape[0])
        print('the rmse on test data is', rmse)
        return output


def toInt(arr):
    print('toInt() starting...')
    arr = mat(arr)
    m, n = shape(arr)
    nArr = zeros((m, n), dtype=int)  # ids and timestamps overflow int8
    for i in range(m):
        for j in range(n):
            nArr[i, j] = int(arr[i, j])
    print('toInt() ending...')
    return nArr

def loadTrainData(path):
    print('loadTrainData starting...')
    l = []
    with open(path, 'r') as file:
        lines = csv.reader(file, delimiter='\t')  # ml-100k base/test files are tab-separated
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTrainData ending...')
    return toInt(l)

def loadTestData(path):
    print('loadTestData starting...')
    l = []
    with open(path) as file:
        lines = csv.reader(file, delimiter='\t')
        for line in lines:
            l.append(line)
    l = array(l)
    print('loadTestData ending...')
    return toInt(l)

train_path = r'C:\Users\think\Desktop\data\u1.base'
test_path = r'C:\Users\think\Desktop\data\u1.test'

train_data = loadTrainData(train_path)
test_data = loadTestData(test_path)

a = SVD_C(train_data, 30)
a.train()
a.test(test_data)
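SVD_C is the usual biased matrix factorization trained by SGD: for each observed rating it predicts

\hat r_{ui} = \mu + b_u + b_i + q_i^{\top} p_u,

and, with e_{ui} = r_{ui} - \hat r_{ui}, γ = gamma and λ = Lambda, applies

b_u \leftarrow b_u + \gamma(e_{ui} - \lambda b_u),\quad
b_i \leftarrow b_i + \gamma(e_{ui} - \lambda b_i),\quad
q_i \leftarrow q_i + \gamma(e_{ui}\,p_u - \lambda q_i),\quad
p_u \leftarrow p_u + \gamma(e_{ui}\,q_i - \lambda p_u),

shrinking γ by a factor of 0.93 after every pass over the data.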
rbm // the RMSE is a bit high, over 1.1, and I'm not sure where the problem is Orz
#include <iostream>
#include <string>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <vector>
#include <utility>
#include <cstdio>
#include <cstring>

using namespace std;

const int num_of_user = 943;
const int num_of_movies = 1682;
const int num_of_rating = 5;
const int num_of_hidden = 200;
const int num_of_visible = 1682;

double uniform(double, double);
int binomial(double);

class RBM
{
public:
    int N;
    int n_visible;
    int n_hidden;
    int rating;
    double W[num_of_hidden][num_of_visible][num_of_rating];
    double hbias[num_of_hidden];
    double vbias[num_of_rating][num_of_visible];

    RBM(int, int, int, int);
    void contrastiveDivergence(int[][1682], double, int);
    void sample_h_given_v(int[][1682], double*, int*);
    double sigmoid(double);
    double Vtoh_sigm(int[][1682], double[][5], double);
    void gibbs_hvh(int*, double[][1682], int[][1682], double*, int*);
    double HtoV_sigm(int*, int, double, int);
    void sample_v_given_h(int*, double[][1682], int[][1682]);
    void reconstruct(int[][1682], double[][1682]);
};

void RBM::contrastiveDivergence(int train_data[][1682], double learning_rate, int k)
{
    // train_data is one user's ratings, 5 * 1682 (one-hot over rating levels)
    double ph_sigm_out[num_of_hidden];
    int ph_sample[num_of_hidden];
    double nv_sigm_outs[num_of_rating][num_of_visible];  // 5 * 1682
    int nv_samples[num_of_rating][num_of_visible];       // 5 * 1682
    double nh_sigm_outs[num_of_hidden];
    int nh_samples[num_of_hidden];

    sample_h_given_v(train_data, ph_sigm_out, ph_sample);

    // CD-k: alternate hidden -> visible -> hidden k times
    for (int i = 0; i < k; ++i)
    {
        if (i == 0)
            gibbs_hvh(ph_sample, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
        else
            gibbs_hvh(nh_samples, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
    }

    for (int i = 0; i < n_hidden; ++i)
    {
        for (int j = 0; j < n_visible; ++j)
        {
            for (int kk = 0; kk < rating; ++kk)
            {
                W[i][j][kk] += learning_rate * (ph_sigm_out[i] * train_data[kk][j] - nh_sigm_outs[i] * nv_samples[kk][j]);
            }
        }
        hbias[i] += learning_rate * (ph_sigm_out[i] - nh_sigm_outs[i]);
    }

    for (int i = 0; i < rating; ++i)
    {
        for (int j = 0; j < n_visible; ++j)
        {
            vbias[i][j] += learning_rate * (train_data[i][j] - nv_samples[i][j]);
        }
    }
}

void RBM::gibbs_hvh(int* ph_sample, double nv_sigm_outs[][1682], int nv_samples[][1682], double* nh_sigm_outs, int* nh_samples)
{
    sample_v_given_h(ph_sample, nv_sigm_outs, nv_samples);
    sample_h_given_v(nv_samples, nh_sigm_outs, nh_samples);
}

void RBM::sample_h_given_v(int train_data[][1682], double* ph_sigm_out, int* ph_sample)
{
    for (int i = 0; i < n_hidden; ++i)
    {
        ph_sigm_out[i] = Vtoh_sigm(train_data, W[i], hbias[i]);
        ph_sample[i] = binomial(ph_sigm_out[i]);
    }
}

void RBM::sample_v_given_h(int* h0_sample, double nv_sigm_outs[][1682], int nv_samples[][1682])
{
    for (int i = 0; i < rating; ++i)
    {
        for (int j = 0; j < n_visible; ++j)
        {
            nv_sigm_outs[i][j] = HtoV_sigm(h0_sample, j, vbias[i][j], i);
            nv_samples[i][j] = binomial(nv_sigm_outs[i][j]);
        }
    }
}

double RBM::HtoV_sigm(int* h0_sample, int i, double vbias, int kk)
{
    double temp = 0;
    for (int j = 0; j < n_hidden; ++j)
    {
        temp += W[j][i][kk] * h0_sample[j];
    }
    temp += vbias;
    return sigmoid(temp);
}

double RBM::Vtoh_sigm(int train_data[][1682], double W[][5], double hbias)
{
    double temp = 0.0;
    for (int i = 0; i < rating; ++i)
    {
        for (int j = 0; j < n_visible; ++j)
            temp += W[j][i] * train_data[i][j];
    }
    temp += hbias;
    return sigmoid(temp);
}

double RBM::sigmoid(double x)
{
    return 1.0 / (1.0 + exp(-x));
}

RBM::RBM(int train_N, int n_v, int n_h, int rt)
{
    N = train_N;
    n_visible = num_of_visible;
    n_hidden = num_of_hidden;
    rating = num_of_rating;

    double a = 1.0 / n_visible;
    for (int i = 0; i < n_hidden; ++i)
        for (int j = 0; j < n_visible; ++j)
            for (int k = 0; k < rating; ++k)
                W[i][j][k] = uniform(-a, a);

    for (int i = 0; i < n_hidden; ++i)
        hbias[i] = 0.0;

    for (int i = 0; i < rating; ++i)
        for (int j = 0; j < n_visible; ++j)
            vbias[i][j] = 0.0;
}

void RBM::reconstruct(int test_data[][1682], double reconstruct_data[][1682])
{
    double h[num_of_hidden];
    double temp = 0;

    for (int i = 0; i < n_hidden; ++i)
    {
        h[i] = Vtoh_sigm(test_data, W[i], hbias[i]);
    }

    for (int i = 0; i < rating; ++i)
    {
        for (int j = 0; j < n_visible; ++j)
        {
            temp = 0;
            for (int kk = 0; kk < n_hidden; ++kk)
            {
                temp += W[kk][j][i] * h[kk];
            }
            temp += vbias[i][j];
            reconstruct_data[i][j] = sigmoid(temp);
        }
    }
}

double uniform(double min, double max)
{
    return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}

int binomial(double p)
{
    if (p < 0 || p > 1) return 0;
    double r = rand() / (RAND_MAX + 1.0);
    if (r < p) return 1;
    else return 0;
}

// squared prediction error of user u against the held-out (item, rating) pairs in v
double make_predict(RBM& rbm, int train_data[][1682], int u, vector<pair<int, int> >& v)
{
    double hidden[num_of_hidden];
    for (int i = 0; i < num_of_hidden; ++i)
    {
        double temp = 0.0;
        for (int j = 0; j < num_of_rating; ++j)
        {
            for (int kk = 0; kk < num_of_movies; ++kk)
            {
                temp += train_data[j][kk] * rbm.W[i][kk][j];
            }
        }
        temp += rbm.hbias[i];
        hidden[i] = rbm.sigmoid(temp);
    }
    int size = v.size();
    double ret = 0;
    for (int i = 0; i < size; ++i)
    {
        double vp[num_of_rating];
        int item = v[i].first;
        int real_rating = v[i].second;

        for (int j = 0; j < num_of_rating; ++j)
        {
            double temp = 0;
            for (int kk = 0; kk < num_of_hidden; ++kk)
            {
                temp += hidden[kk] * rbm.W[kk][item][j];
            }
            temp += rbm.vbias[j][item];
            vp[j] = exp(temp);
        }
        // predict the rating level with the largest activation
        double mx = 0, mxi = 0;
        for (int j = 0; j < num_of_rating; ++j)
        {
            if (vp[j] > mx) mx = vp[j], mxi = j;
        }
        ret += (mxi - real_rating) * (mxi - real_rating);
    }
    return ret;
}

void get_train_data(int train_data[][5][1682])
{
    FILE* fp = fopen("E:\\DL\\MovieLens\\ml-100k\\u1.base", "r");
    int u, m, r;
    long long t;
    while (fscanf(fp, "%d %d %d %lld", &u, &m, &r, &t) == 4)
    {
        u--, m--, r--;            // convert to 0-based ids and rating levels
        train_data[u][r][m] = 1;  // one-hot encode the rating
    }
    fclose(fp);
}

void get_test_data(vector<pair<int, int> > td[])
{
    FILE* fp = fopen("E:\\DL\\MovieLens\\ml-100k\\u1.test", "r");
    int u, m, r;
    long long t;
    while (fscanf(fp, "%d %d %d %lld", &u, &m, &r, &t) == 4)
    {
        u--, m--, r--;
        td[u].push_back(make_pair(m, r));
    }
    fclose(fp);
}

void train()
{
    srand(0);
    int train_N = 100;
    int n_visible = num_of_visible;
    int n_hidden = num_of_hidden;
    int rating = num_of_rating;
    int train_iter = 1000;
    double learning_rate = 0.0001;
    int k = 1;
    static int train_data[943][5][1682];  // static: ~32 MB is too large for the stack
    memset(train_data, 0, sizeof(train_data));
    get_train_data(train_data);

    vector<pair<int, int> > test_data[num_of_user];
    get_test_data(test_data);

    static RBM rbm(train_N, n_visible, n_hidden, rating);  // static: the weight tensor alone is ~13 MB

    for (int iter = 0; iter < train_iter; ++iter)
    {
        for (int i = 0; i < num_of_user; ++i)
        {
            rbm.contrastiveDivergence(train_data[i], learning_rate, k);
        }
        int cnt = 0;
        double error = 0;
        for (int i = 0; i < num_of_user; ++i)
        {
            error += make_predict(rbm, train_data[i], i, test_data[i]);
            cnt += test_data[i].size();
        }
        double rmse = sqrt(error / cnt);
        printf("epoch: %d, rmse: %f\n", iter, rmse);
        learning_rate *= 0.9;
    }

    for (int i = 0; i < num_of_hidden; ++i)
        printf("%lf ", rbm.hbias[i]);
    printf("\n-----------------------------\n");

    int cnt = 0;
    double error = 0;
    for (int i = 0; i < num_of_user; ++i)
    {
        error += make_predict(rbm, train_data[i], i, test_data[i]);
        cnt += test_data[i].size();
    }
    double rmse = sqrt(error / cnt);
    printf("rmse: %f\n", rmse);
}

// 943 users
// 1682 items
// 100000 ratings

int main()
{
    train();

    return 0;
}
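On the RMSE question in the heading: in the RBM for collaborative filtering of Salakhutdinov, Mnih and Hinton (2007), the K = 5 visible units of each movie form a single 5-way softmax and only the movies a user actually rated enter the model, whereas the code above treats the visible units as independent sigmoids in sample_v_given_h and reconstructs every movie, rated or not; either difference could plausibly keep the RMSE above 1.1. For reference, the conditionals in that formulation (with W, hbias and vbias playing the same roles as above) are

p(h_j = 1 \mid V) = \sigma\Bigl(b_j + \sum_i \sum_{k=1}^{K} v_i^k W_{ij}^k\Bigr),\qquad
p(v_i^k = 1 \mid h) = \frac{\exp\bigl(b_i^k + \sum_j h_j W_{ij}^k\bigr)}{\sum_{l=1}^{K}\exp\bigl(b_i^l + \sum_j h_j W_{ij}^l\bigr)},

and the predicted rating is usually the expectation \sum_k k\,p(v_i^k = 1 \mid h) rather than the arg max used in make_predict.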