• CF Code


    user-knn

      1 import math
      2 import csv
      3 from numpy import *
      4 
      5 '''
      6 The user-item matrix stores ratings starting at (1, 1) -> (user, item).
      7 Cell (user, 0) holds the mean rating of user u;
      8 cell (0, item) holds the mean rating of item i.
      9 '''
     10 
     11 def toInt(arr):
     12     print('toInt() starting...')
     13     arr = mat(arr)
     14     m, n = shape(arr)
     15     nArr = zeros((m, n), dtype='int32')  # integer ids/ratings/timestamps; float indices would break indexing later
     16     for i in range(m):
     17         for j in range(n):
     18             nArr[i, j] = int(arr[i, j])
     19     print('toInt() ending...')
     20     return nArr
     21 
     22 def loadTrainData(path):
     23     print('loadTrainData starting...')
     24     l = []
     25     with open(path, 'r') as file:
     26         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.base files are tab-separated
     27         for line in lines:
     28             l.append(line)
     29     l = array(l)
     30     print('loadTrainData ending...')
     31     return toInt(l)
     32 
     33 def loadTestData(path):
     34     print('loadTestData starting...')
     35     l = []
     36     with open(path) as file:
     37         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.test files are tab-separated
     38         for line in lines:
     39             l.append(line)
     40     l = array(l)
     41     print('loadTestData ending...')
     42     return toInt(l)
     43 
     44 def fillUIMatrix(uimatrix, train_data):
     45     print('fillUIMatrix starting...')
     46     train_data = mat(train_data)
     47     m, n = shape(train_data)
     48     for i in range(m):
     49         uimatrix[train_data[i, 0], train_data[i, 1]] = train_data[i, 2]
     50     print('fillUIMatrix ending...')
     51 
     52 def calAverageRating(uimatrix):
     53     print('calAverageRating starting...')
     54     uimatrix = mat(uimatrix)
     55     m, n = shape(uimatrix)
     56     for i in range(1, m):
     57         rating = 0
     58         cnt = 0
     59         for j in range(1, n):
     60             rating += uimatrix[i, j]
     61             if uimatrix[i, j] != 0:
     62                 cnt += 1
     63         uimatrix[i, 0] = rating / cnt if cnt != 0 else 0
     64     
     65     for i in range(1, n):
     66         rating = 0
     67         cnt = 0
     68         for j in range(1, m):
     69             rating += uimatrix[j, i]
     70             if uimatrix[j, i] != 0:
     71                 cnt += 1
     72         if cnt == 0: uimatrix[0, i] = 0
     73         else: uimatrix[0, i] = rating / cnt
     74     print('calAverageRating ending...')
     75 
     76 def calPearson(l1, l2, rating1, rating2):
     77     print('calPearson starting...')
     78     r1 = 0.0; r2 = 0.0; r3 = 0.0;
     79     for i in range(len(l1)):
     80         r1 += (l1[i]-rating1)*(l2[i]-rating2)
     81         r2 += (l1[i]-rating1)*(l1[i]-rating1)
     82         r3 += (l2[i]-rating2)*(l2[i]-rating2)
     83     r = r1 / (sqrt(r2)*sqrt(r3))
     84     print('calPearson ending...')
     85     return abs(r)
     86     
     87 def rSort(r_list, index_list):
     88     print('rSort starting...')
     89     for i in range(len(r_list)-1):
     90         for j in range(len(r_list)-1-i):
     91             if r_list[j] < r_list[j+1]:
     92                 tmp = r_list[j]
     93                 r_list[j] = r_list[j+1]
     94                 r_list[j+1] = tmp
     95                 tmp = index_list[j]
     96                 index_list[j] = index_list[j+1]
     97                 index_list[j+1] = tmp
     98     for i in range(len(r_list)):
     99         print(i, ':', r_list[i])
    100     print('rSort ending...')
    101     
    102 def calSim(uimatrix, index):
    103     print('calSim starting...')
    104     uimatrix = mat(uimatrix)
    105     m, n = shape(uimatrix)
    106     r_list = [];         # sim list
    107     index_list = [];     # mapping sim and index
    108     for i in range(1, m):
    109         l1 = []; l2 = [];
    110         if i == index: continue
    111         for j in range(1, n):
    112             if uimatrix[i, j] != 0 and uimatrix[index, j] != 0:
    113                 l1.append(uimatrix[index, j]);
    114                 l2.append(uimatrix[i, j])
    115         if l1 != []:
    116             rating1 = 0; rating2 = 0;
    117             for j in range(len(l1)):
    118                 rating1 += l1[j]
    119             for j in range(len(l2)):
    120                 rating2 += l2[j]
    121             rating1 /= len(l1); rating2 /= len(l2);
    122             r = calPearson(l1, l2, rating1, rating2)
    123             if math.isnan(r) == True: r = 0.0
    124             r_list.append(r)
    125             index_list.append(i)
    126     rSort(r_list, index_list)
    127     print('calSim ending...')
    128     return r_list, index_list
    129 
    130 def calRMSE(uimatrix, test_data, users):
    131     print('calRMSE starting...')
    132     test_data = mat(test_data)
    133     m, n = shape(test_data)
    134     tmp1 = 0
    135     tmp2 = 0
    136     for k in range(1, users+1):
    137         for i in range(m):
    138             if test_data[i, 0] == k:
    139                 if uimatrix[k, test_data[i, 1]] == 0.0: 
    140                     uimatrix[k, test_data[i, 1]] = uimatrix[k, 0]
    141                 uimatrix[k, test_data[i, 1]] = round(uimatrix[k, test_data[i, 1]])
    142                 tmp1 += (test_data[i, 2]-uimatrix[k, test_data[i, 1]])**2
    143                 tmp2 += 1
    144                 print(test_data[i, 1], ' real rating:', test_data[i, 2], ' predict:', uimatrix[k, test_data[i, 1]])
    145     print('calRMSE ending...')
    146     return sqrt(tmp1/tmp2)
    147 
    148 select_top = 30
    149 users = 943
    150 items = 1682
    151 user_item_matrix = zeros((users+1, items+1))
    152 train_path = r'C:\Users\think\Desktop\data\u2.base'   # raw string: backslashes in Windows paths
    153 test_path = r'C:\Users\think\Desktop\data\u2.test'
    154 
    155 train_data = loadTrainData(train_path)
    156 test_data = loadTestData(test_path)
    157 
    158 fillUIMatrix(user_item_matrix, train_data)
    159 calAverageRating(user_item_matrix)
    160 uimatrix = user_item_matrix
    161 uimatrix = mat(uimatrix)
    162 
    163 for i in range(1, users+1):
    164     r_list, index_list = calSim(uimatrix, i)
    165     for j in range(1, items+1):
    166         if uimatrix[i, j] == 0:
    167             tmp1 = 0.0; tmp2 = 0.0;
    168             for k in range(min(select_top, len(r_list))):
    169                 if math.isnan(r_list[k]) == False and uimatrix[index_list[k], j] != 0:
    170                     tmp1 += r_list[k]*(uimatrix[index_list[k], j]-uimatrix[index_list[k], 0])
    171                     tmp2 += r_list[k]
    172             print(j, tmp1, tmp2)
    173             if tmp2 == 0: uimatrix[i, j] = uimatrix[i, 0]
    174             else: uimatrix[i, j] = uimatrix[i, 0] + tmp1/tmp2
    175     
    176 RMSE = calRMSE(uimatrix, test_data, users)
    177 print(RMSE)
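
    For reference, the prediction scheme that user-knn implements is mean-centred Pearson weighting: similarity is computed over the items I_uv that users u and v have both rated (the script keeps the absolute value of the correlation and the top select_top = 30 neighbours), and a missing rating is filled in from the neighbours who rated that item:

        \[
        \mathrm{sim}(u,v) = \frac{\sum_{i \in I_{uv}} (r_{ui}-\bar r_u)(r_{vi}-\bar r_v)}
                                 {\sqrt{\sum_{i \in I_{uv}} (r_{ui}-\bar r_u)^2}\,\sqrt{\sum_{i \in I_{uv}} (r_{vi}-\bar r_v)^2}},
        \qquad
        \hat r_{ui} = \bar r_u + \frac{\sum_{v \in N(u)} \mathrm{sim}(u,v)\,(r_{vi}-\bar r_v)}
                                      {\sum_{v \in N(u)} \mathrm{sim}(u,v)} .
        \]

    When no neighbour has rated the item (the denominator is zero), the script falls back to the user's mean rating, and calRMSE rounds the filled-in value before scoring.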

    lfm1

      1 from numpy import *
      2 import csv
      3 import time
      4 
      5 def RMSE(estimation, truth):
      6     num = len(estimation)
      7     
      8     sse = sum(square(truth - estimation))
      9     return sqrt(divide(sse, num-1.0))
     10 
     11 class matrixFactorization():
     12     def __init__(self, num_user, num_item, num_feature, train_data, test_data, **params):
     13         self._num_user = num_user
     14         self._num_item = num_item
     15         self._num_feature = num_feature
     16         self._train_data = train_data
     17         self._test_data = test_data
     18         
     19         self.batch_size = int(params.get('batch_size', 1000000))
     20         
     21         self.epsilon = float(params.get('epsilon', 100.0))
     22         self.lam = float(params.get('lam', 0.00001))
     23         
     24         self.max_rating = params.get('max_rating')
     25         self.min_rating = params.get('min_rating')
     26         
     27         if self.max_rating:
     28             self.max_rating = float(self.max_rating)
     29         if self.min_rating:
     30             self.min_rating = float(self.min_rating)
     31         
     32         self._mean_rating = mean(self._train_data[:, 2])
     33         
     34         self._user_feature = 0.3 * random.rand(num_user + 1, num_feature)   # +1 row: MovieLens ids are 1-based, row 0 unused
     35         self._item_feature = 0.3 * random.rand(num_item + 1, num_feature)
     36         
     37         self.train_errors = []
     38         self.test_errors = []
     39         
     40     def estimate(self, iterations = 50, converge = 1e-4):
     41         last_rmse = None
     42         for iteration in range(iterations):
     43             data = self._train_data
     44             #compute gradient
     45             u_features = (self._user_feature)[data[:, 0], :]
     46             i_features = (self._item_feature)[data[:, 1], :]
     47             ratings = data[:, 2] - self._mean_rating
     48             preds = sum(u_features*i_features, 1)
     49             errs = preds - ratings
     50             err_mat = tile(errs, (self._num_feature, 1)).T
     51             
     52             u_grads = err_mat * i_features + self.lam * u_features
     53             i_grads = err_mat * u_features + self.lam * i_features
     54             
     55             u_feature_grads = zeros((self._num_user + 1, self._num_feature))
     56             i_feature_grads = zeros((self._num_item + 1, self._num_feature))
     57             
     58             for i in range(shape(data)[0]):
     59                 user = data[i, 0]
     60                 item = data[i, 1]
     61                 u_feature_grads[user, :] += u_grads[i, :]
     62                 i_feature_grads[item, :] += i_grads[i, :]
     63             
     64             self._user_feature = self._user_feature - (self.epsilon / self.batch_size) * u_feature_grads
     65             self._item_feature = self._item_feature - (self.epsilon / self.batch_size) * i_feature_grads
     66         
     67             train_preds = self.predict(self._train_data)
     68             train_rmse = RMSE(train_preds, float16(self._train_data[:, 2]))
     69             
     70             test_preds = self.predict(self._test_data)
     71             test_rmse = RMSE(test_preds, float16(self._test_data[:, 2]))
     72             
     73             self.train_errors.append(train_rmse)
     74             self.test_errors.append(test_rmse)
     75             
     76             print('iterations: %3d, train RMSE: %.6f, test RMSE: %.6f' % (iteration+1, train_rmse, test_rmse))
     77             
     78             if last_rmse:
     79                 if abs(train_rmse - last_rmse) < converge:
     80                     break
     81             last_rmse = train_rmse
     82     
     83     def predict(self, data):
     84         u_features = self._user_feature[data[:, 0], :]
     85         i_features = self._item_feature[data[:, 1], :]
     86         preds = sum(u_features*i_features, 1) + self._mean_rating
     87         
     88         if self.max_rating:
     89             preds[preds > self.max_rating] = self.max_rating
     90         if self.min_rating:
     91             preds[preds < self.min_rating] = self.min_rating
     92         return preds
     93 
     94 def toInt(arr):
     95     print('toInt() starting...')
     96     arr = mat(arr)
     97     m, n = shape(arr)
     98     nArr = zeros((m, n), dtype='int32')  # int8 would overflow on item ids and timestamps
     99     for i in range(m):
    100         for j in range(n):
    101             nArr[i, j] = int(arr[i, j])
    102     print('toInt() ending...')
    103     return nArr
    104 
    105 def loadTrainData(path):
    106     print('loadTrainData starting...')
    107     l = []
    108     with open(path, 'r') as file:
    109         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.base files are tab-separated
    110         for line in lines:
    111             l.append(line)
    112     l = array(l)
    113     print('loadTrainData ending...')
    114     return toInt(l)
    115 
    116 def loadTestData(path):
    117     print('loadTestData starting...')
    118     l = []
    119     with open(path) as file:
    120         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.test files are tab-separated
    121         for line in lines:
    122             l.append(line)
    123     l = array(l)
    124     print('loadTestData ending...')
    125     return toInt(l)
    126 
    127 train_path = r'C:\Users\think\Desktop\data\u1.base'   # raw string: backslashes in Windows paths
    128 test_path = r'C:\Users\think\Desktop\data\u1.test'
    129 
    130 train_data = loadTrainData(train_path)
    131 test_data = loadTestData(test_path)
    132 num_feature = 15
    133 max_iter = 20000
    134 num_user = 943
    135 num_item = 1682
    136 rec = matrixFactorization(num_user, num_item, num_feature, train_data, test_data, max_rating=5, min_rating=1)
    137 rec.estimate(max_iter)
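
    lfm1 is plain regularized matrix factorization trained by full-batch gradient descent on mean-centred ratings: with global mean \mu, user vectors p_u and item vectors q_i, every iteration accumulates the per-rating gradients below over the whole training set and takes one step of size epsilon / batch_size:

        \[
        e_{ui} = p_u^{\top} q_i - (r_{ui} - \mu), \qquad
        \frac{\partial L}{\partial p_u} = e_{ui}\, q_i + \lambda\, p_u, \qquad
        \frac{\partial L}{\partial q_i} = e_{ui}\, p_u + \lambda\, q_i,
        \]
        \[
        L = \tfrac{1}{2} \sum_{(u,i)} \bigl(p_u^{\top} q_i - (r_{ui}-\mu)\bigr)^2
          + \tfrac{\lambda}{2} \sum_{(u,i)} \bigl(\lVert p_u\rVert^2 + \lVert q_i\rVert^2\bigr).
        \]

    predict() adds \mu back and clips the result to the [min_rating, max_rating] range passed to the constructor.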

    lfm2

      1 from __future__ import division
      2 import numpy as np
      3 import scipy as sp
      4 from numpy import *
      5 from numpy.random import random
      6 import csv
      7 
      8 class  SVD_C:
      9     def __init__(self,X,k=20):
     10         '''
     11             k  is the length of vector
     12         '''
     13         self.X=np.array(X)
     14         self.k=k
     15         self.ave=np.mean(self.X[:,2])
     16         print("the input data size is ", self.X.shape)
     17         self.bi={}
     18         self.bu={}
     19         self.qi={}
     20         self.pu={}
     21         self.movie_user={}
     22         self.user_movie={}
     23         for i in range(self.X.shape[0]):
     24             uid=self.X[i][0]
     25             mid=self.X[i][1]
     26             rat=self.X[i][2]
     27             self.movie_user.setdefault(mid,{})
     28             self.user_movie.setdefault(uid,{})
     29             self.movie_user[mid][uid]=rat
     30             self.user_movie[uid][mid]=rat
     31             self.bi.setdefault(mid,0)
     32             self.bu.setdefault(uid,0)
     33             self.qi.setdefault(mid,random((self.k,1))/10*(np.sqrt(self.k)))
     34             self.pu.setdefault(uid,random((self.k,1))/10*(np.sqrt(self.k)))
     35     def pred(self,uid,mid):
     36         self.bi.setdefault(mid,0)
     37         self.bu.setdefault(uid,0)
     38         self.qi.setdefault(mid,np.zeros((self.k,1)))
     39         self.pu.setdefault(uid,np.zeros((self.k,1)))
     40         if self.qi[mid] is None:
     41             self.qi[mid]=np.zeros((self.k,1))
     42         if self.pu[uid] is None:
     43             self.pu[uid]=np.zeros((self.k,1))
     44         ans=self.ave+self.bi[mid]+self.bu[uid]+np.sum(self.qi[mid]*self.pu[uid])
     45         if ans>5:
     46             return 5
     47         elif ans<1:
     48             return 1
     49         return ans
     50     def train(self,steps=50,gamma=0.04,Lambda=0.15):
     51         for step in range(steps):
     52             print('the', step, '-th step is running')
     53             rmse_sum=0.0
     54             kk=np.random.permutation(self.X.shape[0])
     55             for j in range(self.X.shape[0]):
     56                 i=kk[j]
     57                 uid=self.X[i][0]
     58                 mid=self.X[i][1]
     59                 rat=self.X[i][2]
     60                 eui=rat-self.pred(uid,mid)
     61                 rmse_sum+=eui**2
     62                 self.bu[uid]+=gamma*(eui-Lambda*self.bu[uid])
     63                 self.bi[mid]+=gamma*(eui-Lambda*self.bi[mid])
     64                 temp=self.qi[mid]
     65                 self.qi[mid]+=gamma*(eui*self.pu[uid]-Lambda*self.qi[mid])
     66                 self.pu[uid]+=gamma*(eui*temp-Lambda*self.pu[uid])
     67             gamma=gamma*0.93
     68             print("the rmse of this step on train data is ", np.sqrt(rmse_sum/self.X.shape[0]))
     69             #self.test(test_data)
     70     def test(self,test_X):
     71         output=[]
     72         sums=0
     73         test_X=np.array(test_X)
     74         #print "the test data size is ",test_X.shape
     75         for i in range(test_X.shape[0]):
     76             pre=self.pred(test_X[i][0],test_X[i][1])
     77             output.append(pre)
     78             #print pre,test_X[i][2]
     79             sums+=(pre-test_X[i][2])**2
     80         rmse=np.sqrt(sums/test_X.shape[0])
     81         print("the rmse on test data is ", rmse)
     82         return output
     83 
     84     
     85 def toInt(arr):
     86     print('toInt() starting...')
     87     arr = mat(arr)
     88     m, n = shape(arr)
     89     nArr = zeros((m, n), dtype='int32')  # int8 would overflow on item ids and timestamps
     90     for i in range(m):
     91         for j in range(n):
     92             nArr[i, j] = int(arr[i, j])
     93     print('toInt() ending...')
     94     return nArr
     95 
     96 def loadTrainData(path):
     97     print('loadTrainData starting...')
     98     l = []
     99     with open(path, 'r') as file:
    100         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.base files are tab-separated
    101         for line in lines:
    102             l.append(line)
    103     l = array(l)
    104     print('loadTrainData ending...')
    105     return toInt(l)
    106 
    107 def loadTestData(path):
    108     print('loadTestData starting...')
    109     l = []
    110     with open(path) as file:
    111         lines = csv.reader(file, delimiter='\t')  # MovieLens u*.test files are tab-separated
    112         for line in lines:
    113             l.append(line)
    114     l = array(l)
    115     print('loadTestData ending...')
    116     return toInt(l)
    117 
    118 train_path = r'C:\Users\think\Desktop\data\u1.base'   # raw string: backslashes in Windows paths
    119 test_path = r'C:\Users\think\Desktop\data\u1.test'
    120 
    121 train_data = loadTrainData(train_path)
    122 test_data = loadTestData(test_path)
    123 
    124 a = SVD_C(train_data, 30)
    125 a.train()
    126 a.test(test_data)
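
    lfm2 is the biased-SVD model trained by stochastic gradient descent: the prediction combines a global mean, user and item biases and a latent inner product, and each observed rating (visited in a random permutation, with gamma = 0.04 decayed by 0.93 per epoch and Lambda = 0.15) triggers the updates

        \[
        \hat r_{ui} = \mu + b_u + b_i + q_i^{\top} p_u, \qquad e_{ui} = r_{ui} - \hat r_{ui},
        \]
        \[
        b_u \leftarrow b_u + \gamma (e_{ui} - \lambda b_u), \quad
        b_i \leftarrow b_i + \gamma (e_{ui} - \lambda b_i), \quad
        q_i \leftarrow q_i + \gamma (e_{ui}\, p_u - \lambda q_i), \quad
        p_u \leftarrow p_u + \gamma (e_{ui}\, q_i - \lambda p_u).
        \]

    pred() clips the estimate to [1, 5] before it is used, both during training and when the test RMSE is computed.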

     rbm  // the RMSE is a bit high, 1.1+; not sure where the problem is Orz

      1 #include <iostream>
      2 #include <string>
      3 #include <cstdlib>
      4 #include <cmath>
      5 #include <algorithm>
      6 #include <vector>
      7 #include <utility>
      8 #include <cstdio>
      9 #include <cstring>
     10 
     11 using namespace std;
     12 
     13 const int num_of_user = 943;
     14 const int num_of_movies = 1682;
     15 const int num_of_rating = 5;
     16 const int num_of_hidden = 200;
     17 const int num_of_visible = 1682;
     18 
     19 double uniform(double, double);
     20 int binomial(double);
     21 
     22 class RBM
     23 {
     24 public:
     25     int N;
     26     int n_visible;
     27     int n_hidden;
     28     int rating;
     29     double W[num_of_hidden][num_of_visible][num_of_rating];
     30     double hbias[num_of_hidden];
     31     double vbias[num_of_rating][num_of_visible];
     32 
     33     RBM(int, int, int, int);
     34     void contrastiveDivergence(int[][1682], double, int);
     35     void sample_h_given_v(int[][1682], double*, int*);
     36     double sigmoid(double);
     37     double Vtoh_sigm(int [][1682], double [][5], double);
     38     void gibbs_hvh(int*, double[][1682], int[][1682], double*, int*);
     39     double HtoV_sigm(int*, int, double, int);
     40     void sample_v_given_h(int* , double [][1682], int [][1682]);
     41     void reconstruct(int[][1682], double[][1682]);
     42 };
     43 
     44 void RBM::contrastiveDivergence(int train_data[][1682], double learning_rate, int k)
     45 {
     46     //train_data 5 * 1682
     47     double ph_sigm_out[num_of_hidden]; // 200
     48     int ph_sample[num_of_hidden]; // 200
     49     double nv_sigm_outs[num_of_rating][num_of_visible]; // 5 * 1682
     50     int nv_samples[num_of_rating][num_of_visible]; // 5 * 1682
     51     double nh_sigm_outs[num_of_hidden]; // 200
     52     int nh_samples[num_of_hidden]; // 200
     53 
     54     sample_h_given_v(train_data, ph_sigm_out, ph_sample);
     55 
     56     for (int i = 0; i < k; ++i)
     57     {
     58         if (i == 0)
     59             gibbs_hvh(ph_sample, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
     60         else
     61             gibbs_hvh(nh_samples, nv_sigm_outs, nv_samples, nh_sigm_outs, nh_samples);
     62     }
     63 
     64     for (int i = 0; i < n_hidden; ++i)
     65     {
     66         for (int j = 0; j < n_visible; ++j)
     67         {
     68             for (int kk = 0; kk < rating; ++kk)
     69             {
     70                 W[i][j][kk] += learning_rate * (ph_sigm_out[i] * train_data[kk][j] - nh_sigm_outs[i] * nv_samples[kk][j]);
     71             }
     72         }
     73         hbias[i] += learning_rate * (ph_sigm_out[i] - nh_sigm_outs[i]) ;
     74     }
     75 
     76     for (int i = 0; i < rating; ++i)
     77     {
     78         for (int j = 0; j < n_visible; ++j)
     79         {
     80             vbias[i][j] += learning_rate * (train_data[i][j] - nv_samples[i][j]) ;
     81         }
     82     }
     83 }
     84 
     85 void RBM::gibbs_hvh(int* ph_sample, double nv_sigm_outs[][1682], int nv_samples[][1682], double* nh_sigm_outs, int* nh_samples)
     86 {
     87     sample_v_given_h(ph_sample, nv_sigm_outs, nv_samples);
     88     sample_h_given_v(nv_samples, nh_sigm_outs, nh_samples);
     89 }
     90 
     91 void RBM::sample_h_given_v(int train_data[][1682], double* ph_sigm_out, int* ph_sample)
     92 {
     93     for (int i = 0; i < n_hidden; ++i)
     94     {
     95         ph_sigm_out[i] = Vtoh_sigm(train_data, W[i], hbias[i]);
     96         ph_sample[i] = binomial(ph_sigm_out[i]);
     97     }
     98 }
     99 
    100 void RBM::sample_v_given_h(int* h0_sample, double nv_sigm_outs[][1682], int nv_samples[][1682])
    101 {
    102     for (int i = 0; i < rating; ++i)
    103     {
    104         for (int j = 0; j < n_visible; ++j)
    105         {
    106             nv_sigm_outs[i][j] = HtoV_sigm(h0_sample, j, vbias[i][j], i);
    107             nv_samples[i][j] = binomial(nv_sigm_outs[i][j]);
    108         }
    109     }
    110 }
    111 
    112 double RBM::HtoV_sigm(int* h0_sample, int i, double vbias, int kk)  // the visible bias is real-valued
    113 {
    114     double temp = 0;
    115     for (int j = 0; j < n_hidden; ++j)
    116     {
    117         temp += W[j][i][kk] * h0_sample[j];
    118     }
    119     temp += vbias;
    120     return sigmoid(temp);
    121 }
    122 
    123 double RBM::Vtoh_sigm(int train_data[][1682], double W[][5], double hbias)
    124 {
    125     double temp = 0.0;
    126     for (int i = 0; i < rating; ++i)
    127     {
    128         for (int j = 0; j < n_visible; ++j)
    129             temp += W[j][i] * train_data[i][j];
    130     }
    131     temp += hbias;
    132     return sigmoid(temp);
    133 }
    134 
    135 double RBM::sigmoid(double x)
    136 {
    137     return 1.0 / (1.0 + exp(-x));
    138 }
    139 
    140 RBM::RBM(int train_N, int n_v, int n_h, int rt)
    141 {
    142     N = train_N;
    143     n_visible = num_of_visible;
    144     n_hidden = num_of_hidden;
    145     rating = num_of_rating;
    146 
    147     double a = 1.0 / n_visible;
    148     for (int i = 0; i < n_hidden; ++i)
    149         for (int j = 0; j < n_visible; ++j)
    150             for (int k = 0; k < rating; ++k)
    151                 W[i][j][k] = uniform(-a, a);
    152 
    153 
    154     for (int i = 0; i < n_hidden; ++i)
    155         hbias[i] = 0.0;
    156 
    157     for (int i = 0; i < rating; ++i)
    158         for (int j = 0; j < n_visible; ++j)
    159             vbias[i][j] = 0.0;
    160 }
    161 
    162 void RBM::reconstruct(int test_data[][1682], double reconstruct_data[][1682])
    163 {
    164     double h[num_of_hidden];
    165     double temp = 0;
    166 
    167     for (int i = 0; i < n_hidden; ++i)
    168     {
    169         h[i] = Vtoh_sigm(test_data, W[i], hbias[i]);
    170     }
    171 
    172     for (int i = 0; i < rating; ++i)
    173     {
    174         for (int j = 0; j < n_visible; ++j)
    175         {
    176             temp = 0;
    177             for (int kk = 0; kk < n_hidden; ++kk)
    178             {
    179                 temp += W[kk][j][i] * h[kk];
    180             }
    181             temp += vbias[i][j];
    182             reconstruct_data[i][j] = sigmoid(temp);
    183         }
    184     }
    185 }
    186 
    187 double uniform(double min, double max)
    188 {
    189     return rand() / (RAND_MAX + 1.0) * (max - min) + min;
    190 }
    191 
    192 int binomial(double p)
    193 {
    194     if (p < 0 || p > 1) return 0;
    195     double r = rand() / (RAND_MAX + 1.0);
    196     if (r < p) return 1;
    197     else return 0;
    198 }
    199 
    200 double make_predict(RBM& rbm, int train_data[][1682], int u, vector<pair<int, int> >& v)  // pass by reference: the RBM holds several MB of weights
    201 {
    202     double hidden[num_of_hidden];
    203     for (int i = 0; i < num_of_hidden; ++i)
    204     {
    205         double temp = 0.0;
    206         for (int j = 0; j < num_of_rating; ++j)
    207         {
    208             for (int kk = 0; kk < num_of_movies; ++kk)
    209             {
    210                 temp += train_data[j][kk] * rbm.W[i][kk][j];
    211             }
    212         }
    213         temp += rbm.hbias[i];
    214         hidden[i] = rbm.sigmoid(temp);
    215     }
    216     int size = v.size();
    217     double ret = 0;
    218     for (int i = 0; i < size; ++i)
    219     {
    220         double vp[num_of_rating];
    221         int item = v[i].first;
    222         int real_rating = v[i].second;
    223 
    224         for (int j = 0; j < num_of_rating; ++j)
    225         {
    226             double temp = 0;
    227             for (int kk = 0; kk < num_of_hidden; ++kk)
    228             {
    229                 temp += hidden[kk]*rbm.W[kk][item][j];
    230             }
    231             temp += rbm.vbias[j][item];
    232             temp = exp(temp);
    233             vp[j] = temp;
    234         }
    235         double mx = 0, mxi = 0;
    236         for (int j = 0; j < num_of_rating; ++j)
    237         {
    238             if (vp[j] > mx) mx = vp[j], mxi = j;
    239         }
    240         ret += (mxi - real_rating) * (mxi - real_rating);
    241     }
    242     return ret;
    243 }
    244 
    245 void get_train_data(int train_data[][5][1682])
    246 {
    247     FILE *fp;
    248     freopen("E:\\DL\\MovieLens\\ml-100k\\u1.base", "r", stdin);
    249     int u, m, r;
    250     long long t;
    251     printf("a\n");
    252     long long int cnt = 0;
    253     while (~scanf("%d %d %d %lld", &u, &m, &r, &t))
    254     {
    255         u--, m--, r--;
    256         train_data[u][r][m] = 1;
    257     }
    258     fclose(stdin);
    259 }
    260 
    261 void get_test_data(vector<pair<int, int> > td[])
    262 {
    263     FILE* fp;
    264     freopen("E:\\DL\\MovieLens\\ml-100k\\u1.test", "r", stdin);
    265     int u, m, r;
    266     long long t;
    267     while (~scanf("%d %d %d %lld", &u, &m, &r, &t))
    268     {
    269         u--, m--, r--;
    270         td[u].push_back(make_pair(m, r));
    271     }
    272     fclose(stdin);
    273 }
    274 
    275 void train()
    276 {
    277     srand(0);
    278     int train_N = 100;
    279     int n_visible = num_of_visible;
    280     int n_hidden = num_of_hidden;
    281     int rating = num_of_rating;
    282     int train_iter = 1000;
    283     double learning_rate = 0.0001;
    284     int training_num = 1000;
    285     int k = 1;
    286     static int train_data[943][5][1682];   // ~31 MB: far too large for the stack, so make it static
    287     memset(train_data, 0, sizeof(train_data));
    288     get_train_data(train_data);
    289 
    290     static double hbias[num_of_user][num_of_hidden];   // unused here; static keeps it off the stack
    291     memset(hbias, 0, sizeof(hbias));
    292 
    293     vector<pair<int, int> > test_data[num_of_user];
    294     get_test_data(test_data);
    295 
    296 
    297     static RBM rbm(train_N, n_visible, n_hidden, rating);   // W alone is ~13 MB, keep the object off the stack
    298 
    299     for (int iter = 0; iter < train_iter; ++iter)
    300     {
    301         for (int i = 0; i < num_of_user; ++i)
    302         {
    303             rbm.contrastiveDivergence(train_data[i], learning_rate, 1);
    304         }
    305         int cnt = 0;
    306         double error = 0;
    307         for (int i = 0; i < num_of_user; ++i)
    308         {
    309             error += make_predict(rbm, train_data[i], i, test_data[i]);
    310             cnt += test_data[i].size();
    311         }
    312         double rmse = sqrt(error / cnt);
    313         printf("epoch: %d, rmse: %f\n", iter, rmse);
    314         learning_rate *= 0.9;
    315     }
    316 
    317     for (int i = 0; i < num_of_hidden; ++i)
    318         printf("%lf ", rbm.hbias[i]);
    319     printf("-----------------------------");
    320 
    321     int cnt = 0;
    322     double error = 0;
    323     for (int i = 0; i < num_of_user; ++i)
    324     {
    325         error += make_predict(rbm, train_data[i], i, test_data[i]);
    326         cnt += test_data[i].size();
    327     }
    328     double rmse = sqrt(error / cnt);
    329     printf("rmse: %f\n", rmse);
    330 
    331 }
    332 
    333 // 943 users
    334 // 1682 items
    335 // 100000 ratings
    336 
    337 int main()
    338 {
    339     train();
    340 
    341     return 0;
    342 }
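
    For comparison, the standard RBM for collaborative filtering (Salakhutdinov, Mnih & Hinton, ICML 2007), which this code appears to be based on, treats each user's ratings as one-hot visible units v_i^k over the five rating values, with weights W_{jik} shared across users; its conditional distributions are

        \[
        p(h_j = 1 \mid V) = \sigma\Bigl(b_j + \sum_{i}\sum_{k=1}^{5} v_i^{k} W_{jik}\Bigr), \qquad
        p(v_i^{k} = 1 \mid h) = \frac{\exp\bigl(b_i^{k} + \sum_{j} h_j W_{jik}\bigr)}
                                     {\sum_{l=1}^{5} \exp\bigl(b_i^{l} + \sum_{j} h_j W_{jil}\bigr)} .
        \]

    make_predict above scores an item's five rating units with the unnormalised exp terms and takes the argmax, matching the right-hand expression; sample_v_given_h, however, samples every (rating, item) unit as an independent Bernoulli through a plain sigmoid rather than a per-item softmax, which is one place worth checking given the high RMSE noted above.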
  • Original post: https://www.cnblogs.com/JustForCS/p/5486974.html