Experiments on Foursquare

 1 import pandas as pd
 2 import numpy as np
 3 import codecs
 4 import math
 5 
 6 doc1 = codecs.open('dataset_TIST2015/dataset_TIST2015_Checkins.txt','rU','latin-1')
 7 checkins = pd.read_csv(doc1, delimiter='	')
 8 checkins.columns=['userId', 'venueId', 'timeUTC', 'timeOffset']
 9 
10 len(checkins['userId'].unique())
11 len(checkins['venueId'].unique())
12 
13 from sklearn.preprocessing import LabelEncoder
14 from scipy.sparse import csr_matrix
15 
16 venueIdencoder = LabelEncoder().fit(checkins['venueId'])
17 userIdencoder = LabelEncoder().fit(checkins['userId'])
18 
19 checkins['venueIdencoded'] = venueIdencoder.transform(checkins['venueId'])
20 n_venues = len(venueIdencoder.classes_)
21 
22 from sklearn.cross_validation import train_test_split
23 
24 train_df, test_df = train_test_split(checkins, train_size = 0.8)
25 
26 train = csr_matrix((np.ones(train_df.shape[0]), (train_df.userId, train_df.venueIdencoded)), shape=((train_df.userId.max()+1),n_venues))
27 
28 test = csr_matrix((np.ones(test_df.shape[0]), (test_df.userId, test_df.venueIdencoded)), shape=((test_df.userId.max()+1),n_venues))
29 
30 #print(test.nnz)
31 #print(train.nnz)
32 
33 #print(test.max())
34 #print(train.max())
35 
36 from lightfm import LightFM
37 from lightfm.evaluation import auc_score
38 
39 NUM_THREADS = 1
40 NUM_COMPONENTS = 30
41 NUM_EPOCHS = 1
42 ITEM_ALPHA = math.exp(-6)
43 
44 model = LightFM(loss='warp',
45                 item_alpha=ITEM_ALPHA,
46                 no_components=NUM_COMPONENTS)
47 
48 model.fit(train,epochs=NUM_EPOCHS,num_threads=NUM_THREADS)
49 
50 
51 train_auc = auc_score(model, train,num_threads=NUM_THREADS).mean()
52 test_auc = auc_score(model, test,train_interactions=train,num_threads=NUM_THREADS).mean()
53 
54 print("Train_auc is %f" %train_auc)
55 print("Test_aus is %f" %test_auc)

Some problems:

I expected to get a binary matrix, but the stored values are not all 1...

Here is the console output:

 1 train
 2 Out[6]: 
 3 <266910x3680125 sparse matrix of type '<class 'numpy.float64'>'
 4     with 12774460 stored elements in Compressed Sparse Row format>
 5 train.data.max()
 6 Out[7]: 520.0
 7 train.data.min()
 8 Out[8]: 1.0
 9 test.data.max()
10 Out[9]: 140.0
11 test.data.mean()
12 Out[10]: 1.533210711390105
13 test.data.min()
14 Out[11]: 1.0

Also, the script ran on the cluster for a whole night but produced no results...

相关阅读:
php 上传大文件主要涉及配置upload_max_filesize和post_max_size两个选项
Linux 文件系统IO性能优化【转】
MOOC Linux内核之旅小结【转】
python实战===教你用微信每天给女朋友说晚安【转】
wxpy: 用 Python 玩微信【转】
AMBA总线协议AHB、APB、AXI对比分析【转】
高手进阶，终极内存技术指南——完整/进阶版 II (转)【转】
ARMCC和GCC编译ARM代码的软浮点和硬浮点问题【转】
程序员必知之浮点数运算原理详解【转】
Hash算法【转】

原文地址：https://www.cnblogs.com/fassy/p/7268682.html