• Experimenrs on Foursquare


     1 import pandas as pd
     2 import numpy as np
     3 import codecs
     4 import math
     5 
     6 doc1 = codecs.open('dataset_TIST2015/dataset_TIST2015_Checkins.txt','rU','latin-1')
     7 checkins = pd.read_csv(doc1, delimiter='	')
     8 checkins.columns=['userId', 'venueId', 'timeUTC', 'timeOffset']
     9 
    10 len(checkins['userId'].unique())
    11 len(checkins['venueId'].unique())
    12 
    13 from sklearn.preprocessing import LabelEncoder
    14 from scipy.sparse import csr_matrix
    15 
    16 venueIdencoder = LabelEncoder().fit(checkins['venueId'])
    17 userIdencoder = LabelEncoder().fit(checkins['userId'])
    18 
    19 checkins['venueIdencoded'] = venueIdencoder.transform(checkins['venueId'])
    20 n_venues = len(venueIdencoder.classes_)
    21 
    22 from sklearn.cross_validation import train_test_split
    23 
    24 train_df, test_df = train_test_split(checkins, train_size = 0.8)
    25 
    26 train = csr_matrix((np.ones(train_df.shape[0]), (train_df.userId, train_df.venueIdencoded)), shape=((train_df.userId.max()+1),n_venues))
    27 
    28 test = csr_matrix((np.ones(test_df.shape[0]), (test_df.userId, test_df.venueIdencoded)), shape=((test_df.userId.max()+1),n_venues))
    29 
    30 #print(test.nnz)
    31 #print(train.nnz)
    32 
    33 #print(test.max())
    34 #print(train.max())
    35 
    36 from lightfm import LightFM
    37 from lightfm.evaluation import auc_score
    38 
    39 NUM_THREADS = 1
    40 NUM_COMPONENTS = 30
    41 NUM_EPOCHS = 1
    42 ITEM_ALPHA = math.exp(-6)
    43 
    44 model = LightFM(loss='warp',
    45                 item_alpha=ITEM_ALPHA,
    46                 no_components=NUM_COMPONENTS)
    47 
    48 model.fit(train,epochs=NUM_EPOCHS,num_threads=NUM_THREADS)
    49 
    50 
    51 train_auc = auc_score(model, train,num_threads=NUM_THREADS).mean()
    52 test_auc = auc_score(model, test,train_interactions=train,num_threads=NUM_THREADS).mean()
    53 
    54 print("Train_auc is %f" %train_auc)
    55 print("Test_aus is %f" %test_auc)

    Some problems :

    Expect to get a binary marix but no...

    Here is the code in console:

     1 train
     2 Out[6]: 
     3 <266910x3680125 sparse matrix of type '<class 'numpy.float64'>'
     4     with 12774460 stored elements in Compressed Sparse Row format>
     5 train.data.max()
     6 Out[7]: 520.0
     7 train.data.min()
     8 Out[8]: 1.0
     9 test.data.max()
    10 Out[9]: 140.0
    11 test.data.mean()
    12 Out[10]: 1.533210711390105
    13 test.data.min()
    14 Out[11]: 1.0        

     

    and Running on cluster for one night but got no results showed...

     

     

  • 相关阅读:
    php 上传大文件主要涉及配置upload_max_filesize和post_max_size两个选项
    Linux 文件系统IO性能优化【转】
    MOOC Linux内核之旅小结【转】
    python实战===教你用微信每天给女朋友说晚安【转】
    wxpy: 用 Python 玩微信【转】
    AMBA总线协议AHB、APB、AXI对比分析【转】
    高手进阶,终极内存技术指南——完整/进阶版 II (转)【转】
    ARMCC和GCC编译ARM代码的软浮点和硬浮点问题 【转】
    程序员必知之浮点数运算原理详解【转】
    Hash算法【转】
  • 原文地址:https://www.cnblogs.com/fassy/p/7268682.html
Copyright © 2020-2023  润新知