• 推荐算法 pd


    from numpy import *
    from numpy import linalg as la
    
    
    def loadExData1():
        return [[2,0,0,4,4,0,0,0,0,0,0],
                [0,0,0,0,0,0,0,0,0,0,5],
                [0,0,0,0,0,0,0,1,0,4,0],
                [3,3,4,0,3,0,0,2,2,0,0],
                [5,5,5,0,0,0,0,0,0,0,0],
                [0,0,0,0,0,0,5,0,0,5,0],
                [4,0,4,0,0,0,0,0,0,0,5],
                [0,0,0,0,0,4,0,0,0,0,4],
                [0,0,0,0,0,0,5,0,0,5,0],
                [0,0,0,3,0,0,0,0,4,5,0],
                [1,1,2,1,1,2,1,0,4,5,0]]
    
    
    #相似度计算
    def ecludSim(inA,inB):
        """Euclidean-distance similarity: ``1 / (1 + ||inA - inB||)``.

        Identical inputs score 1.0; the score shrinks towards 0 as the
        distance between the two rating vectors grows.
        """
        distance = la.norm(inA - inB)
        return 1.0 / (1.0 + distance)
    
    def pearsSim(inA,inB):
        """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

        With fewer than three observations the function short-circuits and
        returns 1.0 instead of computing a correlation.
        """
        if len(inA) >= 3:
            # rowvar=0: each column is a variable, each row an observation.
            correlation = corrcoef(inA, inB, rowvar=0)
            return 0.5 + 0.5 * correlation[0][1]
        return 1.0
    
    def cosSim(inA, inB):
        """Cosine similarity rescaled from [-1, 1] to [0, 1].

        :param inA: first rating vector (column matrix or array; it is flattened)
        :param inB: second rating vector with the same number of elements
        :return: ``0.5 + 0.5 * cos(inA, inB)``: 1.0 for vectors pointing the
                 same way, 0.5 for orthogonal vectors, 0.0 for opposite ones.
                 If either vector has zero length the cosine is undefined, so
                 it is treated as 0 and 0.5 is returned instead of nan.
        """
        # Flatten to plain 1-D arrays so the dot product is a true scalar;
        # float() on a 1x1 matrix is deprecated in recent NumPy releases.
        vecA = asarray(inA, dtype=float).ravel()
        vecB = asarray(inB, dtype=float).ravel()
        denom = la.norm(vecA) * la.norm(vecB)
        if denom == 0:
            # A zero vector has no direction; avoid 0/0 leaking nan to callers.
            return 0.5
        num = float(dot(vecA, vecB))
        return 0.5 + 0.5 * (num / denom)
    
    #  基于物品相似度的推荐引擎
    def standEst(dataMat, user, simMeas, item):
        """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

        :param dataMat: rating matrix (rows are indexed by user, columns by
                        item); it must expose ``.A``, as numpy matrices do
        :param user: row index of the user
        :param simMeas: similarity function called with two column slices
        :param item: column index of the item to estimate
        :return: similarity-weighted average of the user's non-zero ratings,
                 or 0 when the similarities sum to 0
        """
        itemCount = shape(dataMat)[1]
        weightSum = 0.0
        weightedRatings = 0.0
        for other in range(itemCount):
            rating = dataMat[user, other]
            if rating == 0:
                # Only items this user has rated contribute to the estimate.
                continue
            # Rows that have a positive rating for both `item` and `other`.
            bothRated = logical_and(dataMat[:, item].A > 0,
                                    dataMat[:, other].A > 0)
            rows = nonzero(bothRated)[0]
            if len(rows) > 0:
                similarity = simMeas(dataMat[rows, item], dataMat[rows, other])
            else:
                similarity = 0
            print('the %d and %d similarity is: %f'%(item, other, similarity))
            weightSum += similarity
            weightedRatings += similarity * rating
        if weightSum == 0:
            return 0
        return weightedRatings / weightSum
    
    # SVD-based rating estimate: items are compared in a reduced space built from the top 4 singular values.
    def svdEst(dataMat, user, simMeas, item, numFactors=4):
        """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

        Items are projected onto the leading singular directions of the rating
        matrix and similarities are measured between those projections.

        :param dataMat: rating matrix (rows indexed by user, columns by item)
        :param user: row index of the user
        :param simMeas: similarity function called with two column vectors
        :param item: column index of the item to estimate
        :param numFactors: number of singular values to keep (default 4); it
                           is capped at the number of singular values available
        :return: similarity-weighted average of the user's non-zero ratings
                 for the other items, or 0 when the similarities sum to 0
        :raises ValueError: if ``numFactors`` is smaller than 1
        """
        if numFactors < 1:
            raise ValueError('numFactors must be at least 1')
        ratings = asmatrix(dataMat)
        n = shape(ratings)[1]      # number of items
        U, Sigma, _ = la.svd(ratings)
        # Small matrices have fewer singular values than requested; keep what exists.
        k = min(numFactors, len(Sigma))
        SigK = asmatrix(diag(Sigma[:k]))
        # One row per item, one column per retained singular direction.
        xformedItems = ratings.T * asmatrix(U)[:, :k] * SigK.I
        simTotal = 0.0
        ratSimTotal = 0.0
        for j in range(n):
            userRating = ratings[user, j]
            if userRating == 0 or j == item:
                continue
            similarity = simMeas(xformedItems[item, :].T,
                                 xformedItems[j, :].T)
            print('the %d and %d similarity is:%f'%(item, j, similarity))
            simTotal += similarity
            ratSimTotal += similarity * userRating
        if simTotal == 0:
            return 0
        return ratSimTotal / simTotal
    
    
    def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
        """Return the top ``N`` estimated ratings for items ``user`` has not rated.

        :param dataMat: rating matrix (rows indexed by user, columns by item);
                        it must expose ``.A``, as numpy matrices do
        :param user: row index of the user
        :param N: maximum number of recommendations to return (default 3)
        :param simMeas: similarity function forwarded to ``estMethod``
        :param estMethod: estimator called as ``estMethod(dataMat, user, simMeas, item)``
        :return: list of ``(item, estimatedScore)`` tuples ordered by score,
                 highest first, or the string ``'you rated everything'`` when
                 the user's row contains no zero entries
        """
        # Column indices whose entry in this user's row is exactly 0.
        candidates = nonzero(dataMat[user, :].A == 0)[1]
        if len(candidates) == 0:
            return 'you rated everything'
        scored = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in candidates]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:N]
    
    
    if __name__ == '__main__':
        # Demo run: print the top recommendations for the user in row 1 of the
        # sample data. `asmatrix` replaces the `mat` alias, which NumPy 2.0
        # removed; the result is no longer bound to `re`, which shadowed the
        # standard-library module name.
        data = asmatrix(loadExData1())
        recommendations = recommend(data, 1)
        print(recommendations)
    # Popularity ranking: sum every column of the `model` table (after the
    # first two) and keep the column names with the largest totals.
    # The original lines were indented inconsistently (a syntax error) and
    # used `pd` without importing it; both are fixed here.
    import pandas as pd
    from django.db import connection

    select_sql = 'select * from model'
    datas = pd.read_sql(select_sql, connection)  # whole table as a DataFrame
    temp = datas.iloc[:, 2:]  # every column except the first two
    tp = temp.sum(axis=0)  # per-column totals
    top_sorts = tp.sort_values(ascending=False)  # largest total first
    # NOTE(review): the name says "top3" but the slice keeps four labels.
    # Behavior is left unchanged; confirm which one is intended.
    top3 = top_sorts.index[:4]
    top_recommends = top3.values.tolist()
  • 相关阅读:
    还是java中的编码问题
    java restful api
    编码方式
    LinkedHash
    Zoj 2562 More Divisors (反素数)
    spark复习总结03
    spark复习总结02
    spark复习总结01
    使用二进制解决一个字段代表多个状态的问题
    spark性能调优05-troubleshooting处理
  • 原文地址:https://www.cnblogs.com/tangpg/p/9815742.html
Copyright © 2020-2023  润新知