from numpy import *
from numpy import linalg as la
def loadExData1():
return [[2,0,0,4,4,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,5],
[0,0,0,0,0,0,0,1,0,4,0],
[3,3,4,0,3,0,0,2,2,0,0],
[5,5,5,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,5,0,0,5,0],
[4,0,4,0,0,0,0,0,0,0,5],
[0,0,0,0,0,4,0,0,0,0,4],
[0,0,0,0,0,0,5,0,0,5,0],
[0,0,0,3,0,0,0,0,4,5,0],
[1,1,2,1,1,2,1,0,4,5,0]]
#相似度计算
def ecludSim(inA,inB):
return 1.0/(1.0 + la.norm(inA - inB))
def pearsSim(inA,inB):
if len(inA) < 3 : return 1.0
return 0.5+0.5*corrcoef(inA, inB, rowvar=0)[0][1]
def cosSim(inA, inB):
"""
:param inA: [a]
:param inB: [b]
:return: 1 or 0
"""
num = float(inA.T*inB)
demon = la.norm(inA)*la.norm(inB)
return 0.5+0.5*(num/demon) # 0.5+0.5*(a*b/abs(a*b))
# 基于物品相似度的推荐引擎
def standEst(dataMat, user, simMeas, item):
"""
:param dataMat: ex loadExdata1()
:param user: ex user=1
:param simMeas: cosSim()
:param item: ex # user=1对应的数据[0,0,0,0,0,0,0,0,0,0,5]的列=0的下标为 0 1 2 3 4 5 6 7 8 9
:return: 相似度
"""
n = shape(dataMat)[1]
simTotal = 0.0; ratSimTotal = 0.0
for j in range(n): # shape(dataMat):[x, n] x:数据集长度 n:维度
userRating = dataMat[user, j] # 取出user这条数据 a = np.array[[1,2,3],[0,2,1]]
if userRating == 0: # for i in [0,1,2]: print(a[1, i]) # 0 2 1
continue
# logical_and---- numpy逻辑与的判断
# logical_or---- numpy逻辑或的判断
# logical_not---- numpy逻辑非的判断
overLap = nonzero(logical_and(dataMat[:, item].A > 0,
dataMat[:, j].A > 0))[0]
if len(overLap) == 0:
similarity = 0
else:
similarity = simMeas(dataMat[overLap,item],
dataMat[overLap,j])
print('the %d and %d similarity is: %f'%(item, j, similarity))
simTotal += similarity
ratSimTotal += similarity * userRating
if simTotal == 0:
return 0
return ratSimTotal/simTotal
# 将一个11纬的矩阵转换成一个5维的矩阵,基于SVD的评3分估计
def svdEst(dataMat, user, simMeas, item):
n = shape(dataMat)[1] #获取物品的数量
simTotal = 0.0; ratSimTotal = 0.0
U,Sigma, VT = la.svd(dataMat)
Sig4 = mat(eye(4)*Sigma[:4])
xformedItems = dataMat.T*U[:,:4]*Sig4.I
for j in range(n):
userRating = dataMat[user,j]
if userRating == 0 or j==item: continue
similarity = simMeas(xformedItems[item,:].T,
xformedItems[j,:].T)
print('the %d and %d similarity is:%f'%(item, j, similarity))
simTotal += similarity
ratSimTotal += similarity * userRating
if simTotal == 0:return 0
else: return ratSimTotal/simTotal
def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
"""
:param dataMat: 测试数据集, ex loadExData1()
:param user: 用户ID所对应的行号index, ex user=1
:param N: default N=3 N个推荐结果,默认设为3
:param simMeas: 默认相关性函数cosSim
:param estMethod: 默认基于物品相似度的推荐函数standEst
:return: N个推荐结果
"""
unratedItems = nonzero(dataMat[user,:].A==0)[1] #返回user=1行[0,0,0,0,0,0,0,0,0,0,5],中元素为0的列下标
if len(unratedItems) == 0:
return 'you rated everything'
itemScores = []
for item in unratedItems: # [0 1 2 3 4 5 6 7 8 9]
estimatedScore = estMethod(dataMat, user, simMeas, item)
itemScores.append((item, estimatedScore))
return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]
if __name__ == '__main__':
data = mat(loadExData1())
re = recommend(data, 1)
print(re)