import math
import random
from collections import defaultdict
from operator import itemgetter
def user_similarity(train):
    """
    Build the user-user similarity matrix for user-based CF (UserCF).

    :param train: training set, a mapping of user -> mapping keyed by item
    :return: nested mapping ``w[u][v]`` = (number of items shared by ``u``
        and ``v``) / sqrt(items of ``u`` * items of ``v``); only pairs that
        share at least one item are present
    """
    # Invert the user -> items table into item -> set of users.
    users_by_item = {}
    for user, rated in train.items():
        for item in rated.keys():
            users_by_item.setdefault(item, set()).add(user)

    # Count items per user and co-rated items per ordered user pair.
    overlap = {}
    item_count = defaultdict(int)
    for audience in users_by_item.values():
        for u in audience:
            item_count[u] += 1
            for v in audience:
                if v != u:
                    overlap.setdefault(u, defaultdict(int))[v] += 1

    # Normalise every overlap count into the final similarity matrix.
    w = {}
    for u, neighbours in overlap.items():
        for v, shared in neighbours.items():
            scale = math.sqrt(item_count[u] * item_count[v])
            w.setdefault(u, defaultdict(int))[v] = shared / scale
    return w
def user_similarity2(train):
    """
    Build the user-user similarity matrix for UserCF-IIF.

    Same as plain UserCF, except that every shared item contributes
    ``1 / log(1 + number of users of that item)`` instead of 1, so popular
    items weigh less.

    :param train: training set, a mapping of user -> mapping keyed by item
    :return: nested mapping ``w[u][v]`` with the penalised similarity; only
        pairs that share at least one item are present
    """
    # Invert the user -> items table into item -> set of users.
    users_by_item = {}
    for user, rated in train.items():
        for item in rated.keys():
            users_by_item.setdefault(item, set()).add(user)

    # Accumulate the popularity-penalised overlap per ordered user pair.
    overlap = {}
    item_count = defaultdict(int)
    for audience in users_by_item.values():
        # Every set holds at least one user, so the logarithm is positive.
        penalty = 1 / math.log(1 + len(audience))
        for u in audience:
            item_count[u] += 1
            for v in audience:
                if v != u:
                    overlap.setdefault(u, defaultdict(int))[v] += penalty

    # Normalise every overlap score into the final similarity matrix.
    w = {}
    for u, neighbours in overlap.items():
        for v, shared in neighbours.items():
            scale = math.sqrt(item_count[u] * item_count[v])
            w.setdefault(u, defaultdict(int))[v] = shared / scale
    return w
def item_similarity(train):
    """
    Build the item-item similarity matrix for item-based CF (ItemCF).

    :param train: training set, a mapping of user -> collection of items
    :return: nested mapping ``w[i][j]`` = (number of users having both ``i``
        and ``j``) / sqrt(users of ``i`` * users of ``j``); only item pairs
        that co-occur for at least one user are present
    """
    # Count users per item and co-occurrences per ordered item pair.
    co_counts = {}
    popularity = defaultdict(int)
    for _user, basket in train.items():
        for i in basket:
            popularity[i] += 1
            row = co_counts.setdefault(i, {})
            for j in basket:
                if j != i:
                    row[j] = row.get(j, 0) + 1

    # Normalise every co-occurrence count into the final similarity matrix.
    w = {}
    for i, row in co_counts.items():
        for j, cij in row.items():
            scale = math.sqrt(popularity[i] * popularity[j])
            w.setdefault(i, defaultdict(float))[j] = cij / scale
    return w
def item_similarity2(train):
    """
    Build the item-item similarity matrix for ItemCF-IUF.

    Same as plain ItemCF, except that every co-occurrence contributes
    ``1 / log(1 + number of items of that user)`` instead of 1, so very
    active users weigh less.

    :param train: training set, a mapping of user -> collection of items
    :return: nested mapping ``w[i][j]`` with the penalised similarity; only
        item pairs that co-occur for at least one user are present
    """
    # Count users per item and accumulate penalised co-occurrences.
    co_scores = {}
    popularity = defaultdict(int)
    for _user, basket in train.items():
        for i in basket:
            popularity[i] += 1
            row = co_scores.setdefault(i, {})
            for j in basket:
                if j != i:
                    # Evaluated only for real pairs, so the basket has at
                    # least two items and the logarithm is never zero.
                    row[j] = row.get(j, 0) + 1 / math.log(1 + len(basket) * 1.0)

    # Normalise every score into the final similarity matrix.
    w = {}
    for i, row in co_scores.items():
        for j, cij in row.items():
            scale = math.sqrt(popularity[i] * popularity[j])
            w.setdefault(i, defaultdict(float))[j] = cij / scale
    return w
def item_similarity3(train):
    """
    Build the normalised item-item similarity matrix for ItemCF-IUF.

    Computes the ItemCF-IUF similarity (each co-occurrence contributes
    ``1 / log(1 + number of items of that user)``) and then divides every
    row by its largest value, so the best neighbour of each item scores 1.

    :param train: training set, a mapping of user -> collection of items
    :return: nested mapping ``w[i][j]`` with the row-normalised similarity;
        only item pairs that co-occur for at least one user are present
    """
    # Count users per item and accumulate penalised co-occurrences.
    co_scores = {}
    popularity = defaultdict(int)
    for _user, basket in train.items():
        for i in basket:
            popularity[i] += 1
            row = co_scores.setdefault(i, {})
            for j in basket:
                if j != i:
                    # Evaluated only for real pairs, so the basket has at
                    # least two items and the logarithm is never zero.
                    row[j] = row.get(j, 0) + 1 / math.log(1 + len(basket) * 1.0)

    # Turn the scores into the (not yet normalised) similarity matrix.
    w = {}
    for i, row in co_scores.items():
        for j, cij in row.items():
            scale = math.sqrt(popularity[i] * popularity[j])
            w.setdefault(i, defaultdict(float))[j] = cij / scale

    # Row-wise max normalisation; rows are never empty because a row is
    # only created together with its first entry.
    for neighbours in w.values():
        peak = max(neighbours.values())
        for j in neighbours:
            neighbours[j] /= peak
    return w
def recommend_by_item(train, user_id, w, k):
    """
    Score unseen items for a user with item-based collaborative filtering.

    For every item the user already has, the ``k`` most similar items are
    looked up in ``w`` and each unseen one is credited with
    ``rating * similarity``.

    :param train: training set, a mapping of user -> {item: rating}
    :param user_id: user to recommend for; must be a key of ``train``
    :param w: item similarity matrix, a mapping of item -> {item: similarity}
    :param k: number of most similar neighbours considered per rated item
    :return: ``defaultdict(float)`` mapping candidate item -> score
    :raises KeyError: if ``user_id`` is not in ``train``
    """
    rank = defaultdict(float)
    ru = train[user_id]
    for i, pi in ru.items():
        # An item that never co-occurs with another item has no row in the
        # similarity matrix; treat it as having no neighbours instead of
        # failing with KeyError.
        neighbours = w.get(i, {})
        top_k = sorted(neighbours.items(), key=itemgetter(1), reverse=True)[0:k]
        for j, wj in top_k:
            if j in ru:
                # Never recommend something the user already has.
                continue
            rank[j] += pi * wj
    return rank
def recommend_by_user(user, train, w, k):
    """
    Score unseen items for a user with user-based collaborative filtering.

    The ``k`` users most similar to ``user`` are looked up in ``w`` and each
    of their items that ``user`` does not have yet is credited with
    ``similarity * rating``.

    :param user: user to recommend for; must be a key of ``train``
    :param train: training set, a mapping of user -> {item: rating}
    :param w: user similarity matrix, a mapping of user -> {user: similarity}
    :param k: number of most similar users considered
    :return: ``defaultdict(float)`` mapping candidate item -> score
    :raises KeyError: if ``user`` (or one of its neighbours in ``w``) is not
        in ``train``
    """
    rank = defaultdict(float)
    interacted_items = train[user]
    # A user who shares no item with anybody has no row in the similarity
    # matrix; treat that as "no neighbours" instead of failing with KeyError.
    neighbours = w.get(user, {})
    top_k = sorted(neighbours.items(), key=itemgetter(1), reverse=True)[0:k]
    for v, wuv in top_k:
        for i, rvi in train[v].items():
            if i in interacted_items:
                # Filter out items the user has interacted with before.
                continue
            rank[i] += wuv * rvi
    return rank
if __name__ == '__main__':
    # Toy user -> {item: rating} data for the two user-based variants.
    train = {
        'A': {'a': 1, 'b': 1, 'd': 1},
        'B': {'a': 1, 'c': 1},
        'C': {'b': 1, 'e': 1},
        'D': {'c': 1, 'd': 1, 'e': 1},
    }
    user_cf_w = user_similarity(train)
    rank = recommend_by_user('A', train, user_cf_w, 3)
    print('UserCF:', dict(rank))

    user_iif_w = user_similarity2(train)
    rank2 = recommend_by_user('A', train, user_iif_w, 3)
    print('UserCF-IIF:', dict(rank2))

    # Toy data for the three item-based variants.
    train2 = {
        'A': {'a': 1, 'b': 1, 'd': 1},
        'B': {'b': 1, 'c': 1, 'e': 1},
        'C': {'c': 1, 'd': 1},
        'D': {'b': 1, 'c': 1, 'd': 1},
        'E': {'a': 1, 'd': 1},
    }
    item_cf_w = item_similarity(train2)
    rank3 = recommend_by_item(train2, 'A', item_cf_w, 5)
    print('ItemCF:', dict(rank3))

    item_iuf_w = item_similarity2(train2)
    rank4 = recommend_by_item(train2, 'A', item_iuf_w, 5)
    print('ItemCF-IUF:', dict(rank4))

    item_norm_w = item_similarity3(train2)
    rank5 = recommend_by_item(train2, 'A', item_norm_w, 5)
    print('ItemCF-IUF+Normalization:', dict(rank5))