Python实现简单关联规则Apriori算法

 1 from itertools import combinations
 2 from copy import deepcopy
 3 
 4 
 5 # 导入数据，并剔除支持度计数小于min_support的1项集
 6 def load_data(data):
 7     I_dict = {}
 8     for i in data:
 9         for j in i:
10             I_dict[j] = I_dict.get(j, 0) + 1
11     F_dict = deepcopy(I_dict)
12     for k in I_dict.keys():
13         if F_dict.get(k) < min_support:
14             del F_dict[k]
15     return F_dict
16 
17 
# Keep the candidate itemsets whose support count reaches the minimum support.
def get_support_set(p_set, transactions=None, threshold=None):
    """Count the support of each candidate and keep the frequent ones.

    Args:
        p_set: Iterable of candidate itemsets; each must provide
            ``issubset`` (e.g. ``set`` or ``frozenset``).
        transactions: Collection of transactions to count against.  It is
            traversed once per candidate, so it must be re-iterable.  When
            omitted, the module-level ``data_set`` is used.
        threshold: Minimum support count a candidate needs in order to be
            kept.  When omitted, the module-level ``min_support`` is used.

    Returns:
        A list of ``[itemset, count]`` pairs, in the order of ``p_set``, for
        every candidate contained in at least ``threshold`` transactions.
    """
    if transactions is None:
        transactions = data_set
    if threshold is None:
        threshold = min_support
    supported = []
    for candidate in p_set:
        count = sum(1 for t in transactions if candidate.issubset(t))
        if count >= threshold:
            supported.append([candidate, count])
    return supported
29 
30 
# Find all frequent itemsets of size k and above.
# The frequent 2-itemsets are used as the starting level.
def get_all_items(two_set, k=3):
    """Grow frequent itemsets level by level, starting at size ``k``.

    Args:
        two_set: List of ``[itemset, count]`` pairs for the previous level
            (the frequent 2-itemsets when ``k`` is left at its default).
        k: Size of the first level of candidates to build.

    Returns:
        A list of ``[itemset, count]`` pairs, as produced by
        ``get_support_set``, for every level generated.
    """
    found = []
    while True:
        prev_level = [entry[0] for entry in two_set]
        # Distinct items still present in the previous level, first-seen order.
        items = list(dict.fromkeys(
            elem for itemset in prev_level for elem in itemset))
        candidates = []
        for combo in combinations(items, k):
            cand = set(combo)
            contained = sum(1 for sub in prev_level if cand.issuperset(sub))
            # A k-itemset has exactly k subsets of size k-1, so when the
            # previous level holds (k-1)-itemsets this keeps a candidate only
            # if every one of those subsets was frequent.
            if contained == k:
                candidates.append(cand)
        frequent = get_support_set(candidates)
        if not candidates:
            break
        k += 1
        two_set = frequent
        found.extend(frequent)
    return found
60 
61 
if __name__ == '__main__':
    # Sample transaction database: each inner list is one transaction.
    data = [['I1', 'I2', 'I5'],
            ['I2', 'I4'],
            ['I2', 'I3'],
            ['I1', 'I2', 'I4'],
            ['I1', 'I3'],
            ['I2', 'I3'],
            ['I1', 'I3'],
            ['I1', 'I2', 'I3', 'I5'],
            ['I1', 'I2', 'I3']]
    # data_set and min_support are module-level names read by the functions
    # defined in this file.
    data_set = [set(d) for d in data]
    min_support = 1
    # Compute the frequent 1-itemsets once and reuse them for both the
    # level-1 results and the level-2 candidates.
    frequent_items = load_data(data)
    one = [[{item}, count] for item, count in frequent_items.items()]
    two = [set(pair) for pair in combinations(frequent_items, 2)]
    two_f_set = get_support_set(two)
    all_frequent_set = one + two_f_set + get_all_items(two_f_set)
    for afs in all_frequent_set:
        print(afs)

输出结果：

相关阅读:
Dedecms自定义表单后台列表展现方式更改
FileZilla出现Failed to convert command to 8 bit charset
织梦中data文件夹是存放什么内容的
Dedecms去掉URL中a目录的方法
FileZilla出现Failed to convert command to 8 bit charset
Linux虚拟主机通过FTP软件创建目录时提示550 Create Directory Operation Failed
CSharp设计模式读书笔记（1）：简单工厂模式(学习难度：★★☆☆☆，使用频率：★★★☆☆)
Ubuntu 10.04 Desktop 快速添加微软雅黑字体
firefox10的界面确实领先
ubuntu下断点续传工具 aria2

原文地址：https://www.cnblogs.com/laresh/p/7665777.html