• 利用 Python 实现《数据挖掘：概念与技术》一书中描述的 Apriori 算法


     1 from itertools import combinations
     2 
     3 data = [['I1', 'I2', 'I5'], ['I2', 'I4'], ['I2', 'I3'], ['I1', 'I2', 'I4'], ['I1', 'I3'],
     4         ['I2', 'I3'], ['I1', 'I3'], ['I1', 'I2', 'I3', 'I5'], ['I1', 'I2', 'I3']]
     5 
     6 
     7 # 候选集生成
     8 # 输入:
     9 # f_set: k-1项集, k:项集个数
    10 # 输出:
    11 # k_cand:k项候选集
    12 def apriori_gen(f_set, k):
    13     k_cand = []
    14     temp = [frozenset(l) for l in combinations(f_set, k)]
    15     for t in temp:
    16         if has_infrequent_subset(t, f_set):
    17             del t
    18         else:
    19             k_cand.append(t)
    20     return k_cand
    21 
    22 # 非频繁项集的超集也是非频繁的
    23 def has_infrequent_subset(c_set, f_set):
    24     for subset in c_set:
    25         if not frozenset([subset]).issubset(f_set):
    26             return True
    27     return False
    28 
    29 # 输入(绝对)最小支持度, min_sup
    30 # 输出:全部频繁项集(不包括一项集), all_f_set
    31 def get_f_set(min_sup=2):
    32     all_f_set = []
    33     L1 = frozenset([d for ds in data for d in ds])
    34     k = 2
    35     size = len(L1)
    36     while k <= size:
    37         c_k = frozenset(apriori_gen(L1, k))
    38         for c in c_k:
    39             count = 0
    40             for d in data:
    41                 if c.issubset(frozenset(d)):
    42                     count += 1
    43             if count >= min_sup:
    44                 all_f_set.append((c, count))
    45         k += 1
    46     return all_f_set
    47 
    48 if __name__ == '__main__':
    49     all_frequent_set = get_f_set()
    50     for i in all_frequent_set:
    51         print(i)

  • 相关阅读:
    力扣(LeetCode) 14. 最长公共前缀
    力扣(LeetCode)965. 单值二叉树
    力扣(LeetCode)258. 各位相加
    力扣(LeetCode)389. 找不同
    PTA 阶乘之和取模
    A. Sea Battle
    PTA 二叉树路径
    PTA 重构二叉树
    PTA 笛卡尔树
    绿豆蛙的归宿
  • 原文地址:https://www.cnblogs.com/laresh/p/7788713.html
Copyright © 2020-2023  润新知