• 贝叶斯推断|朴素贝叶斯分类|贝叶斯定理


    近期,由于项目需求,需要用到贝叶斯定理及其相关知识,于是又系统地学习了一下,顺便做一下笔记。

    参考资料:

    代码(非常详细的注释):

    #-*- coding:utf-8 -*-
    import copy #用于深度拷贝,适用于复杂的数据结构
    #复杂的数据结构看不懂,一定要在纸上画图,画出来就一目了然了
    
    class native_bayes:
        """Naive Bayes classifier over categorical (discrete-valued) features.

        All statistics live in ``self.class_set``, a nested dict shaped as::

            {
                class_name: {
                    'num': int,          # training samples seen for this class
                    'class_per': float,  # prior probability of this class
                    'character_condition_per': {
                        feature_name: {
                            feature_value: {
                                'num': int,              # samples of this class with this value
                                'condition_per': float,  # P(value | class)
                            }
                        }
                    }
                }
            }
        """

        def __init__(self, character_vec_, class_vec_):
            """Build the empty counting tables.

            character_vec_: list of ``(feature_name, [feature_value, ...])``
                tuples, e.g.
                ``[("character_A", ["A1", "A2", "A3"]), ("character_B", ["B1", "B2", "B3"])]``.
            class_vec_: list of class names, e.g. ``["class_X", "class_Y"]``.
            """
            # Template of per-feature-value counters; every class gets its own copy.
            character_condition_per = {}
            for character_name, character_values in character_vec_:
                character_condition_per[character_name] = {}
                for character_value in character_values:
                    character_condition_per[character_name][character_value] = {
                        'num': 0,              # occurrences of this value within the class
                        'condition_per': 0.0,  # conditional probability of this value given the class
                    }

            # Per-class statistics: sample count, prior, and the feature tables.
            self.class_set = {}
            for class_name in class_vec_:
                self.class_set[class_name] = {
                    'num': 0,          # number of training samples of this class
                    'class_per': 0.0,  # prior probability of this class
                    # Deep copy so that classes never share counter dicts.
                    'character_condition_per': copy.deepcopy(character_condition_per),
                }

        def learn(self, sample_):
            """Accumulate counts from training samples and refresh all probabilities.

            sample_: list of dicts shaped as::

                {
                    'character': {'character_A': 'A1', ...},  # feature vector
                    'class_name': 'class_X',                  # class label
                }

            Raises KeyError if a sample names a class, feature or feature value
            that was not declared in the constructor.
            """
            # Pass 1: count classes and feature values.
            for each_sample in sample_:
                data_for_class = self.class_set[each_sample['class_name']]
                data_for_class['num'] += 1
                feature_tables = data_for_class['character_condition_per']
                for character_name, character_value in each_sample['character'].items():
                    feature_tables[character_name][character_value]['num'] += 1

            # Pass 2: turn counts into probabilities. The denominator is the
            # total number of samples counted so far (not just this batch), so
            # priors stay valid when learn() is called more than once.
            sample_num = sum(data['num'] for data in self.class_set.values())
            if sample_num == 0:
                return  # nothing has been learned yet; keep the zero defaults
            for data_for_class in self.class_set.values():
                class_num = data_for_class['num']
                data_for_class['class_per'] = float(class_num) / sample_num
                if class_num == 0:
                    continue  # no samples for this class; conditionals stay 0.0
                for value_table in data_for_class['character_condition_per'].values():
                    for data_for_character in value_table.values():
                        # Convert before dividing so the result is a true ratio
                        # even where "/" on two ints is integer division.
                        data_for_character['condition_per'] = (
                            float(data_for_character['num']) / class_num
                        )

        def classify(self, input_):
            """Return the name of the most probable class for a feature vector.

            input_: dict mapping feature name to feature value, e.g.
                ``{"character_A": "A1", "character_B": "B3"}``.

            The score of a class is its prior multiplied by the conditional
            probability of every given feature value. On ties the class
            iterated later wins. Returns '' when there are no classes.

            Raises KeyError if input_ contains an undeclared feature or value.
            """
            best_class = ''
            max_per = 0.0
            for class_name in self.class_set:
                class_data = self.class_set[class_name]
                per = class_data['class_per']
                # Multiply in the conditional probability of each feature value.
                for character_name in input_:
                    character_per_data = class_data['character_condition_per'][character_name]
                    per = per * character_per_data[input_[character_name]]['condition_per']
                print(class_name, per)
                if per >= max_per:
                    # Remember the best score so far; without this update the
                    # comparison is always true and the last class always wins.
                    max_per = per
                    best_class = class_name

            return best_class
                    
    
    # Naming convention: function parameters carry a trailing underscore,
    # ordinary variables do not, which makes the two easy to tell apart.
    # Declarations: the feature names with their allowed values, and the classes.
    character_vec = [
        ("character_A", ["A1", "A2", "A3"]),
        ("character_B", ["B1", "B2", "B3"]),
    ]
    class_vec = ["class_X", "class_Y"]

    # Create the classifier with empty counting tables.
    bayes = native_bayes(character_vec, class_vec)

    # Training set: one (character_A value, character_B value, class) row per
    # sample, expanded into the dict layout that learn() expects.
    sample = [
        {
            'character': {'character_A': value_a, 'character_B': value_b},
            'class_name': label,
        }
        for value_a, value_b, label in [
            ("A1", "B1", "class_X"),
            ("A3", "B1", "class_X"),
            ("A3", "B3", "class_X"),
            ("A2", "B2", "class_X"),
            ("A2", "B2", "class_Y"),
            ("A3", "B1", "class_Y"),
            ("A1", "B3", "class_Y"),
            ("A1", "B3", "class_Y"),
        ]
    ]

    # Feature vector to classify.
    input_data = dict(character_A="A1", character_B="B3")

    bayes.learn(sample)  # train
    predicted_class = bayes.classify(input_data)  # classify the test vector
    print(predicted_class)
  • 相关阅读:
    用spring的InitializingBean作初始化
    LinkedList源码分析
    CgLib动态代理
    DB2删除表分区
    spring aop搭建redis缓存
    List怎么遍历删除元素
    线程池ExecutorService和完成服务CompletionService的使用获取线程的返回结果
    synchronized与static synchronized 的区别
    将spring管理的bean使用注解的方式注入到servlet中
    eclipse右击打war包class没打上去的问题
  • 原文地址:https://www.cnblogs.com/leezx/p/5821129.html
Copyright © 2020-2023  润新知