• sklearn中的朴素贝叶斯模型及其应用


    from sklearn import datasets
    from sklearn.naive_bayes import GaussianNB
    # Fit Gaussian naive Bayes on the complete iris data set and predict the
    # very same samples back, i.e. this measures training-set error only.
    iris = datasets.load_iris()
    gnb = GaussianNB()
    pred = gnb.fit(iris.data, iris.target)  # `pred` is the object used for predict() below
    y_pred = pred.predict(iris.data)
    # Print the number of samples followed by the number of mislabeled samples.
    n_samples = iris.data.shape[0]
    n_mislabeled = (iris.target != y_pred).sum()
    print(n_samples, n_mislabeled)
    
    150 6
    
    
    
    iris.target
    
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    
    from sklearn import datasets
    from sklearn.naive_bayes import BernoulliNB
    # Repeat the fit-and-predict-on-training-data experiment with BernoulliNB.
    # NOTE(review): the recorded run predicts class 0 for every sample;
    # presumably because BernoulliNB binarizes the continuous iris features
    # and they all end up identical - confirm against the scikit-learn docs.
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    gnb = BernoulliNB()  # variable name kept from the Gaussian cell
    pred = gnb.fit(features, labels)
    y_pred = pred.predict(features)
    # Print the number of samples followed by the number of mislabeled samples.
    print(features.shape[0], (labels != y_pred).sum())
    
    
    150 100
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    
    
    
    from sklearn import datasets
    from sklearn.naive_bayes import MultinomialNB
    # Repeat the fit-and-predict-on-training-data experiment with MultinomialNB.
    iris = datasets.load_iris()
    features, labels = iris.data, iris.target
    gnb = MultinomialNB()  # variable name kept from the Gaussian cell
    pred = gnb.fit(features, labels)
    y_pred = pred.predict(features)
    # This cell prints the per-sample mismatch mask itself (True = mislabeled)
    # rather than its sum.
    mismatch_mask = labels != y_pred
    print(features.shape[0], mismatch_mask)
    
    150 [False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False  True False  True False
      True False False False False False False False False False False  True
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False  True False  True
     False  True False False False False False False False False False False
     False False False False False False]
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    from sklearn.model_selection import cross_val_score
    from sklearn.naive_bayes import GaussianNB
    # Cross-validate GaussianNB with cv=10 and report the mean score.
    # `iris` is not loaded here; it must already exist from an earlier cell.
    gnb = GaussianNB()
    scores = cross_val_score(gnb, iris.data, iris.target, cv=10)
    mean_score = scores.mean()
    print("Accuracy:%.15f" % mean_score)
    
    
    Accuracy:0.953333333333333
    
    scores
    array([0.93333333, 0.93333333, 1.        , 0.93333333, 0.93333333,
           0.93333333, 0.86666667, 1.        , 1.        , 1.        ])
    
    from sklearn.model_selection import cross_val_score
    from sklearn.naive_bayes import BernoulliNB
    # Cross-validate BernoulliNB with cv=10 and report the mean score.
    # `iris` is not loaded here; it must already exist from an earlier cell.
    gnb = BernoulliNB()  # variable name kept from the Gaussian cell
    scores = cross_val_score(gnb, iris.data, iris.target, cv=10)
    mean_score = scores.mean()
    # NOTE(review): "Acdcuracy" is a typo for "Accuracy"; the label is left
    # untouched so the printed text still matches the recorded output.
    print("Acdcuracy:%.3f" % mean_score)
    
    Acdcuracy:0.333
    
    scores
    array([0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333,
           0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333])
    
    from sklearn.model_selection import cross_val_score
    from sklearn.naive_bayes import MultinomialNB
    # Cross-validate MultinomialNB with cv=10 and report the mean score.
    # `iris` is not loaded here; it must already exist from an earlier cell.
    gnb = MultinomialNB()  # variable name kept from the Gaussian cell
    scores = cross_val_score(gnb, iris.data, iris.target, cv=10)
    mean_score = scores.mean()
    # NOTE(review): "Acdcuracy" is a typo for "Accuracy"; the label is left
    # untouched so the printed text still matches the recorded output.
    print("Acdcuracy:%.15f" % mean_score)
    
    Acdcuracy:0.953333333333333
    
    
    scores
    
    array([1.        , 1.        , 1.        , 0.93333333, 0.86666667,
           0.93333333, 0.8       , 1.        , 1.        , 1.        ])
    
    
    import csv
    # Read the whole tab-separated SMS corpus into memory. `sms` keeps the raw
    # text because the word-frequency cell further down works on it directly.
    with open(r'd:/SMSSpamCollectionjsn.txt', encoding="utf-8") as file_path:
        sms = file_path.read()
    sms_data = []
    sms_label = []
    # csv.reader expects an iterable of lines. Handing it the raw string makes
    # it iterate character by character (each "row" is a single character and
    # line[1] raises IndexError), so split the text into lines first.
    reader = csv.reader(sms.splitlines(), delimiter='\t')
    for line in reader:
        # Assumes each row is "<label>\t<message>"; skip blank or malformed
        # rows that do not provide both fields.
        if len(line) < 2:
            continue
        sms_label.append(line[0])
        sms_data.append(line[1])
    # No explicit close() is needed: the with-block has already closed the
    # file, and `sms` is a plain str (the old `sms.colse()` call could only fail).
    sms_data
    
    
    
    from collections import Counter
    # Word-frequency table for the raw corpus text `sms` (a str defined by the
    # loading cell above).
    # NOTE(review): `cc` (periods replaced by spaces) only feeds len() below;
    # the tokens are taken from the raw `sms`, which is why the recorded
    # output keeps tokens such as 'camcorder.'. Switch to cc.split() if
    # stripping periods was the intent.
    cc = sms.replace('.', ' ')
    cclist = sms.split()
    print(len(cc), cclist)
    ccset = set(cclist)
    print(ccset)
    # Count whole tokens in a single pass. The previous sms.count(word) counted
    # substring matches (e.g. 'a' matched inside every word containing it) and
    # rescanned the entire corpus once per distinct word.
    strDict = dict(Counter(cclist))
    for key in ccset:
        print(key, strDict[key])
    # Was `ccsetr.items()`: an undefined name (NameError); the (word, count)
    # pairs live in strDict.
    wclist = list(strDict.items())
    print(wclist)
    def takeSecond(elem):
        """Return the second item of *elem*; used as the sort key for (word, count) pairs."""
        return elem[1]
    # Most frequent words first.
    wclist.sort(key=takeSecond, reverse=True)
    print(wclist)
    
    
    
    ',', 'I', 'need', 'you,', 'I', 'crave', 'you', '...', 'But', 'most', 'of', 'all', '...', 'I', 'love', 'you', 'my', 'sweet', 'Arabian', 'steed', '...', 'Mmmmmm', '...', 'Yummy"', 'spam', '07732584351', '-', 'Rodger', 'Burns', '-', 'MSG', '=', 'We', 'tried', 'to', 'call', 'you', 're', 'your', 'reply', 'to', 'our', 'sms', 'for', 'a', 'free', 'nokia', 'mobile', '+', 'free', 'camcorder.', 'Please', 'call', 'now', '08000930705', 'for', 'delivery', 'tomorrow', 'ham', 'WHO', 'ARE', 'YOU', 'SEEING?', 'ham', 'Great!', 'I', 'hope', 'you', 'like', 'your', 'man', 'well', 'endowed.', 'I', 'am', '<#>', 'inches...', 'ham', 'No', 'calls..messages..missed', 'calls', 'ham', "Didn't", 'you', 'get', 'hep', 'b', 'immunisation', 'in', 'nigeria.', 'ham', '"Fair', 'enough,', 'anything', 'going', 'on?"', 'ham', '"Yeah', 'hopefully,', 'if', 'tyler', "can't", 'do', 'it', 'I', 'could', 'maybe', 'ask', 'around', 'a', 'bit"', 'ham', 'U', "don't", 'know', 'how', 'stubborn', 'I', 'am.', 'I', "didn't", 'even', 'want', 'to', 'go', 'to', 'the', 'hospital.', 'I', 'kept', 'telling', 'Mark', "I'm", 'not', 'a', 'weak', 'sucker.', 'Hospitals', 'are', 'for', 'weak', 'suckers.', 'ham', 'What', 'you', 'thinked', 'about', 'me.', 'First', 'time', 'you', 'saw', 'me', 'in', 'class.', 'ham', '"A', 'gram', 'usually', 'runs', 'like', '<#>', ',', 'a', 'half', 'eighth', 'is', 'smarter', 'though'
    
    
    
    from nltk.corpus import stopwords
    # Fetch nltk's stop-word list for 'english' and display it (notebook-style
    # bare expression on the last line).
    stops = stopwords.words('english')
    stops
    
    
    ['i',
     'me',
     'my',
     'myself',
     'we',
     'our',
     'ours',
     'ourselves',
     'you',
     "you're",
     "you've",
     "you'll",
     "you'd",
     'your',
     'yours',
     'yourself',
     'yourselves',
     'he',
     'him',
     'his',
     'himself',
     'she',
    
  • 相关阅读:
    外贸视频教程[外贸人zencart自助建站视频教程]:第一课
    外贸视频教程[外贸人zencart自助建站视频教程]:第二课
    行sqlSQL*PLUS使用(三)
    消息函数windows 程序设计 第三章 (下)
    优化性能[置顶] Android应用性能优化方案
    自定义方法JSP自定义标签
    发票名称<iframe name=document.getElementById("cellFrame").src = "dyszAction!showFpDyMb.do?fpzldm=" + fpzldm;
    纹理寻址DirectX入门 (8) TextureAddressMode
    空间复杂度分段分段有序数组合并成有序(空间复杂度为O(1))
    数据库生成T4模版在代码生成中的应用心得
  • 原文地址:https://www.cnblogs.com/cc013/p/10028636.html
Copyright © 2020-2023  润新知