• trate


    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.feature_extraction.text import TfidfTransformer
    from sklearn.feature_selection import SelectKBest, chi2
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.naive_bayes import MultinomialNB
    import numpy as np
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn import metrics
    from text.textpredict import *
    from sklearn.cross_validation import *
    
    def chi22():
        """Cross-validate a TF-IDF + multinomial naive Bayes text classifier.

        A small hard-coded corpus of whitespace-segmented Chinese posts is
        turned into TF-IDF features, a ``MultinomialNB(alpha=0.01)`` model is
        fitted on it, and cross-validated predictions are compared with the
        true labels. The resulting confusion matrix is printed.

        The labels appear to mark advertisement/recruitment posts with 1 and
        ordinary posts with 0 (inferred from the sample texts).

        Returns:
            The confusion matrix produced by ``metrics.confusion_matrix``
            (the same object that is printed).
        """
        train_words = [
            "急需 钱用 不用 出售 如图 价值 千多 便宜 出售 出售 急 ",
            "读 读 重复 读好输 不变 绿 求高人 指点迷津 ",
            "诚召搛只呆家小时工,全职妈妈、在校学生、在家待业者、上班族、游戏者皆可做!每天5",
            "发福利了 火熱找小莳工,每天在綫2--3小莳,莳涧地點没限制,薪资鈤结80--150/",
            "急招小时工,每天在綫2--3小拭,时间地点没限制,薪资日结80--150/天,适 急招小时工,每天在綫2--3小拭,时间地点没限制,薪资日结80--150/天,适合学生党,手机党,上班族,有空闲时间者,有興趣缪系,QQ(937117723)咨询,此处不回!!",
            "发福利来 火熱找小莳工,每天在綫2--3小莳,莳涧地點没限制,薪资鈤结80--150/",
            "	读 不好 呜呜 ",
            "这句 话 总是 知道 连读 ",
            "求 师傅 交 口语 求有 耐心 老师 基础 学 ",
            "听到 读 ",
        ]
        # One label per entry of train_words, in the same order.
        train_tags = [1, 0, 1, 1, 1, 1, 0, 0, 0, 0]

        # Disabled alternative, kept for reference: plain term counts followed
        # by chi-squared feature selection (SelectKBest) ahead of the classifier.
        #
        #   count_v1 = CountVectorizer(stop_words=None, max_df=0.5)
        #   counts_train = count_v1.fit_transform(train_words)
        #   chi = SelectKBest(chi2, 10)
        #   mychi2 = chi.fit(counts_train, train_tags)
        #   hi2_train = mychi2.transform(counts_train)
        #   clf = MultinomialNB(alpha=0.01)
        #   clf.fit(hi2_train, np.asarray(train_tags))
        #   priediced = cross_val_predict(clf, hi2_train, train_tags)
        #   print(metrics.confusion_matrix(train_tags, priediced))
        #   print(hi2_train)

        # Active pipeline: TF-IDF features.
        vectorizer = TfidfVectorizer()
        tfidf_train = vectorizer.fit_transform(train_words)

        classifier = MultinomialNB(alpha=0.01)
        # Fit on the full corpus, as the original script did.
        classifier.fit(tfidf_train, np.asarray(train_tags))

        # NOTE(review): cross_val_predict is expected to come from the file's
        # star imports (sklearn.cross_validation) -- confirm for the installed
        # scikit-learn version.
        predicted = cross_val_predict(classifier, tfidf_train, train_tags)

        confusion = metrics.confusion_matrix(train_tags, predicted)
        # Call form with a single argument prints identically on Python 2
        # and Python 3, unlike the Python 2-only print statement.
        print(confusion)
        return confusion
    
    
    # Run the demo as soon as this module is loaded; chi22() prints the
    # confusion matrix of its cross-validated predictions.
    chi22()
    

      

  • 相关阅读:
    全局函数和静态函数
    C语言变量总结
    #ifdef、#ifndef 与 #endif
    #include与#define的意义
    exit
    字符常量
    void *:万能指针
    算法(Algorithms)第4版 练习 链表类 1.3.19~1.3.29
    算法(Algorithms)第4版 练习 1.3.219
    算法(Algorithms)第4版 练习 1.3.20
  • 原文地址:https://www.cnblogs.com/GuoJiaSheng/p/5691455.html
Copyright © 2020-2023  润新知