• python3 LDA主题模型以及TFIDF实现


    import codecs  #主题模型
    from gensim import corpora
    from gensim.models import LdaModel
    from gensim import models
    from gensim.corpora import Dictionary
    # Script: build a bag-of-words corpus from 'input.txt', print its TF-IDF
    # weights and raw vectors, train a 20-topic LDA model, then write the topic
    # descriptions and each document's dominant topic to text files.

    # Read the corpus: one document per line, tokens separated by commas.
    # NOTE(review): UTF-8 is assumed for the input and output files; adjust the
    # encoding if the data was produced with a different one.
    te = []
    with open('input.txt', 'r', encoding='utf-8') as fp:
        for line in fp:
            # Strip surrounding whitespace (including the line terminator) so
            # the last token of a line is not stored as "token\n", and drop
            # empty tokens produced by blank lines or stray commas.
            tokens = [w.strip() for w in line.split(',')]
            te.append([w for w in tokens if w])
    print ('输入文本数量:',len(te))

    # Map tokens to integer ids and convert every document to bag-of-words.
    dictionary = corpora.Dictionary(te)
    if len(dictionary) == 0:
        # Stop with a clear message before attempting to train on no terms.
        raise SystemExit('input.txt contains no tokens; nothing to model')
    corpus = [dictionary.doc2bow(text) for text in te]

    # TF-IDF weights are only printed; the LDA model is trained on raw counts.
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    print(list(corpus_tfidf))  # TF-IDF weight of every term, per document
    print(list(corpus))  # bag-of-words vector of every document

    # Train the topic model and collect the topic distribution per document.
    lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=20, passes=100)
    doc_topic = list(lda[corpus])

    # Write one line per topic with its top 20 words.
    topics_r = lda.print_topics(num_topics=20, num_words=20)
    with open('topics_result3.txt', 'w', encoding='utf-8') as topic_name:
        for v in topics_r:
            topic_name.write(str(v) + '\n')

    # Write, for every document, the id and probability of its dominant topic.
    with open('documents_result.txt', 'w', encoding='utf-8') as fp2:
        for t in doc_topic:
            print(t)
            # Each entry of t is a (topic_id, probability) pair; compare on the
            # probability only. max() returns the first pair on ties.
            best_topic, best_prob = max(t, key=lambda pair: pair[1])
            fp2.write(str(best_topic) + '  ' + str(best_prob) + '\n')
  • 相关阅读:
    win7同时安装python2和python3
    Centos6.8安装python3.6
    Typescript的接口
    ES5中的类相关/Typescript的类相关
    Typescript介绍
    Global Interpreter Lock 全局解释器锁
    Go语言设计模式(五)
    Go语言设计模式(四)
    Go语言反射
    Go语言程序设计(三)
  • 原文地址:https://www.cnblogs.com/oikoumene/p/9798259.html
Copyright © 2020-2023  润新知