• 训练词向量


     1 def word_vector_gener():
     2     """
     3     几种不同的方法来生成词向量
     4     :return:
     5     """
     6     from gensim.models import Word2Vec
     7     from gensim.test.utils import common_texts
     8     # 1.word2vec
     9     # 获取原始数据
    10     DATA_PATH = './word2vec_data.txt'
    11     word2evctor = open('./word2vector.txt', 'w', encoding='utf8')
    12     word_list = []
    13     finall = []
    14     # jieba分词
    15     with open(DATA_PATH, 'r', encoding='utf8') as file:
    16         for each_line in file.readlines():
    17             # 分词
    18             cut_word = list(jieba.cut(each_line.strip()))
    19             # 去停用词
    20             stopwords = [w.strip() for w in open('./stop_words.txt', 'r', encoding='utf8')]
    21             temp = []
    22             for each in cut_word:
    23                 if each not in stopwords and each.strip():
    24                     temp.append(each)
    25                     word_list.append(each)
    26             finall.append(temp)
    27     # 训练模型
    28     model = Word2Vec(finall, size=100, window=1, min_count=1, workers=4)
    29     model.save('./word2vec_model.')
    30     # 查看词向量
    31     for word in list(set(word_list)):
    32         content = str(word) + '	' + str(model[word])
    33         word2evctor.write(content+'
    ')
    34         print(content)
    35 
    36 
    37     print('ok')
    38 
    39 
    40 if __name__ == '__main__':
    41     word_vector_gener()
  • 相关阅读:
    javascrpt each map
    实现strcmp非常easy的思维
    POJ1300(欧拉回路)
    採用Android中的httpclient框架发送post请求
    蓝桥杯 带分数
    C++第11周(春)项目1
    每天一点儿JAVA-向量的操作
    js回车事件
    WebService(2)-XML系列之Java和Xml之间相互转换
    Android下的单元測试
  • 原文地址:https://www.cnblogs.com/demo-deng/p/9885157.html
Copyright © 2020-2023  润新知