• Python词云分析


     1 import jieba
     2 from matplotlib import pyplot as plt
     3 from wordcloud import WordCloud
     4 from PIL import Image
     5 import numpy as np
     6 txt = (open("红楼梦.txt", "r", encoding='utf-8')).read()
     7 file1 = open("stopwords_cn.txt")
     8 file2 = open("stopwords_cn(more).txt")
     9 ls1 = []
    10 while 1:
    11     line = file1.readline()
    12     new_word = line.strip()
    13     if not line:
    14         break
    15     ls1.append(new_word)
    16 ls2 = []
    17 while 1:
    18     line = file2.readline()
    19     new_word = line.strip()
    20     if not line:
    21         break
    22     ls2.append(new_word)
    23 ls = ls1+ls2
    24 words = jieba.lcut(txt)
    25 counts = {}
    26 for word in words:
    27     for i in ls:
    28         if word == i:
    29             continue
    30     if (len(word)) == 1:
    31         continue
    32     else:
    33         counts[word] = counts.get(word, 0) + 1
    34 items = list(counts.items())
    35 items.sort(key=lambda x: x[1], reverse=True)
    36 for i in range(15):
    37     word, count = items[i]
    38     print("{0:<10}{1:>5}".format(word, count))
    39 string = ' '.join(words)
    40 print(len(string))
    41 img = Image.open('22.png') #打开图片
    42 img_array = np.array(img) #将图片装换为数组
    43 stopword=['什么', '一个', '我们', '那里', '你们', '如今', '起来', '知道', '这里', '众人', '他们', '出来', '自己', '说道', '听见', '两个', '姑娘', '不好',
    44           '不知', '只见', '东西', '告诉']  #设置停止词,也就是你不想显示的词,这里这个词是我前期处理没处理好,你可以删掉他看看他的作用
    45 stopword=stopword+ls
    46 print(stopword)
    47 wc = WordCloud(
    48     background_color='white',
    49     width=1000,
    50     height=800,
    51     mask=img_array,
    52     font_path='./fonts/simhei.ttf',
    53     stopwords=stopword
    54 )
    55 wc.generate_from_text(string)#绘制图片
    56 plt.imshow(wc)
    57 plt.axis('off')
    58 plt.figure()
    59 plt.show()  #显示图片
    60 wc.to_file('new.png')  #保存图片

  • 相关阅读:
    android.database.CursorIndexOutOfBoundsException: Index -1 requested, with a size of 3
    display:inline-block的运用
    图解单片机8位PWM、16位PWM中“位”的含义!
    UVA10006
    [置顶] CF 86D Powerful array 分块算法入门,n*sqrt(n)
    俗人解释 三维渲染 在工作过程
    HDU 4414 Finding crosses(dfs)
    Codeforces 35E Parade 扫描线 + list
    hdu 4374 单调队列
    LeetCode OJ平台Sort Colors讨论主题算法
  • 原文地址:https://www.cnblogs.com/shixinzei/p/10224426.html
Copyright © 2020-2023  润新知