"""Print the most frequent words of a novel and render them as a word cloud.

The script segments the text of ``红楼梦.txt`` with jieba, prints the 15 most
frequent multi-character words that are not stop words, then draws a word
cloud shaped by the mask image ``22.png``, shows it and saves it as
``new.png``.
"""
from collections import Counter

import jieba
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from wordcloud import WordCloud

TEXT_PATH = "红楼梦.txt"
STOPWORD_PATHS = ("stopwords_cn.txt", "stopwords_cn(more).txt")
MASK_PATH = '22.png'
FONT_PATH = './fonts/simhei.ttf'
OUTPUT_PATH = 'new.png'
TOP_N = 15

# Extra stop words: words that should not appear in the cloud but were not
# removed by the earlier preprocessing. Delete an entry to see its effect.
EXTRA_STOPWORDS = [
    '什么', '一个', '我们', '那里', '你们', '如今', '起来', '知道', '这里',
    '众人', '他们', '出来', '自己', '说道', '听见', '两个', '姑娘', '不好',
    '不知', '只见', '东西', '告诉',
]


def _load_stopwords(path):
    """Return every line of *path* with surrounding whitespace stripped.

    Blank lines are kept as empty strings, one entry per line of the file.
    """
    # NOTE(review): the file is read with the platform default encoding, as
    # the script always did -- confirm the encoding of the stop-word files
    # before pinning one explicitly.
    with open(path) as handle:
        return [line.strip() for line in handle]


def _count_words(words, stopwords):
    """Count the words that are neither one character long nor stop words.

    Args:
        words: iterable of segmented words.
        stopwords: iterable of words to leave out of the tally.

    Returns:
        A ``Counter`` mapping each remaining word to its number of
        occurrences.
    """
    # A set gives O(1) membership tests. The previous nested loop scanned the
    # whole stop-word list for every word and its `continue` only affected
    # that inner loop, so stop words were still counted.
    excluded = set(stopwords)
    return Counter(
        word for word in words
        if len(word) != 1 and word not in excluded
    )


def main():
    """Run the frequency report and build, show and save the word cloud."""
    with open(TEXT_PATH, "r", encoding='utf-8') as novel:
        txt = novel.read()

    ls = []
    for path in STOPWORD_PATHS:
        ls.extend(_load_stopwords(path))

    words = jieba.lcut(txt)
    counts = _count_words(words, ls)

    # most_common() returns at most TOP_N entries, so a text with fewer
    # distinct words no longer raises IndexError; equal counts keep their
    # first-seen order, like the stable descending sort used before.
    for word, count in counts.most_common(TOP_N):
        print("{0:<10}{1:>5}".format(word, count))

    string = ' '.join(words)
    print(len(string))

    # Open the mask image and convert it to an array.
    with Image.open(MASK_PATH) as img:
        img_array = np.array(img)

    stopword = EXTRA_STOPWORDS + ls
    print(stopword)

    wc = WordCloud(
        background_color='white',
        width=1000,
        height=800,
        mask=img_array,
        font_path=FONT_PATH,
        stopwords=set(stopword),  # WordCloud documents this as a set
    )
    wc.generate_from_text(string)  # build the cloud

    # Create the figure before drawing; calling plt.figure() after imshow()
    # opened a second, empty window.
    plt.figure()
    plt.imshow(wc)
    plt.axis('off')
    plt.show()  # display the image
    wc.to_file(OUTPUT_PATH)  # save the image


if __name__ == "__main__":
    main()