import jieba
import wordcloud
import imageio

# Read the novel text (Romance of the Three Kingdoms).
# `with` guarantees the file handle is closed (the original leaked it).
with open("C:/Users/86183/Desktop/三国演义.txt.txt", "r", encoding="utf-8") as f:
    txt = f.read()

# Segment the text into words and tally frequencies, skipping
# single-character tokens (mostly particles and punctuation).
words = jieba.lcut(txt)
counts = {}
for word in words:
    if len(word) == 1:
        continue
    counts[word] = counts.get(word, 0) + 1

# (word, count) pairs sorted by count, highest frequency first.
# Note: counts are ints (e.g. ('曹操', 953)), not strings.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

# Feed the word cloud the 200 MOST FREQUENT words.
# Bug fixed: the original sliced counts.keys() (insertion order, not
# frequency order) and then re-appended the top 15, creating duplicates.
elem = [word for word, _ in items[:200]]

# Print the 15 most frequent words with their counts.
for word, count in items[:15]:
    print("{:<10}{:>5}".format(word, count))

# Build the word cloud shaped by the mask image.
mk = imageio.imread("C:/Users/86183/Desktop/微信图片_20200407164244.png")
w = wordcloud.WordCloud(font_path="msyh.ttc", mask=mk,
                        background_color="black", height=400, width=500)
w.generate(" ".join(elem))
w.to_file("kongweenjun.png")
原图
词云图
代码是模仿 MOOC 课程示例编写的。