• 【词云】代码


     1 import sys
     2 reload(sys)
     3 sys.setdefaultencoding('utf-8')
     4 
     5 from os import path
     6 from PIL import Image
     7 import numpy as np
     8 import matplotlib.pyplot as plt
     9 
    10 from wordcloud import WordCloud
    11 import jieba
    12 d = path.dirname(__file__)
    13 
    14 stopWordFile = u'stopwords.txt'
    15 stopWordList = []
    16 for L in open(path.join(d , stopWordFile)).readlines():
    17     stopWordList.append(L.strip().decode('utf-8'))
    18 stopWordList = set(stopWordList)
    19 
    20 new = 'words.txt'
    21 
    22 text = open(path.join( d , new )).read().strip('
    ')
    23 wordDict = {}
    24 for w in jieba.cut(text):
    25     if w not in stopWordList:
    26         wordDict.setdefault(w , 0)
    27         wordDict[w] += 1
    28     
    29 maskImg = 'china.jpeg'
    30 alice_mask = np.array( Image.open(path.join(d , maskImg)))
    31 
    32 wc = WordCloud(background_color = 'white', max_words = 2000 , mask = alice_mask ,
    33                font_path = path.join(d , 'msyh.ttf'))
    34 wc.generate_from_frequencies(wordDict)
    35 
    36 wc.to_file(path.join(d, "example.png"))
    37 
    38 # show
    39 plt.imshow(wc, interpolation='bilinear')
    40 plt.axis("off")
    41 plt.figure()
    42 plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
    43 plt.axis("off")
    44 plt.show()
  • 相关阅读:
    Shell-17--break-exit-continue-shift
    Shell-16--函数
    Shell-14--awk
    TCP带外数据读写
    Linux网络编程-----Socket地址API
    log4j日志级别
    JNI-Test
    游戏服务器当中的唯一名设计方法
    String和bytes的编码转换
    base64的一个应用情景
  • 原文地址:https://www.cnblogs.com/colipso/p/6836037.html
Copyright © 2020-2023  润新知