• Python 统计 Excel 表格中文本的词频，生成词云图片


    import xlrd
    import jieba
    import pymysql
    import matplotlib.pylab as plt
    from wordcloud import WordCloud
    from collections import Counter
    import numpy as np
    
    def getExcelData(excel, txt):
        """Count word frequencies in one column of an Excel worksheet.

        The text cells of the third column (index 2) of the worksheet at
        index 2 are appended to ``txt``, the combined text is segmented with
        ``jieba.cut``, and every token longer than one character is counted.

        Args:
            excel: Path of the workbook, passed to ``xlrd.open_workbook``.
            txt: Text that is prepended to the text read from the sheet.

        Returns:
            A dict mapping each token to its number of occurrences, inserted
            in descending order of count (the order of
            ``Counter.most_common``).
        """
        # NOTE(review): xlrd 2.0+ reportedly reads only .xls files; confirm the
        # installed version if an .xlsx path is passed here.
        readbook = xlrd.open_workbook(excel)
        # sheet_by_index is zero-based, so index 2 selects the THIRD sheet.
        # NOTE(review): the original comment said "second sheet" -- confirm
        # which sheet is actually intended.
        sheet = readbook.sheet_by_index(2)

        # Collect the pieces and join once instead of repeated `+=`.
        parts = [txt]
        for row in range(sheet.nrows):
            value = sheet.cell(row, 2).value  # third column of this row
            # Numeric/boolean cells are not str; concatenating them raised
            # TypeError, so only text cells contribute.
            if isinstance(value, str):
                parts.append(value)
        text = ''.join(parts)

        # Keep tokens longer than one character and drop CRLF line breaks
        # (the only two-character newline token that passes the length test).
        counts = Counter(
            word for word in jieba.cut(text)
            if len(word) > 1 and word != '\r\n'
        )
        # A plain dict is used as the source data for the word cloud.
        return dict(counts.most_common())
    
    def makeWordCloud(txt, output_path='abc.png',
                      font_path=r"C:\Windows\Fonts\STXINGKA.TTF"):
        """Render a circular word cloud, save it to a file and display it.

        Args:
            txt: Mapping of word -> frequency, passed to
                ``WordCloud.generate_from_frequencies``.
            output_path: File the rendered image is written to. Defaults to
                ``'abc.png'``, the name the function always used before.
            font_path: Path of the font file used to draw the words. Defaults
                to a font in the Windows fonts directory; pass another path
                on other systems.
        """
        # Build a 300x500 grid and flag every point outside the circle of
        # radius 150 centred at (150, 150).
        x, y = np.ogrid[:300, :500]
        mask = (x - 150) ** 2 + (y - 150) ** 2 > 150 ** 2
        # Outside the circle -> 255, inside -> 0. Per the wordcloud docs,
        # white (255) mask entries are treated as masked out.
        mask = 255 * mask.astype(int)

        wc = WordCloud(background_color="white",
                       max_words=500,
                       mask=mask,
                       repeat=True,
                       # NOTE(review): the wordcloud docs state width/height
                       # are ignored when a mask is given; kept unchanged.
                       width=1000,
                       height=1000,
                       # Larger scale -> higher output resolution, so the
                       # rendered text is sharper.
                       scale=4,
                       # Raw string so the Windows path separators survive;
                       # the previous literal had lost them.
                       font_path=font_path)
        wc.generate_from_frequencies(txt)
        wc.to_file(output_path)

        plt.axis("off")
        plt.imshow(wc, interpolation="bilinear")
        plt.show()
    
    
    if __name__ == '__main__':
        # Script entry point: count the words in 'getdata.xlsx' (starting from
        # an empty seed text) and render the resulting frequencies.
        frequencies = getExcelData('getdata.xlsx', '')
        makeWordCloud(frequencies)
    

      

  • 相关阅读:
    linux下配置java环境
    CentOS6 配置静态IP
    数据库的事务
    MySQL总论
    JDBC面试题
    scp命令
    大数据练习题
    Linux下的Mysql安装 & 配置
    Hive的安装配置 & 基础指令
    本地存储localStorage以及它的封装接口store.js的使用
  • 原文地址:https://www.cnblogs.com/ning-blogs/p/10491361.html
Copyright © 2020-2023  润新知