• 51job词云


      爬取 51job 上 Python 岗位的任职要求,并据此生成词云:

    # coding:utf-8
    import jieba  #分词
    import  matplotlib.pyplot as plt #数据可视化
    import wordcloud
    from  wordcloud import WordCloud,ImageColorGenerator,STOPWORDS #词云
    import numpy  as np  #科学计算
    from PIL import Image  #处理图片
    
    # Build a word cloud image from scraped job-requirement text.
    #
    # Reads "workinfo.txt", strips leftover HTML/CSS fragments and filler
    # phrases, segments the text with jieba, renders a word cloud shaped by
    # "2.jpg", then saves it to "python.png" and shows it.

    # Read the raw bytes and decode explicitly as UTF-8; the context manager
    # guarantees the file handle is closed.
    with open("workinfo.txt", "rb") as source_file:
        text = source_file.read()
    textfile = text.decode("utf-8")

    # Fragments removed before segmentation, applied in this order.
    # Removal is substring-based, so short entries such as "li", "br" and
    # "top" are also stripped from inside longer words.
    noise_fragments = (
        "span", "style", "font", "nbsp",
        "line", "height", "color", "family",
        "size", "宋体", "rgb", "white",
        # "backgroud" is kept from the original list; the correctly spelled
        # "background" is added because the misspelling never matches CSS.
        "space", "normal", "backgroud", "background", "14px",
        "br", "岗位", "职责", "0px",
        "Microsoft", "YaHei", "margin", "top",
        "以上", "上学", "div", "li", "以及",
        # Previously this replace() result was discarded, so the phrase was
        # never actually removed.
        "任职要求",
    )
    for fragment in noise_fragments:
        textfile = textfile.replace(fragment, "")

    # jieba yields tokens lazily; WordCloud expects one space-separated string.
    wordlist = jieba.cut_for_search(textfile)
    space_list = " ".join(wordlist)

    # Mask image that defines the shape of the cloud.
    backgroud = np.array(Image.open("2.jpg"))
    mywordcloud = WordCloud(
        width=1800,
        height=1400,
        background_color="black",  # canvas colour
        mask=backgroud,            # shape mask for word placement
        max_words=200,             # upper bound on rendered words
        stopwords=STOPWORDS,       # wordcloud's built-in stop-word set
        font_path="simkai.ttf",    # font file used to render the words
        max_font_size=200,         # largest font size allowed
        random_state=50,           # fixed seed for the random generator
        scale=2,
    ).generate(space_list)

    # Raise the on-screen and saved-figure resolution.
    plt.rcParams['figure.dpi'] = 1800
    plt.rcParams['savefig.dpi'] = 1400

    # NOTE(review): this colour generator is built but never applied, so the
    # cloud keeps its default colours. To take colours from the image, pass it
    # to mywordcloud.recolor(color_func=image_color) -- confirm intent.
    image_color = ImageColorGenerator(backgroud)

    plt.imshow(mywordcloud)  # draw the rendered cloud
    plt.axis("off")          # hide the axes and tick labels
    plt.savefig("python.png")  # save before show(), which blocks
    plt.show()
  • 相关阅读:
    android webview cookie同步
    session和cookie
    对称加密与非对称加密
    理解java回调机制
    android studio命令
    android studio友盟gradle多渠道打包
    [c++] final override keyword
    [C++] Returning values by reference in C++
    [c++] polymorphism without virtual function
    [C++] NEW Advanced Usage
  • 原文地址:https://www.cnblogs.com/my-global/p/12447315.html
Copyright © 2020-2023  润新知