Python 3 中的 matplotlib 绘图：绘制从 51job 上抓取的 Python 各类职位数量的柱形图：
import selenium  # browser-automation / testing framework
import selenium.webdriver  # drives a real browser
import re
import matplotlib
import matplotlib.pyplot as plt  # data visualisation

# Select a font that has CJK glyphs; the default matplotlib font cannot
# render the Chinese legend labels.
matplotlib.rcParams["font.sans-serif"] = ["simhei"]
matplotlib.rcParams["font.family"] = "sans-serif"

# The search keyword is spliced between these two URL fragments.
_SEARCH_URL_PREFIX = "https://search.51job.com/list/020000,000000,0000,00,9,99,"
_SEARCH_URL_SUFFIX = (
    ",2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99"
    "&degreefrom=99&jobterm=99&companysize=99&providesalary=99"
    "&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType="
    "&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
)

_CHROMEDRIVER_PATH = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver"

# \s matches any whitespace character and \S any non-whitespace character,
# so [\s\S] matches every character including newlines; *? keeps the match
# lazy so it stops at the first closing </div>.
_RT_DIV_RE = re.compile(r"""<div class="rt">([\s\S]*?)</div>""", re.IGNORECASE)
_DIGITS_RE = re.compile(r"(\d+)")


def getnumberbyname(searchname):
    """Return the first number shown in the ``<div class="rt">`` element.

    Opens the 51job search page for *searchname* in Chrome, grabs the page
    source and extracts the first run of digits from the first
    ``<div class="rt">`` element (presumably the result-count banner --
    verify against the live page markup).

    Args:
        searchname: Search keyword inserted into the 51job list URL.

    Returns:
        The digits as a ``str``, or ``None`` (after printing "失败") when the
        element is missing or contains no digits.
    """
    url = _SEARCH_URL_PREFIX + searchname + _SEARCH_URL_SUFFIX
    # NOTE(review): ``executable_path`` is kept from the original call; newer
    # Selenium releases expect a Service object instead -- confirm the
    # installed version before changing it.
    driver = selenium.webdriver.Chrome(executable_path=_CHROMEDRIVER_PATH)
    try:
        driver.get(url)  # load the search page
        pagesource = driver.page_source  # rendered HTML
    finally:
        # Always end the browser session, even if the page load raised.
        driver.quit()

    matches = _RT_DIV_RE.findall(pagesource)
    if not matches:
        print("失败")
        return None

    newstr = matches[0].strip()  # drop surrounding whitespace
    numbers = _DIGITS_RE.findall(newstr)
    if not numbers:
        # The element was found but holds no digits; treat it as a failure
        # rather than raising IndexError.
        print("失败")
        return None

    print(searchname + newstr)
    return numbers[0]


pythonlist = ["python", "python 运维", "python 测试", "python 数据", "python web", "python 爬虫"]

for num, pystr in enumerate(pythonlist, start=1):
    # Scrape once per keyword: every call launches a browser.
    count = getnumberbyname(pystr)
    if count is None:
        continue  # scrape failed; leave this bar out instead of crashing
    # int() rather than eval(): the text comes from a web page.
    plt.bar([num], [int(count)], label=pystr)

plt.legend()  # draw the legend once, after all bars exist
plt.savefig("zhaopin.png")  # save the chart to the working directory
# plt.show()  # uncomment to display the chart interactively