使用Python爬虫从天气预报网站爬取天气数据,存储至MySQL,然后使用pyecharts实现绘图
本次代码可以在gitee下载: https://gitee.com/liuyueming/weatherSpider.git
一,环境查看
Python版本
C:\Users\liuym\Desktop\weatherSpider>python --version
Python 3.6.6
MySQL版本
mysql --version
mysql Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using EditLine wrapper
二,代码
安装模块
# Install the third-party packages used by main.py and myVisualize.py.
pip3 install pymysql bs4 lxml requests pyecharts
运行过程中遇到没有安装的库使用pip install安装即可
本次爬取的天气预报网站为 http://www.tianqihoubao.com/
主程序main.py
import pymysql
import requests
from bs4 import BeautifulSoup

# Database connection settings; adjust them to the local MySQL instance.
db = pymysql.connect(host="localhost",
                     user="root",
                     passwd="123456",
                     db="weather",
                     charset='utf8')
cursor = db.cursor()


def get_html(url):
    """Download *url* and return it parsed as a BeautifulSoup tree.

    The response encoding is taken from the detected (apparent) encoding
    before the text is decoded.

    Raises:
        requests.RequestException: on network failure, timeout or a
            non-success HTTP status.
    """
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    html = requests.get(url, timeout=30)
    html.raise_for_status()
    html.encoding = html.apparent_encoding
    soup = BeautifulSoup(html.text, 'lxml')
    return soup


def _clean(text):
    """Return *text* with every whitespace character removed."""
    return ''.join(text.split())


year = ['2020']  # years to crawl
month = ['01', '02', '03', '04', '05', '06',
         '07', '08', '09', '10', '11', '12']
time = [y + x for y in year for x in month]

sql = ("insert into weather_spider"
       "(time_local, link, weather_type, temperature, wind_power) "
       "values(%s, %s, %s, %s, %s)")

try:
    for date in time:
        # The city is part of the path; this crawl targets Nanchang.
        url = ('http://www.tianqihoubao.com/lishi/nanchang/month/'
               + date + '.html')
        soup = get_html(url)
        sup = soup.find('table', attrs={'class': 'b'})
        if sup is None:
            # No data table on this page: skip the month instead of
            # failing with AttributeError on None.
            print('未找到天气数据表格,跳过: ' + url)
            continue

        rows = []
        # The first <tr> is the header row.
        for trl in sup.find_all('tr')[1:]:
            td = trl.find_all('td')
            if len(td) < 4:
                continue
            anchor = td[0].find('a')
            if anchor is None:
                continue
            rows.append((
                anchor['title'],         # day title
                anchor['href'],          # link to the day's detail page
                _clean(td[1].get_text()),  # weather condition
                _clean(td[2].get_text()),  # temperature
                _clean(td[3].get_text()),  # wind power
            ))

        # One batch insert and one commit per month page.
        cursor.executemany(sql, rows)
        db.commit()
finally:
    # Release the cursor and the connection even if the crawl fails midway.
    cursor.close()
    db.close()

print('爬取完成')
代码解析
db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 数据库连接信息,根据实际情况修改
year = ['2020'] # 需要爬取的年份信息
url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市信息 本次为南昌
生成html程序myVisualize.py
import pymysql
import pyecharts.options as opts
from pyecharts.charts import Line, Pie


def _connect():
    """Open a connection to the ``weather`` database.

    Both chart builders go through this helper so their connection
    settings cannot diverge.
    """
    return pymysql.connect(host="localhost",
                           user="root",
                           passwd="123456",
                           db="weather",
                           charset='utf8')


def _to_number(text):
    """Convert a temperature such as ``'12℃'`` to int, or None if not numeric."""
    try:
        return int(text.replace('℃', '').strip())
    except ValueError:
        return None


def _add_temperature_series(line, series_name, values):
    """Add one temperature series with max/min mark points and an average line."""
    line.add_yaxis(
        series_name=series_name,
        y_axis=values,
        is_symbol_show=False,
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
    )


def create_temp():
    """Render the daily high/low temperature line chart to an HTML file.

    Reads ``time_local`` and ``temperature`` from ``weather_spider`` and
    writes ``南昌2020气温变化表.html`` in the current directory.
    """
    db = _connect()
    try:
        with db.cursor() as cursor:
            # Explicit ordering keeps the x axis chronological.
            cursor.execute('SELECT time_local, temperature '
                           'FROM weather_spider ORDER BY time_local;')
            data = cursor.fetchall()
    finally:
        db.close()

    max_temp_list = []
    min_temp_list = []
    day_list = []
    for time_local, temperature in data:
        # Temperature is stored as "<high>/<low>"; partition tolerates a
        # missing separator, and unparseable parts become None.
        high, _, low = (temperature or '').partition('/')
        max_temp_list.append(_to_number(high))
        min_temp_list.append(_to_number(low))
        # Keep only the first 11 characters of the stored title as the label.
        day_list.append((time_local or '')[:11])

    line = Line()
    line.add_xaxis(day_list)
    _add_temperature_series(line, "最高气温", max_temp_list)
    _add_temperature_series(line, "最低气温", min_temp_list)
    line.set_global_opts(
        yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        title_opts=opts.TitleOpts(title="南昌气温变化表"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
    )
    line.render('南昌2020气温变化表.html')
    print('气温图生成成功')


def create_weather():
    """Render the weather-type share pie chart to an HTML file.

    Counts the rows of ``weather_spider`` whose ``weather_type`` contains
    each keyword and writes ``南昌2020天气占比表.html`` in the current
    directory. A row matching several keywords is counted once per keyword.
    """
    attr = ["雨", "多云", "晴", "阴", "雪", "雾", "霾"]
    counts = []
    db = _connect()
    try:
        with db.cursor() as cursor:
            for keyword in attr:
                # COUNT(*) avoids fetching whole rows just to count them.
                cursor.execute(
                    'SELECT COUNT(*) FROM weather_spider '
                    'WHERE weather_type LIKE %s;',
                    ('%' + keyword + '%',),
                )
                counts.append(cursor.fetchone()[0])
    finally:
        db.close()

    pie = (
        Pie()
        .add("", [list(z) for z in zip(attr, counts)])
        .set_global_opts(title_opts=opts.TitleOpts(title="天气占比表"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    pie.render('南昌2020天气占比表.html')
    print('天气图生成成功')


if __name__ == '__main__':
    create_temp()
    create_weather()
MySQL操作(安装MySQL不详述)
创建库
-- Create the database that both scripts connect to (db="weather").
create database weather;
导入表
# -p without a value prompts for the password, keeping it out of shell history.
mysql -uroot -p -h127.0.0.1 weather < weather.sql
表语句sql如下weather.sql
-- Table filled by main.py: one row per scraped day.
DROP TABLE IF EXISTS `weather_spider`;

CREATE TABLE `weather_spider` (
  `time_local`   VARCHAR(255) DEFAULT NULL,  -- title attribute of the day's link
  `link`         VARCHAR(255) DEFAULT NULL,  -- href of the day's link
  `weather_type` VARCHAR(255) DEFAULT NULL,  -- weather condition text
  `temperature`  VARCHAR(255) DEFAULT NULL,  -- temperature text
  `wind_power`   VARCHAR(255) DEFAULT NULL   -- wind power text
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;
三,运行
运行主程序
python main.py
运行正常会往MySQL数据库写入数据,登录数据库搜索查看
select * from weather_spider;
运行生成html程序
python myVisualize.py
气温图生成成功
天气图生成成功
在当前目录会生成html,打开查看