• 天气预报爬虫例子源码


    这是一个使用 Python 的 tkinter、requests、re 和 selenium 模块实现的天气预报爬虫程序,带有图形界面,运行界面如下:

     

    使用方法:在“查询城市”输入框中输入完整的城市名称或完整的拼音(例如深圳可以输入 shenzhen),然后点击“查询”按钮即可。目前程序支持国内和国际城市的天气预报查询:国内城市可查询 8 日内的天气预报,国外城市可查询 7 日内的天气预报。

    此外,程序启动时会自动查询并显示当前所在城市的天气预报。

    源代码如下:

      1 # *_* coding:utf-8 *_*
      2 
      3 # 开发团队:中国软件开发团队
      4 # 开发人员:Administrator
      5 # 开发时间:2019/3/23 5:16
      6 # 文件名称:weatherSpider
      7 # 开发工具:PyCharm
      8 
      9 
     10 import tkinter
     11 import tkinter.messagebox
     12 from tkinter import ttk
     13 import requests
     14 # from PIL import ImageTk as itk
     15 from selenium import webdriver
     16 # from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
     17 from selenium.webdriver.chrome.options import Options
     18 import re
     19 
     20 
     21 '''
     22 获取本地所在城市名称
     23 '''
     24 
     25 
     26 def get_local_city():
     27     chrome_options = Options()
     28     chrome_options.add_argument('--headless')
     29     chrome_options.add_argument('--disable-gpu')
     30     #
     31     # 更换头部
     32     chrome_options.add_argument(
     33         'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"')
     34 
     35     driver = webdriver.Chrome(executable_path='./chromedriver.exe', chrome_options=chrome_options)
     36     driver.get("http://www.weather.com.cn")
     37     text = driver.page_source
     38 
     39     result = re.findall('<span class="city_name"><em>(.*?)</em></span>', text, re.S)
     40     driver.close()
     41     return result[0]
     42 
     43 
     44 class MyFrame(tkinter.Frame):
     45     def __init__(self, default_city):
     46         self.root = tkinter.Tk()
     47 
     48         self.root.title("天气查询")
     49         self.root.geometry('1200x700+400+220')
     50         # 修改默认应用程序图标
     51         self.root.iconbitmap('camero.ico')
     52 
     53         bg = tkinter.Canvas(self.root, width=1200, height=600, bg='white')
     54         # self.img = itk.PhotoImage(file="bg.jpg")
     55         bg.place(x=100, y=40)
     56         # bg.create_image(0, 0, anchor=tkinter.NW, image=self.img)
     57 
     58         self.city = tkinter.Entry(self.root, width=16, font=("仿宋", 18, "normal"))
     59         self.city.place(x=200, y=60)
     60         self.city.insert(0, default_city)
     61 
     62         citylabel = tkinter.Label(self.root, text='查询城市', font=("仿宋", 18, "normal"))
     63         citylabel.place(x=80, y=60)
     64 
     65         # 查询按钮
     66         chaxun = tkinter.Button(self.root, width=10, height=3, text="查询", bg='#00CCFF', bd=5, font="bold",command=self.search)
     67        
     68         chaxun.place(x=800, y=50)
     69 
     70         # 清除按钮
     71         clearbtn = tkinter.Button(self.root, width=10, height=3, text="清除", bg='#00CCFF', bd=5, font="bold",command=self.clear)
     72       
     73         clearbtn.place(x=950, y=50)
     74 
     75         poslabel = tkinter.Label(self.root, text='选择位置', font=("仿宋", 18, "normal"))
     76         poslabel.place(x=80, y=100)
     77 
     78         comvalue = tkinter.StringVar()  # 窗体自带的文本,新建一个值
     79         self.comboxlist = ttk.Combobox(self.root, width=30, height=18, font=("仿宋", 18, "normal"),
     80                                        textvariable=comvalue)  # 初始化
     81         self.comboxlist["values"] = ("1", "2", "3")
     82         self.comboxlist.current(0)  # 选择第一个
     83         self.comboxlist.bind("<<ComboboxSelected>>", self.choose)  # 绑定事件,(下拉列表框被选中时,绑定choose()函数)
     84         self.comboxlist.place(x=200, y=100)
     85 
     86         self.result = tkinter.Listbox(self.root, heigh=18, width=65, font=("仿宋", 20, "normal"))  # 显示天气框
     87         self.result.place(x=125, y=150)
     88 
     89         self.citys = []
     90 
     91         self.headers = {
     92             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
     93             'Cookie': '__guid=182823328.3322839646442213000.1543932524694.901; vjuids=1858d43b6.167798cbdb7.0.8c4d7463d5c5d; vjlast=1543932526.1543932526.30; userNewsPort0=1; f_city=%E5%B9%B3%E9%A1%B6%E5%B1%B1%7C101180501%7C; Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b=1543932526,1543932551,1543932579; Wa_lvt_1=1547464114,1547464115,1547880054,1547983123; defaultCty=101181001; defaultCtyName=%u5546%u4E18; monitor_count=6; Wa_lpvt_1=1547983809'
     94         }
     95 
     96         # 开启本地天气查询
     97         if (default_city != ''):
     98             self.tianqiforecast(default_city)
     99 
    100     def tianqiforecast(self, searchcity):
    101 
    102         city = searchcity
    103         url = 'http://toy1.weather.com.cn/search?cityname=' + city + '&callback=success_jsonpCallback&_=1548048506469'
    104         response = requests.get(url, headers=self.headers)
    105         html1 = response.content.decode('utf-8')
    106         self.citys = re.findall('"ref":"(.*?)~.*?~(.*?)~.*?~(.*?)~.*?~.*?~.*?~.*?~(.*?)"', html1, re.S)
    107         if (len(self.citys) == 0):
    108             a = "出错了,未查找到该城市"
    109             self.result.insert(tkinter.END, a)
    110             return
    111         # 显示当前城市常用查询点
    112         plist = []
    113         for i in range(0, len(self.citys)):
    114             # print(i + 1, ':%14s ' % "".join(citys[i]))
    115             plist.append(self.citys[i][1])
    116         pos = tuple(plist)
    117         self.comboxlist["values"] = pos
    118         self.comboxlist.current(0)
    119         if len(self.citys) != 0:
    120             self.query(0)
    121 
    122     def search(self):
    123         mycity = self.city.get()
    124         if (mycity != ''):
    125             self.clear()
    126             self.tianqiforecast(mycity)
    127 
    128     def query(self, choose):
    129         if (len(self.citys[choose][0]) == 9):
    130             if (self.citys[choose][0][0] != '1' or self.citys[choose][0][1] != '0' or self.citys[choose][0][2] != '1'):
    131                 # 查询国外天气
    132 
    133                 url2 = 'http://www.weather.com.cn/weathern/' + self.citys[choose][0] + '.shtml'
    134                 responseweather = requests.get(url2, headers=self.headers)
    135                 html2 = responseweather.content.decode('utf-8')
    136 
    137                 weather = re.findall('<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
    138                 temp_weather = re.findall(
    139                     '<p class="weather-info">(.*?)</p>.*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
    140                     html2, re.S)
    141                 if len(temp_weather) < 7:
    142                     # 当天
    143                     today1 = re.findall(
    144                         '<li class="blue-item active".*?>(.*?)<div class="item-active"></div>\n</li>',
    145                         html2, re.S)
    146                     today = re.findall('<p class="weather-info">(.*?)</p>.*?<p class="wind-info">(.*?)</p>', today1[0],
    147                                        re.S)
    148                     print(today)
    149                     # 后6天
    150                     weather.append(temp_weather)
    151                 else:
    152                     weather.append(temp_weather)
    153 
    154                 Hightempture = re.findall(
    155                     '<script>var eventDay =["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];', html2,
    156                     re.S)
    157                 Lowtempture = re.findall(
    158                     'var eventNight =["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];',
    159                     html2, re.S)
    160                 # print(Hightempture,Lowtempture)
    161                 b = '查询城市为:' + str(self.citys[choose][3]) + '    ' + str(self.citys[choose][1])
    162                 self.result.insert(tkinter.END, b)
    163                 if len(temp_weather) < 7:  # 如日本
    164                     if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or 
    165                             len(Lowtempture[0]) != 7 or len(Hightempture[0]) != 7:
    166                         a = '系统出错,数据不完整:'
    167                         self.result.insert(tkinter.END, a)
    168                         self.result.insert(tkinter.END, url2)
    169                         print(url2)
    170                     else:
    171                         for i in range(0, 7):
    172                             if i < 1:
    173                                 a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
    174                                     i] + '' + str(today[0][0]) + ' 风:' + str(today[0][1])
    175                                 self.result.insert(tkinter.END, a)
    176 
    177                             else:
    178                                 a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
    179                                     i] + '' + "".join(weather[7][i - 1])
    180 
    181                                 self.result.insert(tkinter.END, a)
    182                 else:  # 如美国
    183                     if len(temp_weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or 
    184                             len(Lowtempture[0]) != 7 or len(Hightempture[0]) != 7:
    185                         a = '系统出错,数据不完整:'
    186                         self.result.insert(tkinter.END, a)
    187                         self.result.insert(tkinter.END, url2)
    188                         print(url2)
    189                     else:
    190                         for i in range(0, 7):
    191                             a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
    192                                 i] + '' + "".join(weather[7][i])
    193 
    194                             self.result.insert(tkinter.END, a)
    195 
    196 
    197             else:#国内天气查询
    198                 url2 = 'http://www.weather.com.cn/weathern/' + self.citys[choose][0] + '.shtml'
    199                 responseweather = requests.get(url2, headers=self.headers)
    200                 html2 = responseweather.content.decode('utf-8')
    201 
    202                 weather = re.findall('<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
    203                 weather.append(re.findall(
    204                     '<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
    205                     html2, re.S))
    206                 Hightempture = re.findall(
    207                     '<script>var eventDay =["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];', html2,
    208                     re.S)
    209                 Lowtempture = re.findall(
    210                     'var eventNight =["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];',
    211                     html2, re.S)
    212 
    213                 b = '查询城市为:' + str(self.citys[choose][3]) + '    ' + str(self.citys[choose][1])
    214                 self.result.insert(tkinter.END, b)
    215                 if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or 
    216                         len(Lowtempture[0]) != 8 or len(Hightempture[0]) != 8:
    217                     a = '系统出错,数据不完整:'
    218                     self.result.insert(tkinter.END, a)
    219                     self.result.insert(tkinter.END, url2)
    220                     print(url2)
    221                 else:
    222                     for i in range(0, 8):
    223                         a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
    224                             i] + '' + "".join(weather[8][i])
    225 
    226                         self.result.insert(tkinter.END, a)
    227 
    228         if (len(self.citys[choose][0]) == 12):  # 查询搜索相关结果的下一个城市天气预报
    229             url2 = 'http://forecast.weather.com.cn/town/weathern/' + self.citys[choose][0] + '.shtml'
    230             responseweather = requests.get(url2, headers=self.headers)
    231             html2 = responseweather.content.decode('utf-8')
    232 
    233             weather = re.findall('<li class="date-.*?".*?"da.*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
    234 
    235             html2 = re.sub('lt;', '<', html2)
    236             weather.append(re.findall(
    237                 '<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">\r\n(.*?)\r\n',
    238                 html2, re.S))
    239 
    240             Hightempture = re.findall(
    241                 'var eventDay = ["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];', html2, re.S)
    242 
    243             Lowtempture = re.findall(
    244                 'var eventNight = ["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"];',
    245                 html2, re.S)
    246             # print(Hightempture,Lowtempture)
    247             b = '查询城市为:' + str(self.citys[choose][3]) + '   ' + str(self.citys[choose][2]) + '    ' + str(
    248                 self.citys[choose][1])
    249             self.result.insert(tkinter.END, b)
    250 
    251             if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or 
    252                     len(Lowtempture[0]) != 8 or len(Hightempture[0]) != 8:
    253                 a = '系统出错,数据不完整:'
    254                 self.result.insert(tkinter.END, a)
    255                 self.result.insert(tkinter.END, url2)
    256                 print(url2)
    257             else:
    258                 for i in range(0, 8):
    259                     a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
    260                         i] + '' + "".join(weather[8][i])
    261                     # print(a)
    262                     self.result.insert(tkinter.END, a)
    263 
    264     '''
    265     选择搜索城市相关的下一个城市名称,并进行天气查询
    266     '''
    267 
    268     def choose(self, event):
    269         c = self.comboxlist.get()
    270         choose = -1
    271         for i in range(0, len(self.citys)):
    272             if c == self.citys[i][1]:
    273                 choose = i;
    274                 break;
    275         if choose != -1:
    276             self.query(choose)
    280 
    281     '''
    282     清除天气查询结果
    283     '''
    284 
    285     def clear(self):
    286         self.result.delete(0, tkinter.END)
    287         # self.city.delete(0, tkinter.END)
    288         # tkinter.messagebox.showerror('showerror', 'hello')
    289 
    290 
    291 
    292 if __name__ == '__main__':
    293     # 获取当前城市
    294     default_city = get_local_city();
    295 
    296     myframe = MyFrame(default_city)
    297     myframe.root.mainloop()

    通过该程序,主要可以学习爬虫解决问题的思路,并熟悉正则表达式 re 模块、网络数据抓取 requests 模块、selenium 模块以及 tkinter GUI 模块的具体使用方法。

    初学 Python 的网友可以参考以下几本优秀的图书:

    零基础学python  配套光盘下载地址

    Python从入门到项目实践

    其它有关爬虫开发参考资料可参考如下书籍:

    查看目录

    Python数据抓取技术与实战.pdf
    Python爬虫入门到实战.pdf
    Python爬虫实战入门教程.pdf
    Python网络数据采集.pdf
    PYTHON网络爬虫从入门到实践.pdf
    《Python爬虫开发与项目实战》.pdf
    精通Scrapy网络爬虫.pdf
    网络爬虫-Python和数据分析.pdf
    网络爬虫全解析 技术、原理与实践.pdf
    Python 3爬虫、数据清洗与可视化实战

    ...


    常用软件开发学习资料目录:  

    1.经典编程电子书收藏  

    2.C&C++编程学习资料收藏   

    3.算法及数据结构(有关c,c++,java)   

    4.Java开发学习资料收藏      

    5.Android开发学习资料收藏  

    6.Python开发学习资料收藏  

    7.大数据,机器学习,人工智能资料收藏

    8.Docker资料收藏

  • 相关阅读:
    枚举类型的应用
    动手动脑
    四则运算和验证码--源码
    ATM源码
    javabean+jsp+servlet+jdbc
    四则运算改良
    Java异常
    课后总结
    包装类Integre
    对象验证
  • 原文地址:https://www.cnblogs.com/it-tsz/p/10586709.html
Copyright © 2020-2023  润新知