• python3抓取中国天气网不同城市7天、15天实时数据


    思路:
    1、根据city.txt文档来获取不同城市code
    2、获取中国天气网7d和15d不同城市url
    3、利用requests库请求url获取html内容
    4、利用beautifulsoup获取7d和15d指定天气数据
    5、将获取的天气数据保存到csv文件中



    # -*- coding: utf-8 -*-
    import requests
    from bs4 import BeautifulSoup
    import csv
    
    
    '''
    获取不同城市code
    '''
    
    
    def get_citycode(city_name):
        """Look up the city code for ``city_name`` in ``city.txt``.

        ``city.txt`` is opened relative to the current working directory and
        read as UTF-8. Each line is split at the first ``=``: the part before
        it is the code, the part after it is treated as the city name.

        A line whose name part equals ``city_name`` exactly always wins. When
        no line matches exactly, the first line that merely contains
        ``city_name`` is used, so partial names keep working as before.

        Args:
            city_name: City name to search for.

        Returns:
            The code (text before ``=``) with surrounding whitespace removed.

        Raises:
            ValueError: If ``city_name`` is empty/blank or no line matches.
        """
        # An empty string is "contained" in every line, which used to return
        # the code of whatever city happened to be listed first.
        if not city_name or not city_name.strip():
            raise ValueError('invalid city name')

        wanted = city_name.strip()
        fallback = None
        with open('city.txt', 'r', encoding='UTF-8') as fs:
            # Iterate the file object directly; no need to load every line.
            for line in fs:
                code, _, name = line.partition('=')
                if name.strip() == wanted:
                    return code.strip()
                # Remember only the first substring hit, matching the order
                # the original lookup would have used.
                if fallback is None and city_name in line:
                    fallback = code.strip()

        if fallback is not None:
            return fallback
        raise ValueError('invalid city name')
    
    
    
    
    
    '''
    获取不同城市7天url
    '''
    
    
    def get_7d_url(city_name):
        """Return the weather.com.cn 7-day forecast URL for ``city_name``.

        The city code is resolved through ``get_citycode``; any exception it
        raises propagates to the caller.
        """
        prefix = 'http://www.weather.com.cn/weather/'
        city_code = get_citycode(city_name)
        return ''.join((prefix, city_code, '.shtml'))
    
    
    '''
    获取不同城市15天url
    '''
    
    
    def get_15d_url(city_name):
        """Return the weather.com.cn 15-day forecast URL for ``city_name``.

        The city code is resolved through ``get_citycode``; any exception it
        raises propagates to the caller.
        """
        prefix = 'http://www.weather.com.cn/weather15d/'
        city_code = get_citycode(city_name)
        return ''.join((prefix, city_code, '.shtml'))
    
    
    '''
    获取html内容
    '''
    
    
    def get_content(url, data=None):
        """Fetch ``url`` with an HTTP GET and return the body decoded as UTF-8.

        Args:
            url: Address of the page to download.
            data: Unused; kept only so existing callers that pass it still work.

        Returns:
            The response body as text.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.RequestException: On connection problems or when the
                60-second timeout expires.
        """
        rep = requests.get(url, timeout=60)
        # Fail here on an error status instead of handing an error page to the
        # HTML parser, where it would surface as an unrelated AttributeError.
        rep.raise_for_status()
        # Force UTF-8 rather than relying on the encoding requests guesses.
        rep.encoding = 'utf-8'
        return rep.text
    
    
    '''
    获取7天指定数据
    '''
    
    
    def get_7d_data(htmltext, city):
        """Extract one row per ``<li>`` from the ``div#7d`` forecast list.

        Args:
            htmltext: HTML source of the 7-day forecast page.
            city: Value placed in the first column of every row.

        Returns:
            A list of rows ``[city, date, text, high, low, wind_force]`` where
            ``date`` is the ``<h1>`` string, ``text`` the first ``<p>`` string,
            ``high`` the ``<span>`` string inside the second ``<p>`` (``None``
            when that span is absent), ``low`` the ``<i>`` string inside the
            second ``<p>`` and ``wind_force`` the ``<i>`` string inside the
            third ``<p>``.
        """
        soup = BeautifulSoup(htmltext, "html.parser")
        forecast = soup.body.find('div', {'id': '7d'})
        entries = forecast.find('ul').find_all('li')

        rows = []
        for entry in entries:
            date = entry.find('h1').string
            paragraphs = entry.find_all('p')
            text = paragraphs[0].string
            # The high-temperature span is not always present in the page.
            high_tag = paragraphs[1].find('span')
            high = high_tag.string if high_tag is not None else None
            low = paragraphs[1].find('i').string
            wind_force = paragraphs[2].find('i').string
            rows.append([city, date, text, high, low, wind_force])
        return rows
    
    
    
    '''
    获取15天指定数据
    '''
    
    
    def get_15d_data(htmltext, city):
        """Extract one row per ``<li>`` from the ``div#15d`` forecast list.

        Args:
            htmltext: HTML source of the 15-day forecast page.
            city: Value placed in the first column of every row.

        Returns:
            A list of rows ``[city, date, text, temperature_H, temperature_L,
            wind_direction, wind_force]``. The values come from the first five
            ``<span>`` elements of each ``<li>``; ``temperature_H`` is the
            ``<em>`` inside the third span (``None`` when absent) and
            ``temperature_L`` is the text of that span outside the ``<em>``.
        """
        content = []
        bs = BeautifulSoup(htmltext, "html.parser")
        data = bs.body.find('div', {'id': '15d'})
        for day in data.find('ul').find_all('li'):
            span = day.find_all('span')
            date = span[0].string
            text = span[1].string

            tem = span[2]
            high_tag = tem.find('em')
            if high_tag is None:
                temperature_H = None
                temperature_L = tem.string
            else:
                temperature_H = high_tag.string
                # Tag.string is None when a tag has more than one child, and
                # recurses into the <em> when it is the only child, so reading
                # tem.string here lost (or duplicated) the value. Collect the
                # text nodes that sit directly under the span instead.
                outside = ''.join(
                    str(node) for node in tem.children if isinstance(node, str)
                )
                # Drop the "/" separating the two temperatures, if any.
                temperature_L = outside.strip().lstrip('/').strip() or None

            wind_direction = span[3].string
            wind_force = span[4].string
            content.append([
                city,
                date,
                text,
                temperature_H,
                temperature_L,
                wind_direction,
                wind_force,
            ])
        return content
    
    
    
    
    '''
    保存获取到的天气数据
    csv文件
    '''
    
    
    def save_data(data, filename):
        """Append ``data`` to the CSV file ``filename``.

        Args:
            data: Iterable of rows; each row is an iterable of cell values.
            filename: Path of the CSV file, created if it does not exist.

        The file is written as UTF-8 with a byte-order mark ("utf-8-sig").
        Previously the platform default encoding was combined with
        ``errors='ignore'``, which silently dropped every character the
        locale could not encode (all Chinese text on a non-CJK locale).
        """
        # newline='' hands line-ending control to the csv module, avoiding the
        # blank line after every row on Windows. errors='ignore' is kept so the
        # write stays best-effort for text that cannot be encoded at all.
        with open(filename, 'a', encoding='utf-8-sig', errors='ignore',
                  newline='') as f:
            csv.writer(f).writerows(data)
    
    
    
    
    
    
    
    '''
    爬取7天天气数据
    '''
    def _7d(city, filename='E:weather.csv'):
        """Download the 7-day forecast for ``city`` and append it to a CSV file.

        Args:
            city: City name, resolved to a URL by ``get_7d_url``.
            filename: Output CSV path. Defaults to the previously hard-coded
                ``'E:weather.csv'`` so existing callers behave the same.
        """
        url = get_7d_url(city)
        html = get_content(url)
        result = get_7d_data(html, city)
        save_data(result, filename)
    
    
    
    
    '''
    爬取15天天气数据
    '''
    def _15d(city, filename='E:weather.csv'):
        """Download the 15-day forecast for ``city`` and append it to a CSV file.

        Args:
            city: City name, resolved to a URL by ``get_15d_url``.
            filename: Output CSV path. Defaults to the previously hard-coded
                ``'E:weather.csv'`` so existing callers behave the same.
        """
        url = get_15d_url(city)
        html = get_content(url)
        result = get_15d_data(html, city)
        save_data(result, filename)
    
    
    
    
    if __name__ == '__main__':
        # Read city names separated by whitespace. split() with no argument
        # collapses repeated, leading and trailing spaces; split(' ') produced
        # empty names, and an empty name is a substring of every city.txt
        # line, so the first listed city was fetched by mistake.
        cities = input('city name: ').split()
        for city in cities:
            _7d(city)
            _15d(city)
    

      

    附:city.txt 获取地址:https://pan.baidu.com/s/1VNW8AJi6_zo7mP_90lTkiA      提取码:red5 

     

  • 相关阅读:
    解决jar包冲突
    postman使用记录
    get请求直接通过浏览器发请求传数组或者list到后台
    excel中ppmt/pmt/ipmt的计算方式
    unicode编码与解码
    spring参数拼装
    java内存模型(jmm)
    Mysql事务,并发问题,锁机制-- 幻读、不可重复读(转)
    星空雅梦
    星空雅梦
  • 原文地址:https://www.cnblogs.com/ZoeLiang/p/11357732.html
Copyright © 2020-2023  润新知