• xpath 实战之全国城市名字爬取


    import requests
    from lxml import etree
    
    if __name__ == "__main__":
        # Fetch the page, collect the link text of every city entry (the
        # "hot cities" list first, then the "all cities" list) and print the
        # resulting list of names together with its length.

        # Send a browser-like User-Agent instead of the default requests one.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
        }
        url = 'https://www.aqistudy.cn/historydata/'

        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status() stops us from parsing an HTTP error
        # page as if it were the real city listing.
        response = requests.get(url=url, headers=headers, timeout=10)
        response.raise_for_status()
        page_text = response.text

        # Parse the HTML source into an element tree for XPath queries.
        tree = etree.HTML(page_text)

        # XPath expressions selecting the <li> items of each list, in the
        # order their names should appear in the output.
        li_xpaths = (
            '//div[@class = "bottom"]/ul/li',          # hot cities
            '//div[@class = "bottom"]/ul/div[2]/li',   # all cities
        )

        all_city_name = []
        for li_xpath in li_xpaths:
            for li in tree.xpath(li_xpath):
                link_texts = li.xpath('./a/text()')
                # Skip items without link text rather than crashing with an
                # IndexError on an unexpected/empty <li>.
                if link_texts:
                    all_city_name.append(link_texts[0])

        print(all_city_name, len(all_city_name))
  • 相关阅读:
    LeetCode#191 Number of 1 Bits
    敏捷编程
    过程模型
    磁盘阵列
    RAM和ROM
    cache
    局部性原理
    栈的应用(一)——括号的匹配
    猫狗收养问题
    全局变量和局部变量
  • 原文地址:https://www.cnblogs.com/huahuawang/p/12692482.html
Copyright © 2020-2023  润新知