xpath 实战之全国城市名字爬取

import requests
from lxml import etree

if __name__ == "__main__":
    # Fetch the full page source. The User-Agent header imitates a desktop
    # browser (UA spoofing).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    url = 'https://www.aqistudy.cn/historydata/'

    # requests has no default timeout, so without one an unresponsive server
    # would block this script indefinitely. raise_for_status() makes an HTTP
    # error fail loudly instead of being parsed as if it were the real page.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    page_text = response.text

    # Parse the HTML text into an element tree that can be queried via XPath.
    tree = etree.HTML(page_text)

    # XPath expressions selecting the <li> entries to read, in output order:
    # first the hot-city list, then the full city list.
    li_xpaths = (
        '//div[@class = "bottom"]/ul/li',
        '//div[@class = "bottom"]/ul/div[2]/li',
    )

    all_city_name = []
    for li_xpath in li_xpaths:
        for li in tree.xpath(li_xpath):
            names = li.xpath('./a/text()')
            # Skip entries whose <a> is missing or has no text, rather than
            # crashing with IndexError on the [0] lookup.
            if names:
                all_city_name.append(names[0])

    # Print the collected names followed by how many were found.
    print(all_city_name, len(all_city_name))

相关阅读:
LeetCode#191 Number of 1 Bits
敏捷编程
过程模型
磁盘阵列
RAM和ROM
cache
局部性原理
栈的应用（一）——括号的匹配
猫狗收养问题
全局变量和局部变量

原文地址：https://www.cnblogs.com/huahuawang/p/12692482.html