"""Scrape the city names listed on the aqistudy.cn historical-data page.

Run as a script: downloads the page, extracts the link text of every entry
in the "hot cities" list and in the "all cities" list, and prints the
combined list of names together with its length.
"""
import requests
from lxml import etree

# Browser-like User-Agent sent with the request (UA spoofing).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
URL = 'https://www.aqistudy.cn/historydata/'

# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def _link_texts(li_nodes):
    """Return the first ``<a>`` text node found under each ``<li>`` element.

    Args:
        li_nodes: iterable of lxml elements supporting ``.xpath()``.

    Returns:
        A list of strings, one per element that has link text. Elements
        without any ``./a/text()`` match are skipped instead of raising
        ``IndexError``.
    """
    names = []
    for li in li_nodes:
        texts = li.xpath('./a/text()')
        if texts:
            names.append(texts[0])
    return names


def main():
    """Fetch the page, collect hot-city and all-city names, and print them.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Fetch the full page source.
    response = requests.get(url=URL, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    page_text = response.text

    # Parse the HTML.
    tree = etree.HTML(page_text)

    all_city_name = []

    # Hot-cities list.
    hot_li_list = tree.xpath('//div[@class = "bottom"]/ul/li')
    all_city_name.extend(_link_texts(hot_li_list))

    # All-cities list.
    city_names_list = tree.xpath('//div[@class = "bottom"]/ul/div[2]/li')
    all_city_name.extend(_link_texts(city_names_list))

    print(all_city_name, len(all_city_name))


if __name__ == "__main__":
    main()