本文仅用于学习和交流,不具有任何商业价值,如有问题,请与我联系,我将及时处理。
数据来源于百度地图。开发者工具就不说了,直接上代码:
"""Scrape nationwide hot-pot restaurant counts from Baidu Maps into Excel files.

Three workbooks are produced: counts for the "hot" cities, totals per
province, and counts for every city listed under each province.
"""
import os
import pprint

import openpyxl as op
import requests

# The session cookie is private and was not part of the published script
# (the original referenced an undefined name, which raised NameError).
# Supply it through the BAIDU_MAP_COOKIE environment variable.
COOKIE = os.environ.get('BAIDU_MAP_COOKIE', '')

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def _new_workbook(first_header):
    """Create a workbook whose first sheet has the header row [first_header, '数量'].

    The sheet is inserted at index 0, ahead of the default sheet that
    ``Workbook()`` creates, so it is the first sheet in the saved file.

    Returns:
        A ``(workbook, worksheet)`` tuple.
    """
    workbook = op.Workbook()
    sheet = workbook.create_sheet(index=0)
    sheet.cell(row=1, column=1, value=first_header)
    sheet.cell(row=1, column=2, value='数量')
    return workbook, sheet


# Hot cities
wb_hotCity, ws_hotCity = _new_workbook('城市')

# Provinces
wb_province, ws_province = _new_workbook('省份')

# Cities under each province
wb_province_cities, ws_province_cities = _new_workbook('城市')


def get_json():
    """Request the search result from the map endpoint and return the decoded JSON.

    Raises:
        requests.RequestException: on connection problems, a timeout, or a
            non-2xx HTTP status.
    """
    url = 'https://xx.baixx.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&c=1&wd=火锅店&da_src=shareurl&on_gel=1&l=4&gr=1&b=(6818483.628395475,-1378890.6182441516;19500319.04895588,5539245.839424342)&pn=0&device_ratio=2&auth=G0dY4cVy9WzEHUcbN2egDb@CE3v8H2ESuxLBNRzEzNBtDcnVDpneC@BvYgP1PcGCgYvjPuVtvYgPMGvgWv@uVtvYgPPxRYuVtvYgP@vYZcvWPCuVtvYgP@ZPcPPuVtvYgPhPPyheuVtvhgMuxVVty1uVtCGYuVt1GgvPUDZYOYIZuVt1cv3uVtGccZcuVtPWv3Guxt58Jv7ucvY1SGpuxLt@jUfJxvYlcvIKMNQTXZbegHcEWe1GD8zv7u@ZPuVtc3CuVteuVtegvcguxLBNRzEzNBtquTTGdFrZZWuV&seckey=Ml81XZ+wbx7jyS5hXlEdUOZNM5KJW3eWH1zi1SkgdUw=,2ccphtdcku0z52vCIEYIggYcdu8QHAJF1lQ+5wl2NMlaWIlTiCrONly/LJX4jdosle2Jf9LuHi8DHeLXtZRfnXu5J/0ctPoquwFQq6tZMA4TzSUjV2KQkSKzylzZf30ZOmZRhyug2Upr+mVaZqFcTu6hRc8/N3XYrjIVwQLMken5x8Iaqv13CW13nUMjW9LO&tn=B_NORMAL_MAP&nn=0&u_loc=13182436,2819866&ie=utf-8&t=1637824275666&newfrom=zhuzhan_webmap'
    headers = {
        'Cookie': COOKIE,
        'Referer': 'https://map.baidu.com/search/xxxx/@13000533.070537817,2742247.240299427,4.95z?querytype=s&c=1&wd=%E7%81%AB%E9%94%85%E5%BA%97&da_src=shareurl&on_gel=1&l=4&gr=1&b=(6818483.628395475,-1378890.6182441516;19500319.04895588,5539245.839424342)&pn=0&device_ratio=2',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36',
    }
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()


def get_hotCities(json_data):
    """Append one [city name, count] row per entry of ``json_data['content']``."""
    for item in json_data['content']:
        ws_hotCity.append([item['name'], item['num']])


def get_province(json_data):
    """Append one [province, count] row per entry of ``json_data['more_city']``."""
    for item in json_data['more_city']:
        ws_province.append([item['province'], item['num']])


def get_cities(json_data):
    """Append one [city name, count] row for every city nested under each province.

    Each entry of ``json_data['more_city']`` holds its cities in ``item['city']``.
    Every row is also printed as it is collected.
    """
    for item in json_data['more_city']:
        for city_item in item['city']:
            city = city_item['name']
            num = city_item['num']
            print(city, num)
            ws_province_cities.append([city, num])


if __name__ == "__main__":
    # Fetch inside the guard so importing this module never hits the network.
    json_data = get_json()
    # pprint.pprint(json_data)  # handy for inspecting the raw payload
    get_hotCities(json_data)
    get_province(json_data)
    get_cities(json_data)
    wb_hotCity.save('热门城市火锅店数量.xlsx')
    wb_province.save('各省总火锅店数量.xlsx')
    wb_province_cities.save('各省城市火锅店数量.xlsx')
    wb_hotCity.close()
    wb_province.close()
    wb_province_cities.close()
保存结果,三张图:
虽然数据来源于百度地图,但为了学习,这里也用 Python 做个地图:
"""Plot the number of hot-pot restaurants per province on a map of China."""
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import ThemeType


def normalize_province(name):
    """Return the short province name used as the key on the 'china' map.

    Examples: '黑龙江省' -> '黑龙江', '北京市' -> '北京',
    '内蒙古自治区' -> '内蒙古', '广西壮族自治区' -> '广西',
    '香港特别行政区' -> '香港'. A name without a known suffix is returned
    unchanged.

    The original test ``'省' and '市' in i`` only evaluated ``'市' in i``
    (a non-empty string literal is always truthy), and its fallback blindly
    dropped the last character, which mangled '香港特别行政区' and names
    that were already short.
    """
    if '内蒙古' in name:
        # The only autonomous region whose short name has three characters.
        return '内蒙古'
    if name.endswith('特别行政区'):
        return name[:-len('特别行政区')]
    if '自治区' in name:
        # e.g. 广西壮族自治区 / 宁夏回族自治区: the first two characters are the short name.
        return name[:2]
    if name.endswith(('省', '市')):
        return name[:-1]
    return name


def main():
    """Read the per-province counts from Excel and render the national map to HTML."""
    data = pd.read_excel('各省总火锅店数量.xlsx')
    province_list = data['省份'].tolist()
    num = data['数量'].tolist()
    print(province_list)

    provinces = [normalize_province(name) for name in province_list]

    # render() returns the output path; the original bound it to ``map``,
    # shadowing the builtin, so it is simply not kept here.
    (
        Map(init_opts=opts.InitOpts(width='1280px', height='960px', theme=ThemeType.VINTAGE))
        .add('数量', [list(z) for z in zip(provinces, num)], 'china')
        .set_global_opts(
            title_opts=opts.TitleOpts(title='全国火锅店分布数量'),
            legend_opts=opts.LegendOpts(pos_left='10%', pos_top='10%'),
        )
    ).render('全国火锅店分布数量.html')


if __name__ == "__main__":
    main()
运行截图:
从数据中发现,全国火锅店数量最多的省份不是四川,而是山东,下面做一下山东地图的可视化:
"""Plot the hot-pot restaurant counts for the province with the most restaurants."""
import pandas as pd
import pyecharts.options as opts
from pyecharts.charts import Map
from pyecharts.globals import ThemeType

# The script takes the rows from 青岛市 through 日照市 as the Shandong rows.
# NOTE(review): this relies on the row order of the spreadsheet — confirm
# against the data if the export order ever changes.
FIRST_CITY = '青岛市'
LAST_CITY = '日照市'


def extract_city_range(cities, counts, first=FIRST_CITY, last=LAST_CITY):
    """Return the rows from ``first`` through ``last`` (both inclusive).

    The original flag-based loop appended the ``last`` row twice: once in
    the generic branch and once more in its dedicated branch. Slicing
    includes it exactly once.

    Args:
        cities: City names in spreadsheet order.
        counts: Restaurant counts aligned with ``cities``.
        first: Name of the first city of the range.
        last: Name of the last city of the range.

    Returns:
        ``(names, numbers)`` with the counts converted to ``int``. Both
        lists are empty when ``first`` is not present. When ``last`` is not
        found after ``first``, the range runs to the end of the data.
    """
    try:
        start = cities.index(first)
    except ValueError:
        return [], []
    try:
        stop = cities.index(last, start) + 1
    except ValueError:
        stop = len(cities)
    return list(cities[start:stop]), [int(value) for value in counts[start:stop]]


def main():
    """Read the per-city counts from Excel and render the Shandong map to HTML."""
    data = pd.read_excel('各省城市火锅店数量.xlsx')
    cities = data['城市'].tolist()
    counts = data['数量'].tolist()

    city, count = extract_city_range(cities, counts)
    print(city, count)

    (
        Map(init_opts=opts.InitOpts(width='1280px', height='960px', theme=ThemeType.VINTAGE))
        .add("山东火锅分布", [list(z) for z in zip(city, count)], "山东")
        .set_global_opts(
            title_opts=opts.TitleOpts(title="山东火锅店数量分布"),
            visualmap_opts=opts.VisualMapOpts(),
            legend_opts=opts.LegendOpts(pos_left='5%', pos_top='10%'),
        )
    ).render("山东火锅店数量分布.html")


if __name__ == "__main__":
    main()
截图:
收工。