冬天来了,想爬取一下某团上烤肉店铺的数据,代码如下:
"""
Scrape shop search results from the "某团" search API into a CSV file.

The script prompts for a search keyword, walks the paginated JSON search
endpoint, and appends one CSV row per shop found in each response.
"""
import csv
import random

import requests
import pprint
import os
import time

# Number of shops requested per page (sent as the ``limit`` parameter).
PAGE_SIZE = 32
# Number of result pages to crawl.
PAGE_COUNT = 10
# Seconds to wait for the server before a request is abandoned.
REQUEST_TIMEOUT = 10

# CSV column names, in output order.
FIELDNAMES = [
    '店铺ID',
    '店铺名字',
    '店铺商圈',
    '店铺评分',
    '店铺评论',
    '烤肉类型',
    '人均价格',
    'x团价格',
    '促销信息',
    '详情页',
]


def _build_row(shop):
    """Map one search-result entry onto the CSV columns.

    Args:
        shop: One element of the ``searchResult`` list in the API response.

    Returns:
        A dict keyed by ``FIELDNAMES``, ready for ``csv.DictWriter.writerow``.

    Raises:
        KeyError: If a mandatory field (``id``, ``title``, ...) is missing.
    """
    shop_id = shop['id']
    # Guard against a missing, ``None`` or empty ``deals`` value so that a
    # shop without promotions still yields a row instead of aborting the run.
    deals = shop.get('deals') or []
    promotion = deals[0].get('title', '') if deals else ''
    return {
        '店铺ID': shop_id,
        '店铺名字': shop['title'],
        '店铺商圈': shop['areaname'],
        '店铺评分': shop['avgscore'],
        '店铺评论': shop['comments'],  # cumulative number of reviews
        '烤肉类型': shop['backCateName'],
        '人均价格': shop['avgprice'],
        'x团价格': shop['lowestprice'],
        '促销信息': promotion,
        '详情页': 'https://www.xx.com/meishi/' + str(shop_id),
    }


keyword = input('请输入要搜索的美食(比如烤肉):')

# The output file is named after the search keyword.
csv_path = f'某团{keyword}烤肉.csv'
# The file is opened in append mode, so the header must only be written when
# the file is new or empty; otherwise every re-run would insert a duplicate
# header row in the middle of the data.
need_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

# Author's note: the first page of the site is a rendered search page; from
# the second page on the results are served as JSON by this endpoint.
url = 'https://apimobile.xxx.com/group/v4/poi/pcsearch/110'

# Request headers.
headers = {
    'Referer': 'https://qxxx.xx.com/',  # add a Referer if the server answers 403
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
}

# ``with`` guarantees the CSV is flushed and closed even if a request fails.
with open(csv_path, mode='a', encoding='utf-8-sig', newline='') as f:
    csvWriter = csv.DictWriter(f, fieldnames=FIELDNAMES)
    if need_header:
        csvWriter.writeheader()

    # Crawl PAGE_COUNT pages; ``page`` is the offset of the first shop.
    for page in range(0, PAGE_SIZE * PAGE_COUNT, PAGE_SIZE):
        print(f'======正在爬取第前{page}个的数据======')
        # Random pause between requests to avoid hammering the server.
        time.sleep(random.randint(2, 5))

        # Query-string parameters for this page.
        params = {
            "uuid": "xxx",
            "userid": "xxx",
            "limit": PAGE_SIZE,
            "offset": page,
            "cateId": -1,
            "q": keyword,
            "token": "自己的token",
        }

        response = requests.get(
            url=url,
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on 403 and friends instead of on a confusing JSON error.
        response.raise_for_status()

        # The wanted data is the list stored under data -> searchResult.
        results = response.json()['data']['searchResult']
        if not results:
            # An empty page means there is nothing further to fetch.
            break

        for shop in results:
            dit = _build_row(shop)
            print(dit)
            csvWriter.writerow(dit)
# 这样直接请求很可能会被服务器返回 403,还是搭个代理池来爬某团比较靠谱。