从薄荷网(boohee.com)爬取每种食物的热量值
"""Crawl food calorie listings from boohee.com and save them to a CSV file.

Builds the listing URLs for groups 1-10 and pages 1-10, puts them on a shared
queue, and lets four gevent greenlets fetch and parse them concurrently.
Every food entry found is written as a single-cell row to ``timetop.csv``.
"""
from gevent import monkey

# Patch the standard library before `requests` is imported, as the gevent
# documentation recommends, so its socket calls cooperate with greenlets.
monkey.patch_all()

import csv

import bs4
import gevent
import requests
from gevent.queue import Queue

HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

# Upper bound (seconds) for a single request, so one stalled connection
# cannot block a worker forever.
REQUEST_TIMEOUT = 10


def crawler():
    """Drain the shared ``work`` queue and write one CSV row per food entry.

    Reads URLs from the module-level ``work`` queue and writes through the
    module-level ``writer``. A page that cannot be fetched, or that has no
    ``ul.food-list`` element, is reported and skipped so the worker keeps
    processing the remaining URLs instead of dying on the first bad page.
    """
    while not work.empty():
        page_url = work.get_nowait()

        try:
            res = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            res.raise_for_status()
        except requests.RequestException as exc:
            print('skipped {}: {}'.format(page_url, exc))
            continue

        bs_res = bs4.BeautifulSoup(res.text, 'html.parser')
        food_list = bs_res.find('ul', class_='food-list')
        if food_list is None:
            # find() returns None when the element is absent; calling
            # find_all() on it would raise AttributeError.
            print('skipped {}: no food list found'.format(page_url))
            continue

        for data in food_list.find_all(class_='text-box pull-left'):
            # The whole entry is kept in a single cell, with spaces turned
            # into commas, exactly as the original output format.
            writer.writerow([data.text.strip().replace(' ', ',')])


work = Queue()

url = 'http://www.boohee.com/food/group/{group}?page={page}'
for group in range(1, 11):
    for page in range(1, 11):
        real_url = url.format(group=group, page=page)
        work.put_nowait(real_url)

# The context manager closes the file even if the run is interrupted.
with open('timetop.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
    writer = csv.writer(csv_file)

    task_list = [gevent.spawn(crawler) for _ in range(4)]
    gevent.joinall(task_list)
执行结果如下
1 "红豆,又叫赤小豆、红小豆、红饭豆、米赤豆、...,热量:324 大卡(每100克)" 2 "餐包,又叫小餐包、面包,热量:284 大卡(每100克)" 3 "烧饼(加糖),热量:298 大卡(每100克)" 4 "绿豆(干),又叫青小豆、植豆、交豆,热量:329 大卡(每100克)" 5 6 7 "嘉力缘 黑豆醋参片【清畅】,热量:388 大卡(每100克)" 8 "NATURE VALLEY 蛋白质能量棒,热量:475 大卡(每100克)" 9 "康恩贝 维生素C泡腾片,热量:338 大卡(每100克)"