"""Scrape vegetable prices from xinfadi.com.cn using a thread pool.

A thread pool creates a batch of worker threads up front; tasks are submitted
to the pool and the pool takes care of running them.

Approach:
1. Work out how to extract the data of a single page (``down_one_page``).
2. Hand many pages to a thread pool so they are fetched concurrently.
"""

import csv
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

# Price-data endpoint; ``page`` is substituted into the ``current`` parameter.
PAGE_URL = "http://www.xinfadi.com.cn/getPriceData.html?current={page}"
PAGE_COUNT = 200        # number of pages to crawl (pages 1..PAGE_COUNT)
MAX_WORKERS = 50        # size of the thread pool
REQUEST_TIMEOUT = 10    # seconds; keeps a stalled connection from hanging a worker
ROW_FIELDS = ("prodName", "avgPrice", "place")  # JSON keys written per CSV row

# NOTE(review): the output file is opened at import time, exactly as the
# original script did, so that ``down_one_page`` stays callable by importers.
# The ``__main__`` block below closes it once all pages are written.
f = open("菜价.csv", mode="w", encoding="utf-8", newline="")
csvwriter = csv.writer(f)

# ``csvwriter`` is shared by every worker thread; serialize access so rows
# from different pages cannot interleave in the output.
_write_lock = threading.Lock()


def down_one_page(url):
    """Download one page of price data and append its records to the CSV.

    Args:
        url: Full URL of a price-data page that returns a JSON object whose
            ``"list"`` entry is a sequence of records.

    Raises:
        requests.RequestException: On network errors, timeouts, or a
            non-success HTTP status.
        KeyError: If the payload or a record lacks an expected key.
    """
    resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()

    # Parse the body once and iterate over however many records the page
    # actually has, instead of assuming exactly 20.
    records = resp.json()["list"]
    rows = [[record[field] for field in ROW_FIELDS] for record in records]

    with _write_lock:
        for row in rows:
            print(row)
        csvwriter.writerows(rows)


if __name__ == "__main__":
    # ``with f`` guarantees the CSV is flushed and closed even if a page fails.
    with f:
        with ThreadPoolExecutor(MAX_WORKERS) as pool:
            futures = {
                pool.submit(down_one_page, PAGE_URL.format(page=page)): page
                for page in range(1, PAGE_COUNT + 1)
            }
            # Inspect every future: exceptions raised inside a worker are
            # otherwise swallowed silently by the executor.
            for future in as_completed(futures):
                try:
                    future.result()
                except (requests.RequestException, KeyError,
                        TypeError, ValueError) as exc:
                    print(f"page {futures[future]} failed: {exc!r}")
    print("下载完毕")
使用多线程(线程池)爬取新发地200页菜价,速度得到很大的提升。