本代码实现按指定的下载列表进行多进程下载的功能。
注意事项有:
1、下载过程中,显示总数、已存在、已下载、出错、剩余等信息,以便随时掌握进度。
2、可以指定重试次数(在程序中指定)
3、进程数、下载列表由命令行参数指定
4、保存位置需要在程序中指定
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 16 07:52:40 2019

@author: mi

Download every entry of a CSV download list with a pool of worker processes.

Usage: python <script> <process_count> <download_list.csv>

Each CSV row holds the link (appended to BASE_URL) in its first column and
the save name (placed under SAVE_DIR with SAVE_SUFFIX appended) in its second
column.  After every finished task a progress line is printed with the total,
already-existing, downloaded, failed and remaining counts.
"""
import csv
import os
import sys
from multiprocessing import Pool

import requests

# Settings that are configured in the program rather than on the command line.
BASE_URL = 'http://www.xxx.com/'   # prefix prepended to the first CSV column
SAVE_DIR = 'D:/saveFolder/'        # prefix prepended to the second CSV column
SAVE_SUFFIX = '.htm'               # suffix appended to every saved file name
MAX_ATTEMPTS = 3                   # total number of request attempts per link
REQUEST_TIMEOUT = 30               # seconds, passed to requests.get

exist_count = 0        # tasks whose target file already existed
downloaded_count = 0   # tasks downloaded successfully
total_count = 0        # tasks submitted to the pool
error_count = 0        # tasks that failed


def downloading_over(arg):
    """Pool callback: record one finished task and print the progress line.

    ``arg`` is the status string returned by ``get_page`` ('EXISTS',
    'SUCCESS' or 'ERROR').  Any other value is printed but not counted.
    """
    global downloaded_count
    global exist_count
    global error_count
    print("返回状态:", arg)
    if arg == 'EXISTS':
        exist_count += 1
    elif arg == 'SUCCESS':
        downloaded_count += 1
    elif arg == 'ERROR':
        error_count += 1
    remaining = total_count - exist_count - downloaded_count - error_count
    print('总数:%s / 已存在:%s / 已下载:%s / 出错:%s / 剩余:%s'
          % (total_count, exist_count, downloaded_count, error_count,
             remaining))


def downloading_failed(exc):
    """Pool error callback: count a task whose worker raised an exception.

    Without an error callback ``apply_async`` drops the exception silently
    and the task would never be subtracted from the remaining count.
    """
    print(exc)
    downloading_over('ERROR')


def get_page(link):
    """Download one page and save it to disk.

    ``link`` is a two-item sequence: ``link[0]`` is the URL and ``link[1]``
    is the path the response body is written to.

    Returns 'EXISTS' when the target file is already present, 'SUCCESS' once
    the body has been saved, and 'ERROR' when every one of the MAX_ATTEMPTS
    requests failed.  Filesystem errors (OSError) are not caught here.
    """
    url = link[0]
    save_path = link[1]
    print(save_path)
    if os.path.exists(save_path):
        print('已存在')
        return 'EXISTS'

    for _ in range(MAX_ATTEMPTS):
        try:
            resp = requests.get(url, timeout=REQUEST_TIMEOUT)
            # Treat HTTP error statuses as failures; otherwise an error page
            # would be saved and then skipped as 'EXISTS' on every later run.
            resp.raise_for_status()
        except requests.RequestException as e:
            print(e)
            continue

        save_dir = os.path.dirname(save_path)
        if save_dir:
            # exist_ok avoids FileExistsError when two workers create the
            # same directory at the same time.
            os.makedirs(save_dir, exist_ok=True)

        # Write to a temporary name first so an interrupted write never
        # leaves a truncated file that a later run would count as existing.
        tmp_path = save_path + '.part'
        with open(tmp_path, 'wb') as fw:
            fw.write(resp.content)
        os.replace(tmp_path, save_path)
        return 'SUCCESS'

    return 'ERROR'


def main(argv):
    """Read the download list named in ``argv`` and fetch it with a pool.

    ``argv[1]`` is the number of worker processes and ``argv[2]`` is the path
    of the UTF-8 CSV download list.
    """
    global total_count
    if len(argv) < 3:
        print('用法: python %s <进程数量> <下载列表>' % argv[0])
        return

    process_num = argv[1]
    print('进程数量:' + process_num)
    download_list = argv[2]
    print('下载列表:' + download_list)
    worker_count = int(process_num)

    # Download list: first column is the link, second column is the save name.
    tasks = []
    with open(download_list, 'r', encoding='utf-8', newline='') as downlist:
        for row in csv.reader(downlist):
            if not row:
                continue  # csv.reader yields [] for blank lines
            if len(row) < 2:
                print('跳过格式错误的行:', row)
                continue
            url = BASE_URL + row[0]
            save_path = SAVE_DIR + row[1] + SAVE_SUFFIX
            tasks.append((url, save_path))

    # Fix the total before any task can finish so that every progress line
    # reports the real total and remaining counts.
    total_count = len(tasks)

    with Pool(processes=worker_count) as pool:  # set the processes max number
        for task in tasks:
            pool.apply_async(func=get_page, args=(task,),
                             callback=downloading_over,
                             error_callback=downloading_failed)
        pool.close()
        pool.join()


if __name__ == "__main__":
    # NOTE(review): presumably a workaround for launching multiprocessing
    # from an IDE console where __main__.__spec__ is missing -- confirm
    # before removing.
    __spec__ = "ModuleSpec(name='builtins', loader=<class '_frozen_importlib.BuiltinImporter'>)"
    main(sys.argv)