• Python 批量文件下载


    Python 文件批量下载、图片批量下载、批量请求、爬虫

    #====================================================================================

    downloadFile.py

    #====================================================================================

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
     
    from gevent import monkey
    monkey.patch_all()
    from gevent.pool import Pool
    import requests
    import sys
    import os

    def download(url, filename="respose.log", timeout=None):
        """Fetch ``url`` and append the response body to ``filename``.

        The body is streamed in 1 KiB chunks so large responses are never
        held in memory all at once.

        Args:
            url: Address to request; surrounding whitespace is stripped.
            filename: File the body is appended to. Defaults to the shared
                log file "respose.log". When several downloads append to the
                same file concurrently, their chunks may interleave; pass a
                distinct name per URL to keep the bodies separate.
            timeout: Passed through to ``requests.get``. ``None`` (the
                default) waits indefinitely.

        Raises:
            requests.exceptions.RequestException: If the request fails.
            IOError: If ``filename`` cannot be opened or written.
        """
        chrome = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
        headers = {'User-Agent': chrome}
        r = requests.get(url.strip(), headers=headers, stream=True,
                         timeout=timeout)
        try:
            # iter_content yields bytes, so the file must be opened in binary
            # mode: text mode mangles line endings on Windows and rejects
            # bytes outright on Python 3.
            with open(filename, 'ab') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # skip keep-alive chunks that carry no data
                        f.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            r.close()
        # Single-argument form prints the same text on Python 2 and Python 3.
        print("%s is ok" % filename)

    def removeLine(key, filename):
        """Delete, in place, every line of ``filename`` that matches ``key``.

        The work is delegated to ``sed -i``, so ``key`` is interpreted as a
        sed regular expression and must not contain an unescaped ``/``.

        Args:
            key: Pattern selecting the lines to delete. An empty key is
                ignored, because ``sed`` rejects an empty pattern.
            filename: Path of the file to edit in place.

        Raises:
            OSError: If the ``sed`` executable cannot be started.
        """
        import subprocess

        if not key:
            return
        # An argument list bypasses the shell, so spaces or shell
        # metacharacters in ``key`` / ``filename`` cannot split or inject
        # commands. ``--`` stops a filename starting with "-" from being
        # parsed as an option.
        subprocess.call(['sed', '-i', '-e', '/%s/d' % key, '--', filename])

    if __name__ =="__main__":
        # Usage: python downloadFile.py <file with one URL per line>
        if len(sys.argv) == 2:
            filename = sys.argv[1]
            # At most four downloads are in flight at any time.
            p = Pool(4)
            with open(filename, "r") as f:
                for line in f:
                    url = line.strip()
                    if not url:
                        # Blank lines would otherwise be requested as an
                        # empty URL.
                        continue
                    p.spawn(download, url)
            # Wait once, after everything is queued. Joining inside the loop
            # would finish each download before the next one starts and make
            # the pool pointless.
            p.join()
        else:
            # Single-argument form prints the same text on Python 2 and 3.
            print('Usage: python %s urls.txt' % sys.argv[0])

    #====================================================================================

    #====================================================================================

    测试文件 url.txt

    #===========================

    http://download2.boulder.ibm.com/sar/CMA/RAA/075lj/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075ln/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075lt/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075m7/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075m9/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075mb/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075mf/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075mn/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075ms/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075mv/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075nd/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075nk/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075no/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075nr/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075ns/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075nu/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075ny/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075o0/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075o1/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075p8/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075px/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075py/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075pz/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075q1/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075q3/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075q5/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/075zm/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/0758i/0/
    http://download2.boulder.ibm.com/sar/CMA/RAA/0759s/0/

    #=================================================

    运行结果

    G:\test\appscan>python downloadFile.py url.txt

    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok
    respose.log is ok

    #================================

    结果文件

  • 相关阅读:
    React全家桶+AntD 共享单车后台管理系统开发
    eclipse中通过Properties Editor插件查看配置文件中Unicode内容
    修改eclipse的编码格式
    后端接收前端数据中文乱码解决方案
    MySQL基础
    wordpress个人常用标签调用
    4gl游标cursor
    尝试写一写4gl与4fd
    foreach循環體控制
    保护wordpress后台登录地址
  • 原文地址:https://www.cnblogs.com/hua198/p/10044726.html
Copyright © 2020-2023  润新知