本文中的程序是多年前学习编程时,在一位网友程序的基础之上改写而成的,采用 Python 语言编写,实现了多线程下载功能,可以有效弥补 Linux 下原有下载工具的一些不足。以下给出具体代码。
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Devilmaycry
# Email: 812839668@qq.com
# This program was written years ago with reference to someone else's code and
# improved on that basis. Too much time has passed to cite the original
# source; this is hereby acknowledged.
# Test platform: Ubuntu 14.04 X86_64, Python 2.7.6

import threading
try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2; urllib.request provides the Request/urlopen
    # names this module uses.
    import urllib.request as urllib2
import sys
import time

# Global upper bound on the number of download threads.
max_thread = 20

# Consecutive attempts a worker may make without receiving a single byte
# before it gives up on its range.
max_retry = 10

# Seconds to wait between two attempts of the same worker.
retry_delay = 1

# Lock serializing seek+write on the shared output file object.
lock = threading.RLock()


def split_ranges(size, thread_num):
    """Split ``size`` bytes into contiguous inclusive ``(start, end)`` ranges.

    At most ``thread_num`` ranges are produced, and never more than ``size``
    (so no range is empty). The last range absorbs the division remainder
    and ends at ``size - 1``, because HTTP byte ranges are inclusive.

    Returns an empty list when ``size`` is not positive.
    """
    if size <= 0:
        return []
    thread_num = max(1, min(thread_num, size))
    avg_size, pad_size = divmod(size, thread_num)
    ranges = []
    for i in range(thread_num):
        start_size = i * avg_size
        end_size = start_size + avg_size - 1
        if i == thread_num - 1:
            # The last worker also takes the remainder.
            end_size += pad_size
        ranges.append((start_size, end_size))
    return ranges


class Downloader(threading.Thread):
    """Thread that downloads one inclusive byte range of ``url`` into ``fobj``.

    Parameters:
        url:        address of the resource to download.
        start_size: offset of the first byte this thread fetches.
        end_size:   offset of the last byte this thread fetches (inclusive).
        fobj:       seekable, writable file object shared by all threads.
        buffer:     maximum number of bytes requested per read.

    After the thread finishes, ``error`` is ``None`` on success, or the
    exception describing why the range could not be completed.
    """

    def __init__(self, url, start_size, end_size, fobj, buffer):
        threading.Thread.__init__(self)
        self.url = url
        self.buffer = buffer
        self.start_size = start_size
        self.end_size = end_size
        self.fobj = fobj
        # Attempt counter, only used in the failure message.
        self.i = 1
        # Set when the range could not be downloaded.
        self.error = None

    def run(self):
        print('starting: %s' % self.getName())
        self._download()

    def _download(self):
        """Fetch the assigned range, resuming after each failed attempt."""
        offset = self.start_size
        stalled = 0
        while offset <= self.end_size:
            attempt_start = offset
            f = None
            try:
                req = urllib2.Request(self.url)
                # Ask only for the bytes still missing from this range.
                req.add_header('Range',
                               'bytes=%s-%s' % (offset, self.end_size))
                f = urllib2.urlopen(req, timeout=15)
                status = f.getcode()
                # 206 means the range was honored. A plain 200 carries the
                # whole body from byte 0, which is only usable when this
                # attempt starts at offset 0; retrying would not help.
                if status != 206 and not (status == 200 and offset == 0):
                    self.error = IOError(
                        'server ignored the Range header (HTTP %s)' % status)
                    return
                while offset <= self.end_size:
                    # Never read past the end of this thread's range.
                    want = min(self.buffer, self.end_size - offset + 1)
                    block = f.read(want)
                    if not block:
                        raise IOError(
                            'connection closed before the range was complete')
                    with lock:
                        # Position the shared file object, then write.
                        self.fobj.seek(offset)
                        self.fobj.write(block)
                    offset += len(block)
            except Exception as exc:
                print(self.getName() + " failed time " + "=====" * 10
                      + str(self.i) + " ")
                self.i = self.i + 1
                # Remember how far we got so the range resumes from there.
                self.start_size = offset
                if offset > attempt_start:
                    stalled = 0
                else:
                    stalled += 1
                if stalled >= max_retry:
                    self.error = exc
                    return
                time.sleep(retry_delay)
            finally:
                if f is not None:
                    f.close()
        print('%s done.' % self.getName())


def main(url, threadNum=30, save_file='', buffer=1024):
    """Download ``url`` into ``save_file`` using several range requests.

    Parameters:
        url:       address of the resource to download.
        threadNum: requested number of threads; capped at ``max_thread``
                   and at the resource size in bytes.
        save_file: path of the output file (opened with mode ``'wb'``).
        buffer:    maximum number of bytes per read in each thread.

    Raises:
        ValueError: the server did not report a Content-Length.
        IOError:    at least one thread could not complete its range.
    """
    # The thread count may not exceed max_thread.
    threadNum = min(threadNum, max_thread)

    # Ask the server for the size of the resource.
    resp = urllib2.urlopen(url, timeout=15)
    try:
        length = resp.info().get('Content-Length')
    finally:
        resp.close()
    if length is None:
        raise ValueError('server did not report Content-Length for %s' % url)
    size = int(length)

    fobj = open(save_file, 'wb')
    try:
        # One worker per byte range.
        plist = [Downloader(url, start_size, end_size, fobj, buffer)
                 for start_size, end_size in split_ranges(size, threadNum)]
        for t in plist:
            t.start()
        # Wait for every worker to finish.
        for t in plist:
            t.join()
    finally:
        # Always release the output file, even if a thread could not start.
        fobj.close()

    failed = [t.getName() for t in plist if t.error is not None]
    if failed:
        raise IOError('download incomplete, failed threads: %s'
                      % ', '.join(failed))
    print('Download completed!')
if __name__ == '__main__':
    # Usage: script [URL [OUTPUT_FILE]]
    # Without arguments the original sample URL is downloaded to 'test'.
    # Alternative sample URL kept from the original script:
    #url = "http://dlsw.baidu.com/sw-search-sp/soft/3a/12350/QQ7.1.14509.0.1429585990.exe"
    default_url = "https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz"
    url = sys.argv[1] if len(sys.argv) > 1 else default_url
    save_file = sys.argv[2] if len(sys.argv) > 2 else 'test'

    start = time.time()
    main(url=url, threadNum=20, save_file=save_file, buffer=4096)
    end = time.time()
    # Elapsed wall-clock time in seconds.
    print(end - start)