概念:
什么是线程池?
诸如web服务器、数据库服务器、文件服务器和邮件服务器等许多服务器应用都面向处理来自某些远程来源的大量短小的任务。构建服务器应用程序的一个过于简单的模型是:每当一个请求到达就创建一个新的服务对象,然后在新的服务对象中为请求服务。但当有大量请求并发访问时,服务器不断地创建和销毁对象的开销很大。所以提高服务器效率的一个手段就是尽可能减少创建和销毁对象的次数,特别是一些很耗资源的对象创建和销毁,这样就引入了“池”的概念,“池”的概念使得人们可以定制一定量的资源,然后对这些资源进行复用,而不是频繁地创建和销毁。
线程池是预先创建线程的一种技术。线程池在还没有任务到来之前,创建一定数量的线程,放入空闲队列中。这些线程都处于睡眠状态,即均未启动,不消耗CPU,而只是占用较小的内存空间。当请求到来之后,缓冲池给这次请求分配一个空闲线程,把请求传入此线程中运行,进行处理。当预先创建的线程都处于运行状态,即预置线程不够用时,线程池可以自行创建一定数量的新线程,用于处理更多的请求。当系统比较空闲的时候,也可以移除一部分一直处于停用状态的线程。
1,简易方法,利用队列实现
(1)
from threading import Thread import time, os, queue class Threading_Pool(object): def __init__(self, MAX_NUM = 20): self.queue = queue.Queue(MAX_NUM) # self.queue.join() for i in range(MAX_NUM): self.queue.put(Thread) def get_thread(self): return self.queue.get() def put_thread(self): self.queue.put(Thread) def join(self): self.queue.join() def test(tp): time.sleep(1) print('haha') tp.put_thread() # print(tp.queue.full())获取队列是否满了 if __name__ == '__main__': tp = Threading_Pool(5) # tp.join() for i in range(20): p = tp.get_thread() t1 = p(target = test, args = (tp, )) t1.start()
打印结果:大约每隔1秒输出5个“haha”,与线程池容量5一致。
(2)
# -*- coding:utf-8 -*-

import queue
import threading
import time


class WorkManager(object):
    """Owns a job queue and a fixed set of worker threads that drain it.

    work_num jobs are enqueued first, then thread_num Work threads are
    started; each worker pulls jobs until the queue is empty.
    """

    def __init__(self, work_num=1000, thread_num=2):
        self.work_queue = queue.Queue()
        self.threads = []
        # Enqueue all jobs BEFORE starting workers, so a worker never
        # observes a transiently-empty queue and exits prematurely.
        self.__init_work_queue(work_num)
        self.__init_thread_pool(thread_num)

    def __init_thread_pool(self, thread_num):
        """Create (and implicitly start) the worker threads."""
        for i in range(thread_num):
            self.threads.append(Work(self.work_queue))

    def __init_work_queue(self, jobs_num):
        """Enqueue jobs_num calls to do_job."""
        for i in range(jobs_num):
            self.add_job(do_job, i)

    def add_job(self, func, *args):
        # Queue is internally synchronized; no extra locking needed.
        self.work_queue.put((func, list(args)))

    def wait_allcomplete(self):
        """Block until every worker thread has finished."""
        for item in self.threads:
            if item.is_alive():
                item.join()


class Work(threading.Thread):
    """Worker thread: drains the shared queue until it is empty."""

    def __init__(self, work_queue):
        threading.Thread.__init__(self)
        self.work_queue = work_queue
        self.start()

    def run(self):
        while True:
            try:
                # Non-blocking get: queue.Empty is the exit signal.
                do, args = self.work_queue.get(block=False)
            except queue.Empty:
                break
            try:
                do(args)
            except Exception as exc:
                # Report failed jobs instead of silently killing the worker
                # (the original bare except swallowed every error).
                print("job failed:", exc)
            finally:
                # Always account for the task, even on failure.
                self.work_queue.task_done()


def do_job(args):
    """The actual unit of work: sleep to simulate processing time."""
    time.sleep(0.1)
    print(threading.current_thread(), list(args))


if __name__ == '__main__':
    start = time.time()
    work_manager = WorkManager(10000, 10)  # or WorkManager(10000, 20)
    work_manager.wait_allcomplete()
    end = time.time()
    print("cost all time: %s" % (end - start))
# 2次以不同的线程数运行,结果如下:
# work_manager = WorkManager(10000, 10) cost all time: 100.641790867(单位:秒)
# work_manager = WorkManager(10000, 20) cost all time: 50.5233478546(单位:秒)
2,高效方法(1)
#-*-encoding:utf-8-*-
'''
Created on 2012-3-9
@summary: thread pool
@contact: mailto:zhanglixinseu@gmail.com
@author: zhanglixin
'''
import sys
import threading
import queue
import traceback


# Custom exceptions used as poll()/wait() control-flow signals.

class NoResultsPending(Exception):
    """All work requests have been processed."""
    pass


class NoWorkersAvailable(Exception):
    """No worker threads available to process remaining requests."""
    pass


def _handle_thread_exception(request, exc_info):
    """Default exception handler: just print the traceback."""
    traceback.print_exception(*exc_info)


class WorkerThread(threading.Thread):
    """Background worker: pulls requests from requestQueue, executes them,
    and pushes (request, result) pairs onto resultQueue."""

    def __init__(self, requestQueue, resultQueue, poll_timeout=5, **kwds):
        threading.Thread.__init__(self, **kwds)
        # Daemon thread: never keeps the process alive on its own.
        self.daemon = True
        self._requestQueue = requestQueue
        self._resultQueue = resultQueue
        self._poll_timeout = poll_timeout
        # Set => this worker has been dismissed and should exit.
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        # Loop for as long as this worker is alive and not dismissed.
        while True:
            if self._dismissed.is_set():
                break
            try:
                # Block until a request arrives or the poll times out;
                # the timeout lets us periodically re-check the flag.
                request = self._requestQueue.get(True, self._poll_timeout)
            except queue.Empty:
                continue
            # We may have been dismissed while blocked in get(): hand the
            # request back for another worker and exit.
            if self._dismissed.is_set():
                self._requestQueue.put(request)
                break
            try:
                result = request.callable(*request.args, **request.kwds)
                self._resultQueue.put((request, result))
            except Exception:
                # Narrow to Exception: a bare except would also trap
                # KeyboardInterrupt/SystemExit.
                request.exception = True
                self._resultQueue.put((request, sys.exc_info()))

    def dismiss(self):
        """Ask the worker to exit after finishing its current request."""
        self._dismissed.set()


class WorkRequest:
    '''
    @param callable_: the function that performs the work
    @param args: positional arguments (list)
    @param kwds: keyword arguments (dict)
    @param requestID: id; must be hashable
    @param callback: called as callback(request, result) from poll()
    @param exc_callback: called as exc_callback(request, exc_info) on failure
    '''

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
                 callback=None, exc_callback=_handle_thread_exception):
        if requestID is None:
            self.requestID = id(self)
        else:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestId must be hashable")
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        self.args = args or []
        self.kwds = kwds or {}

    def __str__(self):
        # Parenthesize the expression: the original relied on a line
        # continuation that was lost and became a syntax error.
        return "WorkRequest id=%s args=%r kwargs=%r exception=%s" % (
            self.requestID, self.args, self.kwds, self.exception)


class ThreadPool:
    '''
    @param num_workers: number of worker threads to start with
    @param q_size, resq_size: max sizes of the request/result queues (0 = unbounded)
    @param poll_timeout: WorkerThread timeout while waiting on requestQueue
    '''

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        self._requestQueue = queue.Queue(q_size)
        self._resultQueue = queue.Queue(resq_size)
        self.workers = []
        self.dismissedWorkers = []
        self.workRequests = {}  # requestID -> pending WorkRequest
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Create num_workers WorkerThreads (default timeout 5s)."""
        for i in range(num_workers):
            self.workers.append(WorkerThread(
                self._requestQueue, self._resultQueue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Dismiss up to num_workers threads; join them or defer to later."""
        dismiss_list = []
        for i in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)
        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Join every previously dismissed (un-joined) thread."""
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Enqueue a WorkRequest; blocks when the queue is full (q_size)."""
        assert isinstance(request, WorkRequest)
        assert not getattr(request, 'exception', None)
        self._requestQueue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Drain the result queue, firing callbacks for each result.

        Raises NoResultsPending when nothing is outstanding, and
        NoWorkersAvailable when blocking with no workers left.
        """
        while True:
            if not self.workRequests:
                raise NoResultsPending
            elif block and not self.workers:
                raise NoWorkersAvailable
            try:
                request, result = self._resultQueue.get(block=block)
            except queue.Empty:
                break
            if request.exception and request.exc_callback:
                request.exc_callback(request, result)
            if request.callback and not (request.exception and request.exc_callback):
                request.callback(request, result)
            del self.workRequests[request.requestID]

    def wait(self):
        """Block until every outstanding request has been processed."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break

    def workersize(self):
        return len(self.workers)

    def stop(self):
        """Dismiss and join every worker, ensuring a clean shutdown."""
        self.dismissWorkers(self.workersize(), True)
        self.joinAllDismissedWorkers()
测试代码:
# Test a demo

if __name__ == '__main__':
    import random
    import time
    import datetime

    def do_work(data):
        """Simulated work: sleep 1-3 seconds, return a timestamped string."""
        time.sleep(random.randint(1, 3))
        res = str(datetime.datetime.now()) + "" + str(data)
        return res

    def print_result(request, result):
        print("---Result from request %s : %r" % (request.requestID, result))

    main = ThreadPool(3)
    for i in range(40):
        req = WorkRequest(do_work, args=[i], kwds={}, callback=print_result)
        main.putRequest(req)
        print("work request #%s added." % req.requestID)

    print('-' * 20, main.workersize(), '-' * 20)

    counter = 0
    while True:
        try:
            time.sleep(0.5)
            # Non-blocking poll: drain whatever results are ready.
            main.poll()
            if counter == 5:
                print("Add 3 more workers threads")
                main.createWorkers(3)
                print('-' * 20, main.workersize(), '-' * 20)
            if counter == 10:
                print("dismiss 2 workers threads")
                main.dismissWorkers(2)
                print('-' * 20, main.workersize(), '-' * 20)
            counter += 1
        except NoResultsPending:
            print("no pending results")
            break

    main.stop()
    print("Stop")
高效版(2)(老外所作)
1 import threading 2 3 class WorkerTask(object): 4 """A task to be performed by the ThreadPool.""" 5 6 def __init__(self, function, args=(), kwargs={}): 7 self.function = function 8 self.args = args 9 self.kwargs = kwargs 10 11 def __call__(self): 12 self.function(*self.args, **self.kwargs) 13 14 15 class WorkerThread(threading.Thread): 16 """A thread managed by a thread pool.""" 17 18 def __init__(self, pool): 19 threading.Thread.__init__(self) 20 self.setDaemon(True) 21 self.pool = pool 22 self.busy = False 23 self._started = False 24 self._event = None 25 26 def work(self): 27 if self._started is True: 28 if self._event is not None and not self._event.isSet(): 29 self._event.set() 30 else: 31 self._started = True 32 self.start() 33 34 def run(self): 35 while True: 36 self.busy = True 37 while len(self.pool._tasks) > 0: 38 try: 39 task = self.pool._tasks.pop() 40 task() 41 except IndexError: 42 # Just in case another thread grabbed the task 1st. 43 pass 44 45 # Sleep until needed again 46 self.busy = False 47 if self._event is None: 48 self._event = threading.Event() 49 else: 50 self._event.clear() 51 self._event.wait() 52 53 class ThreadPool(object): 54 """Executes queued tasks in the background.""" 55 56 def __init__(self, max_pool_size=10): 57 self.max_pool_size = max_pool_size 58 self._threads = [] 59 self._tasks = [] 60 61 def _addTask(self, task): 62 self._tasks.append(task) 63 64 worker_thread = None 65 for thread in self._threads: 66 if thread.busy is False: 67 worker_thread = thread 68 break 69 70 if worker_thread is None and len(self._threads) <= self.max_pool_size: 71 worker_thread = WorkerThread(self) 72 self._threads.append(worker_thread) 73 74 if worker_thread is not None: 75 worker_thread.work() 76 77 def addTask(self, function, args=(), kwargs={}): 78 self._addTask(WorkerTask(function, args, kwargs)) 79 80 class GlobalThreadPool(object): 81 """ThreadPool Singleton class.""" 82 83 _instance = None 84 85 def __init__(self): 86 """Create singleton 
instance """ 87 88 if GlobalThreadPool._instance is None: 89 # Create and remember instance 90 GlobalThreadPool._instance = ThreadPool() 91 92 def __getattr__(self, attr): 93 """ Delegate get access to implementation """ 94 return getattr(self._instance, attr) 95 96 def __setattr__(self, attr, val): 97 """ Delegate set access to implementation """ 98 return setattr(self._instance, attr, val)