• python 收录几种实现线程池的方法


    概念:

    什么是线程池?

          诸如web服务器、数据库服务器、文件服务器和邮件服务器等许多服务器应用都面向处理来自某些远程来源的大量短小的任务。构建服务器应用程序的一个过于简单的模型是:每当一个请求到达就创建一个新的服务对象,然后在新的服务对象中为请求服务。但当有大量请求并发访问时,服务器不断的创建和销毁对象的开销很大。所以提高服务器效率的一个手段就是尽可能减少创建和销毁对象的次数,特别是一些很耗资源的对象创建和销毁,这样就引入了“池”的概念,“池”的概念使得人们可以定制一定量的资源,然后对这些资源进行复用,而不是频繁的创建和销毁。

          线程池是预先创建线程的一种技术。线程池在还没有任务到来之前,创建一定数量的线程,放入空闲队列中。这些线程都处于睡眠状态,即均未启动,不消耗CPU,而只是占用较小的内存空间。当请求到来之后,线程池给这次请求分配一个空闲线程,把请求传入此线程中运行,进行处理。当预先创建的线程都处于运行状态,即预制线程不够时,线程池可以自由创建一定数量的新线程,用于处理更多的请求。当系统比较闲的时候,也可以移除一部分一直处于停用状态的线程。

    1,简易方法,利用队列实现

    (1)

    from threading import Thread
    
    import time, os, queue
    
    class Threading_Pool(object):
        """Slot-based limiter that caps how many worker threads run at once.

        The internal queue is pre-filled with ``MAX_NUM`` references to the
        ``Thread`` class itself (not thread instances). ``get_thread`` takes
        one slot, blocking while none is left; the caller builds and starts a
        thread from it, and the thread's target hands the slot back with
        ``put_thread`` when it is done.
        """

        def __init__(self, MAX_NUM = 20):
            """Create the pool with ``MAX_NUM`` free slots."""
            # Imported here because the surrounding snippet only imports Thread.
            from threading import Condition

            self.queue = queue.Queue(MAX_NUM)
            self._busy = 0  # slots handed out and not yet returned
            self._idle = Condition()  # guards _busy; notified when it reaches 0
            for _ in range(MAX_NUM):
                self.queue.put(Thread)

        def get_thread(self):
            """Take a slot and return the ``Thread`` class; blocks if none is free."""
            slot = self.queue.get()
            with self._idle:
                self._busy += 1
            return slot

        def put_thread(self):
            """Return one slot to the pool."""
            # Refill the queue first so that the slot is already available
            # again by the time a waiting join() wakes up.
            self.queue.put(Thread)
            with self._idle:
                if self._busy > 0:
                    self._busy -= 1
                if self._busy == 0:
                    self._idle.notify_all()

        def join(self):
            """Block until every slot taken with ``get_thread`` has been returned."""
            # Queue.join() cannot be used for this: it waits for task_done()
            # calls, which this pool never makes, so it would block forever.
            with self._idle:
                while self._busy:
                    self._idle.wait()
    
    
    def test(tp):
        """Demo job: wait one second, print ``haha``, then free the pool slot.

        ``tp`` is the pool the running thread was taken from; it must provide
        ``put_thread()``.
        """
        try:
            time.sleep(1)
            print('haha')
        finally:
            # Always give the slot back, otherwise a failing job would shrink
            # the pool for good and later get_thread() calls could block.
            tp.put_thread()
        # Hint: tp.queue.full() tells whether every slot is back in the pool.
    
    if __name__ == '__main__':
        # Five slots for twenty jobs: get_thread() blocks whenever all five
        # slots are in use, so the jobs run in batches.
        pool = Threading_Pool(5)
        for _ in range(20):
            thread_cls = pool.get_thread()
            worker = thread_cls(target = test, args = (pool, ))
            worker.start()

    打印结果会1秒出5个

      

    (2)

     1 # -*- coding:utf-8 -*-  
     2   
     3 import Queue  
     4 import threading  
     5 import time  
     6   
     class WorkManager(object):
         """Fill a job queue and run a fixed number of worker threads over it.

         :param work_num: number of demo jobs (``do_job``) queued up front
         :param thread_num: number of ``Work`` threads to create
         """

         def __init__(self, work_num=1000, thread_num=2):
             self.work_queue = Queue.Queue()
             self.threads = []
             # Queue the jobs before creating the workers: a Work thread
             # starts itself on construction and exits once the queue is empty.
             self.__init_work_queue(work_num)
             self.__init_thread_pool(thread_num)

         def __init_thread_pool(self, thread_num):
             """Create ``thread_num`` workers bound to the shared job queue."""
             for _ in range(thread_num):
                 self.threads.append(Work(self.work_queue))

         def __init_work_queue(self, jobs_num):
             """Queue ``jobs_num`` demo jobs, each receiving its index."""
             for i in range(jobs_num):
                 self.add_job(do_job, i)

         def add_job(self, func, *args):
             """Queue one job as a ``(func, [args...])`` pair.

             The queue does its own locking, so no extra synchronisation is
             needed here.
             """
             self.work_queue.put((func, list(args)))

         def wait_allcomplete(self):
             """Block until every worker thread has finished."""
             for item in self.threads:
                 # is_alive() replaces the camelCase isAlive() alias, which no
                 # longer exists on current Python 3 releases.
                 if item.is_alive():
                     item.join()
    40   
    class Work(threading.Thread):
        """Worker thread that drains a shared job queue and then exits.

        Each queue item is a ``(func, args)`` pair; the job is invoked as
        ``func(args)``, i.e. the argument list is passed as one parameter.
        """

        def __init__(self, work_queue):
            threading.Thread.__init__(self)
            self.work_queue = work_queue
            # The worker starts itself, so jobs should be queued beforehand.
            self.start()

        def run(self):
            """Run queued jobs until the queue is empty."""
            # Local import: the surrounding snippet does not import traceback.
            import traceback

            while True:
                try:
                    # Non-blocking get: an empty queue means there is nothing
                    # left to do and the thread may finish.
                    do, args = self.work_queue.get(block=False)
                except Queue.Empty:
                    break
                try:
                    do(args)
                except Exception:
                    # Report the failing job and keep serving the queue rather
                    # than silently ending the worker.
                    traceback.print_exc()
                finally:
                    # Every get() is matched by task_done(), even on failure.
                    self.work_queue.task_done()
    56   
    # The concrete job executed by the workers.
    def do_job(args):
        """Demo job: sleep briefly, then print the current thread and ``args``.

        :param args: iterable of job arguments; printed as a list
        """
        time.sleep(0.1)  # simulate processing time
        # Single-argument print call: same text as the old two-item print
        # statement, and valid syntax on Python 3 as well.
        print("%s %s" % (threading.current_thread(), list(args)))
    61   
    if __name__ == '__main__':
        # Time how long the workers need to drain the whole job queue.
        start = time.time()
        work_manager = WorkManager(10000, 10)  # or: WorkManager(10000, 20)
        work_manager.wait_allcomplete()
        end = time.time()
        print("cost all time: %s" % (end - start))

    #2次开启不同的线程数运行结果如下: #work_manager = WorkManager(10000, 10) cost all time: 100.641790867(单位:秒) #work_manager = WorkManager(10000, 20) cost all time:50.5233478546(单位:秒)

    2,高效方法(1)

      1 #-*-encoding:utf-8-*-
      2 '''
      3 Created on 2012-3-9
      4 @summary: 线程池
      5 @contact: mailto:zhanglixinseu@gmail.com
      6 @author: zhanglixin
      7 '''
      8 import sys
      9 import threading
     10 import Queue
     11 import traceback
     12 
     13 # 定义一些Exception,用于自定义异常处理
     14 
     class NoResultsPending(Exception):
         """All work requests have been processed."""
     18 
     class NoWorkersAvailable(Exception):
         """No worker threads available to process remaining requests."""
     22 
     def _handle_thread_exception(request, exc_info):
         """Default exception callback: just print the traceback.

         :param request: the request whose callable failed (not used here)
         :param exc_info: exception details, unpacked into
             ``traceback.print_exception``
         """
         traceback.print_exception(*exc_info)
     26 
     27 #classes 
     28 
     class WorkerThread(threading.Thread):
         """Background worker that executes requests from a shared queue.

         Requests are taken from ``requestQueue``; for each one a
         ``(request, result)`` tuple is put on ``resultQueue``. When the
         callable raises, ``request.exception`` is set to True and the result
         is the ``sys.exc_info()`` triple instead.

         :param requestQueue: queue of pending requests
         :param resultQueue: queue receiving ``(request, result)`` tuples
         :param poll_timeout: seconds to wait for a request before re-checking
             whether the worker has been dismissed
         :param kwds: passed through to ``threading.Thread.__init__``
         """

         def __init__(self, requestQueue, resultQueue, poll_timeout=5, **kwds):
             threading.Thread.__init__(self, **kwds)
             # Daemon thread: it does not keep the interpreter alive at exit.
             self.daemon = True
             self._requestQueue = requestQueue
             self._resultQueue = resultQueue
             self._poll_timeout = poll_timeout
             # Set once the worker has been dismissed; initially clear.
             self._dismissed = threading.Event()
             self.start()

         def run(self):
             """Process requests until the worker is dismissed."""
             while not self._dismissed.is_set():
                 try:
                     # Block until a request arrives or the poll timeout
                     # expires; the queue does its own locking.
                     request = self._requestQueue.get(True, self._poll_timeout)
                 except Queue.Empty:
                     continue
                 # The worker may have been dismissed while it was waiting;
                 # in that case hand the request back for another worker.
                 if self._dismissed.is_set():
                     self._requestQueue.put(request)
                     break
                 try:
                     result = request.callable(*request.args, **request.kwds)
                     print(self.name)
                     self._resultQueue.put((request, result))
                 except:
                     # Deliberately catches everything so that each request
                     # yields exactly one entry on the result queue.
                     request.exception = True
                     self._resultQueue.put((request, sys.exc_info()))

         def dismiss(self):
             """Ask the worker to exit after the request it is working on."""
             self._dismissed.set()
     75 
     76 
     class WorkRequest:
         """One unit of work for the pool, plus its callbacks.

         :param callable_: the function that performs the work
         :param args: positional arguments for ``callable_`` (default: none)
         :param kwds: keyword arguments for ``callable_`` (default: none)
         :param requestID: hashable id; its hash is stored as ``requestID``.
             When omitted, ``id(self)`` is used.
         :param callback: called as ``callback(request, result)`` for an entry
             taken from the result queue
         :param exc_callback: called as ``exc_callback(request, exc_info)``
             when the work raised an exception
         :raises TypeError: if ``requestID`` is not hashable
         """

         def __init__(self, callable_, args=None, kwds=None, requestID=None,
                      callback=None, exc_callback=_handle_thread_exception):
             if requestID is None:
                 self.requestID = id(self)
             else:
                 try:
                     self.requestID = hash(requestID)
                 except TypeError:
                     raise TypeError("requestId must be hashable")
             # Set to True by the worker when the callable raises.
             self.exception = False
             self.callback = callback
             self.exc_callback = exc_callback
             self.callable = callable_
             self.args = args or []
             self.kwds = kwds or {}

         def __str__(self):
             # Parenthesised so the expression may span two lines.
             return "WorkRequest id=%s args=%r kwargs=%r exception=%s" % (
                 self.requestID, self.args, self.kwds, self.exception)
    105             
    class ThreadPool:
        """Pool of ``WorkerThread`` objects sharing a request and a result queue.

        :param num_workers: number of worker threads created up front
        :param q_size: maximum size of the request queue (0 means unbounded)
        :param resq_size: maximum size of the result queue (0 means unbounded)
        :param poll_timeout: seconds a worker waits on the request queue
            before re-checking whether it has been dismissed
        """

        def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
            self._requestQueue = Queue.Queue(q_size)
            self._resultQueue = Queue.Queue(resq_size)
            self.workers = []
            self.dismissedWorkers = []
            # Pending requests keyed by requestID.
            self.workRequests = {}
            self.createWorkers(num_workers, poll_timeout)

        def createWorkers(self, num_workers, poll_timeout=5):
            """Add ``num_workers`` new worker threads to the pool."""
            for _ in range(num_workers):
                self.workers.append(WorkerThread(self._requestQueue,
                                                 self._resultQueue,
                                                 poll_timeout=poll_timeout))

        def dismissWorkers(self, num_workers, do_join=False):
            """Dismiss up to ``num_workers`` workers.

            With ``do_join`` the call waits for them to finish; otherwise they
            are remembered in ``dismissedWorkers`` to be joined later.
            """
            dismiss_list = []
            for _ in range(min(num_workers, len(self.workers))):
                worker = self.workers.pop()
                worker.dismiss()
                dismiss_list.append(worker)
            if do_join:
                for worker in dismiss_list:
                    worker.join()
            else:
                self.dismissedWorkers.extend(dismiss_list)

        def joinAllDismissedWorkers(self):
            """Join every worker dismissed earlier without ``do_join``."""
            for worker in self.dismissedWorkers:
                worker.join()
            self.dismissedWorkers = []

        def putRequest(self, request, block=True, timeout=None):
            """Queue ``request`` and register it as pending.

            When the request queue is full, this blocks until a slot is free
            or ``timeout`` expires (standard ``Queue.put`` behaviour).
            """
            assert isinstance(request, WorkRequest)
            assert not getattr(request, 'exception', None)
            self._requestQueue.put(request, block, timeout)
            self.workRequests[request.requestID] = request

        def poll(self, block=False):
            """Dispatch finished requests to their callbacks.

            :param block: wait for each result instead of returning as soon
                as the result queue is empty
            :raises NoResultsPending: when no request is pending any more
            :raises NoWorkersAvailable: when blocking with no workers left
            """
            while True:
                if not self.workRequests:
                    raise NoResultsPending
                elif block and not self.workers:
                    raise NoWorkersAvailable
                try:
                    request, result = self._resultQueue.get(block=block)
                    # A failed request goes to exc_callback when one is set;
                    # every other result goes to the normal callback.
                    if request.exception and request.exc_callback:
                        request.exc_callback(request, result)
                    if request.callback and not (request.exception and request.exc_callback):
                        request.callback(request, result)
                    del self.workRequests[request.requestID]
                except Queue.Empty:
                    # Only reachable in non-blocking mode: nothing is ready.
                    break

        def wait(self):
            """Block until every pending request has been processed."""
            while True:
                try:
                    self.poll(True)
                except NoResultsPending:
                    break

        def workersize(self):
            """Return the number of active (not dismissed) workers."""
            return len(self.workers)

        def stop(self):
            """Dismiss and join all workers so that every thread has finished."""
            self.dismissWorkers(self.workersize(), True)
            self.joinAllDismissedWorkers()

    测试代码:

     1 #Test a demo
     2 
     if __name__=='__main__':
         import random
         import time
         import datetime

         def do_work(data):
             """Sleep 1-3 seconds, then return a timestamp followed by ``data``."""
             time.sleep(random.randint(1,3))
             res = str(datetime.datetime.now()) + "" +str(data)
             return res

         def print_result(request,result):
             """Result callback: print the request id and its result."""
             print("---Result from request %s : %r" % (request.requestID,result))

         # Start with three workers and queue forty requests.
         main = ThreadPool(3)
         for i in range(40):
             req = WorkRequest(do_work,args=[i],kwds={},callback=print_result)
             main.putRequest(req)
             print("work request #%s added." % req.requestID)

         print("%s %s %s" % ('-'*20, main.workersize(), '-'*20))

         # Poll twice a second; grow the pool after 5 rounds and shrink it
         # after 10 to show that workers can be added and dismissed at runtime.
         counter = 0
         while True:
             try:
                 time.sleep(0.5)
                 main.poll()
                 if(counter==5):
                     print("Add 3 more workers threads")
                     main.createWorkers(3)
                     print("%s %s %s" % ('-'*20, main.workersize(), '-'*20))
                 if(counter==10):
                     print("dismiss 2 workers threads")
                     main.dismissWorkers(2)
                     print("%s %s %s" % ('-'*20, main.workersize(), '-'*20))
                 counter+=1
             except NoResultsPending:
                 print("no pending results")
                 break

         main.stop()
         print("Stop")

    高效版(2)(老外所作)

     1 import threading  
     2   
     class WorkerTask(object):
         """A task to be performed by the ThreadPool.

         :param function: callable to run
         :param args: positional arguments for ``function``
         :param kwargs: keyword arguments for ``function``; ``None`` means
             none (a fresh dict per task, so tasks never share one)
         """

         def __init__(self, function, args=(), kwargs=None):
             self.function = function
             self.args = args
             self.kwargs = {} if kwargs is None else kwargs

         def __call__(self):
             """Run the task and return whatever the function returns."""
             return self.function(*self.args, **self.kwargs)
    13   
    14   
    class WorkerThread(threading.Thread):
        """A thread managed by a thread pool.

        The worker runs tasks popped from ``pool._tasks`` and sleeps on an
        event while there is nothing to do; ``work()`` starts it the first
        time and wakes it afterwards. ``busy`` is True while it is draining
        tasks.
        """

        def __init__(self, pool):
            threading.Thread.__init__(self)
            self.daemon = True
            self.pool = pool
            self.busy = False
            # Not named ``_started``: threading.Thread keeps its own attribute
            # of that name on Python 3 and start() relies on it.
            self._launched = False
            # Created up front so work() can always signal it.
            self._event = threading.Event()

        def work(self):
            """Start the thread on first use, wake it up on later calls."""
            if self._launched:
                self._event.set()
            else:
                self._launched = True
                self.start()

        def run(self):
            # Local import: the surrounding snippet only imports threading.
            import traceback

            while True:
                # Clear before looking at the task list: a wake-up sent while
                # tasks are being drained then makes the wait() below return
                # at once instead of being lost.
                self._event.clear()
                self.busy = True
                while len(self.pool._tasks) > 0:
                    try:
                        task = self.pool._tasks.pop()
                    except IndexError:
                        # Just in case another thread grabbed the task 1st.
                        continue
                    try:
                        task()
                    except Exception:
                        # Report the failure but stay alive; a dead worker
                        # would stay registered in the pool with busy == True.
                        traceback.print_exc()

                # Sleep until needed again
                self.busy = False
                # A task appended while busy was still True triggers no
                # wake-up, so look once more before going to sleep.
                if self.pool._tasks:
                    continue
                self._event.wait()
    52   
    class ThreadPool(object):
        """Executes queued tasks in the background.

        :param max_pool_size: upper bound on the number of worker threads
        """

        def __init__(self, max_pool_size=10):
            self.max_pool_size = max_pool_size
            self._threads = []
            # Pending tasks; the workers take them with ``_tasks.pop()``.
            self._tasks = []

        def _addTask(self, task):
            """Queue ``task`` and make sure some worker will pick it up."""
            self._tasks.append(task)

            # Prefer an existing idle worker.
            worker_thread = None
            for thread in self._threads:
                if not thread.busy:
                    worker_thread = thread
                    break

            # Otherwise create one, but never more than max_pool_size
            # (strictly "<": "<=" would allow one thread too many).
            if worker_thread is None and len(self._threads) < self.max_pool_size:
                worker_thread = WorkerThread(self)
                self._threads.append(worker_thread)

            # With every worker busy and the pool full, the task simply stays
            # queued until a worker gets to it.
            if worker_thread is not None:
                worker_thread.work()

        def addTask(self, function, args=(), kwargs=None):
            """Queue ``function(*args, **kwargs)`` for background execution."""
            # A fresh dict per call avoids sharing one default dict.
            task_kwargs = {} if kwargs is None else kwargs
            self._addTask(WorkerTask(function, args, task_kwargs))
    79   
    class GlobalThreadPool(object):
        """ThreadPool singleton: every instance is a proxy for one shared pool."""

        # The shared ThreadPool, created by the first instantiation.
        _instance = None

        def __init__(self):
            """Create the shared ThreadPool if it does not exist yet."""
            if GlobalThreadPool._instance is None:
                # Assigned on the class, so the delegating __setattr__ below
                # is not involved.
                GlobalThreadPool._instance = ThreadPool()

        def __getattr__(self, attr):
            """Delegate attribute reads to the shared pool."""
            return getattr(self._instance, attr)

        def __setattr__(self, attr, val):
            """Delegate attribute writes to the shared pool."""
            return setattr(self._instance, attr, val)
  • 相关阅读:
    python列表切片
    python注释行与段落
    PCL安装与配置
    自动驾驶相关
    (转)ping命令
    (转)linux应用之test命令详细解析
    (转)shell解析命令行的过程以及eval命令
    (转)ssh-keygen 中文手册
    (转)stty 命令说明及使用讲解
    (转)CentOS下的trap命令
  • 原文地址:https://www.cnblogs.com/wangwei916797941/p/6773405.html
Copyright © 2020-2023  润新知