• python并发编程之进程池,线程池concurrent.futures


    进程池与线程池

    在刚开始学多进程或多线程时,我们迫不及待地基于多进程或多线程实现并发的套接字通信,然而这种实现方式的致命缺陷是:服务端开启的进程数或线程数会随着并发客户端数目的增多而增多,

    这会对服务端主机带来巨大的压力,甚至于不堪重负而瘫痪,于是我们必须对服务端开启的进程数或线程数加以控制,让机器在一个自己可以承受的范围内运行,这就是进程池或线程池的用途,

    例如进程池,就是用来存放进程的池子,本质还是基于多进程,只不过是对开启进程的数目加上了限制

    Python--concurrent.futures

    1.concurrent.futures模块是用来创建并行的任务,提供了更高级别的接口,
    用于异步执行调用
    2.concurrent.futures这个模块用起来非常方便,它的接口也封装得非常简单
    3.concurrent.futures模块既可以实现进程池,也可以实现线程池
    4.模块导入进程池和线程池
    from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
    p = ProcessPoolExecutor(max_workers)对于进程池如果不写max_workers:默认的是cpu的数目
    p = ThreadPoolExecutor(max_workers)对于线程池如果不写max_workers:默认的是cpu的数目*5(Python 3.8起改为min(32, cpu的数目+4))

    基本方法

    1、submit(fn, *args, **kwargs)
    异步提交任务
    
    2、map(func, *iterables, timeout=None, chunksize=1) 
    取代for循环submit的操作
    
    3、shutdown(wait=True) 
    相当于进程池的pool.close()+pool.join()操作
    wait=True,等待池内所有任务执行完毕回收完资源后才继续
    wait=False,立即返回,并不会等待池内的任务执行完毕
    但不管wait参数为何值,整个程序都会等到所有任务执行完毕
    submit和map必须在shutdown之前
    
    4、result(timeout=None)
    取得结果
    
    5、add_done_callback(fn)
    回调函数
    

     

     进程池

    from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
    from threading import currentThread
    import os,time,random
    
    
    def task(n, delay=None):
        """Simulate a slow job and return twice the input.

        Prints the PID of the process executing the call, sleeps, then
        returns ``n * 2``.

        Args:
            n: Value to double.
            delay: Seconds to sleep. When ``None`` (the default) a random
                whole number of seconds between 1 and 3 is chosen, which is
                the original behaviour.

        Returns:
            ``n * 2``.
        """
        print("%s is running " % os.getpid())
        if delay is None:
            delay = random.randint(1, 3)
        time.sleep(delay)
        return n * 2
    
    if __name__ == '__main__':
        start = time.time()

        # Leaving the with-block calls executor.shutdown(wait=True), so the
        # pool is cleaned up even if one of the submit calls raises.
        with ProcessPoolExecutor(4) as executor:
            # Submit 10 tasks asynchronously; each submit returns a Future.
            res = [executor.submit(task, i) for i in range(10)]

        print("++++>")
        for r in res:
            print(r.result())  # print each task's return value

        end = time.time()
        print(end - start)
    
    ---------------------输出
    2464 is running 
    9356 is running 
    10780 is running 
    9180 is running 
    2464 is running 
    10780 is running 
    9180 is running 
    9356 is running 
    10780 is running 
    9180 is running 
    ++++>
    0
    2
    4
    6
    8
    10
    12
    14
    16
    18
    6.643380165100098

    线程池

    from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
    from threading import currentThread
    import os,time,random
    
    
    def task(n, delay=None):
        """Simulate a slow job on a pool thread and return twice the input.

        Prints the name of the thread executing the call, sleeps, then
        returns ``n * 2``.

        Args:
            n: Value to double.
            delay: Seconds to sleep. When ``None`` (the default) a random
                whole number of seconds between 1 and 3 is chosen, which is
                the original behaviour.

        Returns:
            ``n * 2``.
        """
        # current_thread().name replaces currentThread().getName(), both of
        # which are deprecated aliases since Python 3.10.
        from threading import current_thread

        print("%s is running " % current_thread().name)
        if delay is None:
            delay = random.randint(1, 3)
        time.sleep(delay)
        return n * 2
    
    if __name__ == '__main__':
        start = time.time()

        # Thread pool. Leaving the with-block calls shutdown(wait=True), so
        # the pool is cleaned up even if one of the submit calls raises.
        with ThreadPoolExecutor(4) as executor:
            # Submit 10 tasks asynchronously; each submit returns a Future.
            res = [executor.submit(task, i) for i in range(10)]

        print("++++>")
        for r in res:
            print(r.result())  # print each task's return value

        end = time.time()
        print(end - start)
    
    ------------输出
    
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_0 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_1 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_2 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_3 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_3 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_1 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_0 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_2 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_3 is running 
    <concurrent.futures.thread.ThreadPoolExecutor object at 0x00000000025B0DA0>_1 is running 
    ++++>
    0
    2
    4
    6
    8
    10
    12
    14
    16
    18
    5.002286195755005

    回调函数

    import requests
    import time
    from concurrent.futures import ThreadPoolExecutor
    
    def get(url, timeout=10):
        """Download ``url`` and return its body text.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait for the server before ``requests``
                raises. Without a timeout a stalled server would block the
                worker thread indefinitely.

        Returns:
            ``{'url': url, 'content': <response text>}`` when the status code
            is 200, otherwise ``None``.
        """
        print('GET {}'.format(url))
        response = requests.get(url, timeout=timeout)
        time.sleep(2)
        if response.status_code == 200:  # 200 means the download succeeded
            return {'url': url, 'content': response.text}
        # Any other status yields no result; callers must handle None.
        return None
    
    def parse(res):
        """Print and return a one-line summary of a downloaded page.

        Args:
            res: Mapping with a ``'url'`` entry and a ``'content'`` entry
                whose length is reported.

        Returns:
            The string ``'<url> parse res is <len(content)>'``.
        """
        summary = '%s parse res is %s' % (res['url'], len(res['content']))
        print(summary)
        return summary
    
    def save(res):
        """Stand-in for persisting a result: print it after the word 'save'."""
        label = 'save'
        print(label, res)
    
    def task(res):
        """Done-callback: parse and save the result of a finished download.

        Args:
            res: The completed Future passed in by ``add_done_callback``; its
                ``result()`` is the value returned by the download function.
        """
        page = res.result()
        if page is None:
            # The download function returns None for a non-200 response;
            # there is nothing to parse, and indexing None would raise
            # TypeError inside the callback.
            return
        save(parse(page))
    
    
    if __name__ == '__main__':
        urls = [
            'http://www.cnblogs.com/linhaifeng',
            'https://www.python.org',
            'https://www.openstack.org',
        ]

        # Leaving the with-block calls pool.shutdown(wait=True) (the
        # counterpart of close() + join() on a multiprocessing pool), even if
        # a submit call raises.
        with ThreadPoolExecutor(2) as pool:
            for url in urls:
                # The callback receives the finished Future object, not the
                # return value of get, so it has to call .result() itself.
                # Whichever download finishes first triggers its callback
                # first. Callbacks are a general pattern: they work with
                # process pools as well as thread pools.
                pool.submit(get, url).add_done_callback(task)
    
    -------------输出
    GET http://www.cnblogs.com/linhaifeng
    GET https://www.python.org
    http://www.cnblogs.com/linhaifeng parse res is 17426
    save http://www.cnblogs.com/linhaifeng parse res is 17426
    GET https://www.openstack.org
    https://www.python.org parse res is 48809
    save https://www.python.org parse res is 48809
    https://www.openstack.org parse res is 60632
    save https://www.openstack.org parse res is 60632
    

    map

    import requests
    import time
    from concurrent.futures import ThreadPoolExecutor
    
    def get(url, timeout=10):
        """Download ``url`` and report the length of its body text.

        Args:
            url: Address to fetch.
            timeout: Seconds to wait for the server before ``requests``
                raises. Without a timeout a stalled server would block the
                worker thread indefinitely.

        Returns:
            ``{'url': url, 'content_len': <length of response text>}`` when
            the status code is 200, otherwise ``None``.
        """
        print('GET {}'.format(url))
        response = requests.get(url, timeout=timeout)
        time.sleep(2)
        if response.status_code == 200:  # 200 means the download succeeded
            return {'url': url, 'content_len': len(response.text)}
        # Any other status yields no result; callers must handle None.
        return None
    
    
    
    if __name__ == '__main__':
        urls = [
            'http://www.cnblogs.com/linhaifeng',
            'https://www.python.org',
            'https://www.openstack.org',
        ]

        # Leaving the with-block calls pool.shutdown(wait=True) (the
        # counterpart of close() + join() on a multiprocessing pool), even if
        # map raises.
        with ThreadPoolExecutor(2) as pool:
            res = pool.map(get, urls)  # map replaces the for + submit loop

        print('=' * 30)
        for r in res:  # map returned an iterator over the results
            print(r)
    
    GET http://www.cnblogs.com/linhaifeng
    GET https://www.python.org
    GET https://www.openstack.org
    {'url': 'http://www.cnblogs.com/linhaifeng', 'content_len': 17426}
    {'url': 'https://www.python.org', 'content_len': 48809}
    {'url': 'https://www.openstack.org', 'content_len': 60632}
    

      

    自定义线程池  

    from threading import Thread, currentThread
    import time
    import queue
    
    
    class MyThread(Thread):
        """Daemon worker thread that executes tasks taken from a shared queue.

        The thread starts itself as soon as it is constructed.

        Args:
            queue: Queue of ``(func, args, kwargs)`` tuples to execute.
        """

        def __init__(self, queue):
            super().__init__()
            self.queue = queue
            self.daemon = True  # worker exits together with the main thread
            self.start()

        def run(self):
            """Loop forever: take a task from the queue and execute it.

            A task that raises is reported on stderr and the worker keeps
            going. ``task_done()`` is always called, so ``queue.join()``
            cannot hang on a failed task.
            """
            import traceback

            while True:
                func, args, kwargs = self.queue.get()  # blocks until a task arrives
                try:
                    func(*args, **kwargs)
                except Exception:
                    # Report the failure but keep this worker alive for the
                    # remaining tasks.
                    traceback.print_exc()
                finally:
                    # Mark the task finished so queue.join() can return.
                    self.queue.task_done()
    
    
    class MyPool(object):
        """Minimal thread pool: workers are created up front and wait for tasks.

        Args:
            num: Number of worker threads to create.
        """

        def __init__(self, num):
            self.num = num
            self.queue = queue.Queue()
            for _ in range(self.num):
                MyThread(self.queue)  # each worker starts itself

        def submit(self, func, args=(), kwargs=None):
            """Queue ``func(*args, **kwargs)`` for execution by a worker.

            Args:
                func: Callable to run.
                args: Positional arguments for ``func``.
                kwargs: Keyword arguments for ``func``; ``None`` means none.
            """
            # A fresh dict per call avoids sharing one mutable default
            # between every submission.
            if kwargs is None:
                kwargs = {}
            self.queue.put((func, args, kwargs))

        def join(self):
            """Block until every queued task has been marked done."""
            self.queue.join()
    
    
    def task(i, delay=2):
        """Print the current thread's name and ``i``, then sleep.

        Args:
            i: Value printed after the thread name.
            delay: Seconds to sleep after printing; defaults to 2, the
                original fixed duration.
        """
        # current_thread().name replaces currentThread().getName(), both of
        # which are deprecated aliases since Python 3.10.
        from threading import current_thread

        print(current_thread().name, i)
        time.sleep(delay)
    
    
    if __name__ == '__main__':
        # Time how long 4 tasks take on a pool of 3 worker threads.
        start = time.time()
        pool = MyPool(3)
        for job_id in range(4):
            pool.submit(task, args=(job_id,))
        pool.join()  # wait until every queued task has been processed
        elapsed = time.time() - start
        print('运行的时间{}秒'.format(elapsed))
    

      

  • 相关阅读:
    《痕迹识人,面试读心》培训总结之一
    傲游与视频网站广告之战的思考
    EMLS项目推进思考
    二级证丢失如何找回
    Mathematica 讲座
    泊松方程解法
    Windows核心编程-作业
    Win7多用户同时登陆
    C语言文件操作类型速查
    openMP的一点使用经验
  • 原文地址:https://www.cnblogs.com/xiao-apple36/p/9499000.html
Copyright © 2020-2023  润新知