

    Chapter 10: Advanced Python Programming - Multithreading, Multiprocessing, and Thread Pool Programming

    Notes on "Python 3 Advanced Core Technologies in 97 Lectures"

    Contents

    • Chapter 10: Advanced Python Programming - Multithreading, Multiprocessing, and Thread Pool Programming
      • 10.1 The GIL in Python
      • 10.2 Multithreaded Programming with threading
      • 10.3 Inter-thread Communication: Shared Variables and Queue
      • 10.4 Thread Synchronization: Lock and RLock
      • 10.5 Thread Synchronization: Condition Usage and Source Code Analysis
      • 10.6 Thread Synchronization: Semaphore Usage and Source Code Analysis
      • 10.7 Thread Pools with ThreadPoolExecutor
      • 10.8 Multiprocessing vs. Multithreading
      • 10.9 Multiprocess Programming with multiprocessing
      • 10.10 Inter-process Communication: Queue, Pipe, and Manager

    10.1 The GIL in Python

    """
    gil global interpreter lock (cpython)
    Python中一个线程对应于C语言中的一个线程
    gil是的同一时刻只有一个线程在一个cpu上执行字节码
    """
    
    # GIL会根据执行的字节码行数以及时间片释放, GIL遇到IO操作的时候会主动释放
    import dis
    
    
    def add(a):
        a = a + 1
        return a
    
    
    dis.dis(add)  # dis.dis prints the bytecode itself; wrapping it in print() only adds a stray "None"
    
    
    # ================ demo start =====================
    total = 0
    
    
    def add():
        global total
        for i in range(1000000):
            total += 1


    def desc():
        global total
        for i in range(1000000):
            total -= 1
            
            
    import threading
    thread1 = threading.Thread(target=add)
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    
    thread1.join()
    thread2.join()
    print(total)  # not deterministic: the GIL is released between bytecodes, so the two threads race on total
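
    # Sketch (an addition, not from the original notes): the "time slices" mentioned
    # above correspond to the interpreter's switch interval, which can be inspected
    # and tuned via sys.
    import sys

    print(sys.getswitchinterval())   # defaults to 0.005 seconds
    sys.setswitchinterval(0.01)      # request longer slices between forced thread switches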
    

    10.2 Multithreaded Programming with threading

    # For IO-bound work, the performance difference between multithreading and multiprocessing is small.
    # 1. Create threads by instantiating the Thread class directly (a usage sketch follows the two functions below)
    
    
    import time
    import threading
    
    def get_detail_html(url):
        print("get detail html started")
        time.sleep(2)
        print("get detail html end")
    
    
    def get_detail_url(url):
        print("get detail url started")
        time.sleep(4)
        print("get detail url end")
    
    
    # 2. Implement multithreading by subclassing Thread
    
    
    class GetDetailHtml(threading.Thread):
        def __init__(self, name):
            super().__init__(name=name)
    
        def run(self):
            print("get detail html started")
            time.sleep(2)
            print("get detail html end")
    
    
    class GetDetailUrl(threading.Thread):
        def __init__(self, name):
            super().__init__(name=name)
    
        def run(self):
            print("get detail url started")
            time.sleep(4)
            print("get detail url end")
    
    if __name__ == "__main__":
        thread1 = GetDetailHtml("get_detail_html")
        thread2 = GetDetailUrl("get_detail_url")
        start_time = time.time()
        thread1.start()
        thread2.start()

        # thread1.setDaemon(True)  # daemon thread: it is killed as soon as the main thread exits
        # thread2.setDaemon(True)

        thread1.join()
        thread2.join()

        # when the main thread exits, daemon child threads are killed with it
        print("last time: {}".format(time.time()-start_time))
    

    10.3 Inter-thread Communication: Shared Variables and Queue

    """
    1. 线程通信方式-共享变量
    """
    
    
    import time
    import threading

    DETAIL_URL_LIST = []
    
    
    def get_detail_html():
        # crawl an article detail page
        global DETAIL_URL_LIST
        print("get detail html started")
        url = DETAIL_URL_LIST.pop()
        time.sleep(2)
        print("get detail html end")


    def get_detail_url():
        # crawl the article list page
        global DETAIL_URL_LIST
        print("get detail url started")
        time.sleep(4)
        for i in range(20):
            DETAIL_URL_LIST.append("http://projectsedu.com/{id}".format(id=i))
        print("get detail url end")
        
        
    if __name__ == "__main__":
        thread_detail_url = threading.Thread(target=get_detail_url)
        thread_detail_url.start()
        # note: the shared list is not protected by a lock, and pop() on an empty list
        # raises IndexError, which is why the Queue-based version below is preferred
        for i in range(10):
            html_thread = threading.Thread(target=get_detail_html)
            html_thread.start()
    
        
    # =====================================================================
    # inter-thread synchronization via a Queue
    from queue import Queue
    
    
    import time
    import threading
    
    
    def get_detail_html(queue):
        # crawl an article detail page
        while True:
            url = queue.get()  # thread-safe; blocks when the queue is empty
            print("get detail html started")
            time.sleep(2)
            print("get detail html end")
            queue.task_done()  # mark this item as processed so queue.join() can return


    def get_detail_url(queue):
        # crawl the article list page
        while True:
            print("get detail url started")
            time.sleep(4)
            for i in range(20):
                queue.put("http://projectsedu.com/{id}".format(id=i))
            print("get detail url end")
    
    
    if __name__ == "__main__":
        detail_url_queue = Queue(maxsize=1000)

        thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
        thread_detail_url.start()
        for i in range(10):
            html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
            html_thread.start()

        start_time = time.time()
        detail_url_queue.join()  # blocks until task_done() has been called for every item that was put()

        # when the main thread exits, daemon child threads are killed with it
        print("last time: {}".format(time.time()-start_time))
    

    10.4 Thread Synchronization: Lock and RLock

    from threading import Lock, RLock  # RLock: a reentrant lock

    # Within one thread an RLock may be acquired multiple times in a row, but the number
    # of acquire() calls must be matched by the same number of release() calls.
    total = 0
    lock = RLock()
    def add():
        # 1. do something
        # 2. perform an IO operation
        # 3. do something else
        global lock
        global total
        for i in range(1000000):
            lock.acquire()
            lock.acquire()
            total += 1
            lock.release()
            lock.release()
    
    
    def desc():
        global total
        global lock
        for i in range(1000000):
            lock.acquire()
            total -= 1
            lock.release()
    
    import threading
    thread1 = threading.Thread(target=add)
    thread2 = threading.Thread(target=desc)
    thread1.start()
    thread2.start()
    
    
    thread1.join()
    thread2.join()
    print(total)
    
    # 1. Locks hurt performance.
    # 2. Locks can lead to deadlocks.
    # A deadlock scenario: two functions that each need locks a and b
    """
    A(a、b)
    acquire (a)
    acquire (b)
    
    B(a、b)
    acquire (a)
    acquire (b)
    """
    

    10.5 Thread Synchronization: Condition Usage and Source Code Analysis

    import threading
    
    # Condition: a condition variable, used for more complex coordination between threads
    # class XiaoAi(threading.Thread):
    #     def __init__(self, lock):
    #         super().__init__(name="小爱")
    #         self.lock = lock
    #
    #     def run(self):
    #         self.lock.acquire()
    #         print("{} : 在 ".format(self.name))
    #         self.lock.release()
    #
    #         self.lock.acquire()
    #         print("{} : 好啊 ".format(self.name))
    #         self.lock.release()
    #
    # class TianMao(threading.Thread):
    #     def __init__(self, lock):
    #         super().__init__(name="天猫精灵")
    #         self.lock = lock
    #
    #     def run(self):
    #
    #         self.lock.acquire()
    #         print("{} : 小爱同学 ".format(self.name))
    #         self.lock.release()
    #
    #         self.lock.acquire()
    #         print("{} : 我们来对古诗吧 ".format(self.name))
    #         self.lock.release()
    
    # Use a Condition so the two speakers take turns reciting the poem
    
    class XiaoAi(threading.Thread):
        def __init__(self, cond):
            super().__init__(name="小爱")
            self.cond = cond
    
        def run(self):
            with self.cond:
                self.cond.wait()
                print("{} : 在 ".format(self.name))
                self.cond.notify()
    
                self.cond.wait()
                print("{} : 好啊 ".format(self.name))
                self.cond.notify()
    
                self.cond.wait()
                print("{} : 君住长江尾 ".format(self.name))
                self.cond.notify()
    
                self.cond.wait()
                print("{} : 共饮长江水 ".format(self.name))
                self.cond.notify()
    
                self.cond.wait()
                print("{} : 此恨何时已 ".format(self.name))
                self.cond.notify()
    
                self.cond.wait()
                print("{} : 定不负相思意 ".format(self.name))
                self.cond.notify()
    
    class TianMao(threading.Thread):
        def __init__(self, cond):
            super().__init__(name="天猫精灵")
            self.cond = cond
    
        def run(self):
            with self.cond:
                print("{} : 小爱同学 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
                print("{} : 我们来对古诗吧 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
                print("{} : 我住长江头 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
                print("{} : 日日思君不见君 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
                print("{} : 此水几时休 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
                print("{} : 只愿君心似我心 ".format(self.name))
                self.cond.notify()
                self.cond.wait()
    
    
    
    if __name__ == "__main__":
        from concurrent import futures
        cond = threading.Condition()
        xiaoai = XiaoAi(cond)
        tianmao = TianMao(cond)
    
        #启动顺序很重要
        #在调用with cond之后才能调用wait或者notify方法
        #condition有两层锁, 一把底层锁会在线程调用了wait方法的时候释放, 上面的锁会在每次调用wait的时候分配一把并放入到cond的等待队列中,等到notify方法的唤醒
        xiaoai.start()
        tianmao.start()
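
    # A minimal sketch (an addition, not from the original notes) of the canonical
    # Condition pattern described above: the consumer waits in a loop on a predicate,
    # and the producer changes the predicate and calls notify() while holding the
    # same condition.
    items = []
    cond2 = threading.Condition()

    def wait_for_item():
        with cond2:
            while not items:      # the loop guards against spurious wakeups
                cond2.wait()
            print(items.pop())

    def publish_item():
        with cond2:
            items.append("a line of the poem")
            cond2.notify()        # wake one waiter; notify_all() would wake them all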
    

    10.6 Thread Synchronization: Semaphore Usage and Source Code Analysis

    # A Semaphore is a lock that limits how many threads may enter a section at the same time.
    # Example: file access - usually only one writer thread at a time, while several readers can be allowed.

    # a crawler example
    import threading
    import time
    
    class HtmlSpider(threading.Thread):
        def __init__(self, url, sem):
            super().__init__()
            self.url = url
            self.sem = sem
    
        def run(self):
            time.sleep(2)
            print("got html text success")
            self.sem.release()
    
    class UrlProducer(threading.Thread):
        def __init__(self, sem):
            super().__init__()
            self.sem = sem
    
        def run(self):
            for i in range(20):
                self.sem.acquire()
                html_thread = HtmlSpider("https://baidu.com/{}".format(i), self.sem)
                html_thread.start()
    
    if __name__ == "__main__":
        sem = threading.Semaphore(3)
        url_producer = UrlProducer(sem)
        url_producer.start()
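
    # Sketch (an addition, not from the original notes): the same throttling written
    # with the semaphore as a context manager inside the worker, so acquire() and
    # release() stay paired in one place; the trade-off is that all worker threads
    # are created up front and only Semaphore(3) of them run the guarded block at once.
    def fetch(url, sem):
        with sem:                     # at most 3 threads inside this block at a time
            time.sleep(2)
            print("got html text success", url)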
    

    10.7 Thread Pools with ThreadPoolExecutor

    from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED
    from concurrent.futures import Future
    from multiprocessing import Pool
    
    # Future: a container for a task's eventual result (the "future object")


    # Why use a thread pool?
    # - the main thread can query the state of a worker thread or task and get its return value
    # - the main thread knows immediately when a task has completed
    # - futures give multithreading and multiprocessing a consistent coding interface
    import time
    
    def get_html(times):
        time.sleep(times)
        print("get page {} success".format(times))
        return times
    
    
    
    executor = ThreadPoolExecutor(max_workers=2)
    # submit() hands a callable to the pool and returns a Future immediately (it does not block)
    # task1 = executor.submit(get_html, (3))
    # task2 = executor.submit(get_html, (2))


    # collect the results of tasks that have already completed
    urls = [3, 2, 4]
    all_task = [executor.submit(get_html, url) for url in urls]
    wait(all_task, return_when=FIRST_COMPLETED)  # block until the first task finishes
    print("main")
    # for future in as_completed(all_task):
    #     data = future.result()
    #     print("get {} page".format(data))
    # executor.map() yields results in the same order as the input urls
    # for data in executor.map(get_html, urls):
    #     print("get {} page".format(data))


    # done() reports whether a task has finished, without blocking
    # print(task1.done())
    # print(task2.cancel())  # cancel() only succeeds if the task has not started running yet
    # time.sleep(3)
    # print(task1.done())
    #
    # result() blocks until the task finishes and returns its value
    # print(task1.result())
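
    # Sketch (an addition, not from the original notes): the executor also works as a
    # context manager, which shuts the pool down when the block exits, and
    # as_completed() yields futures in the order they finish.
    def crawl_all(url_times):
        with ThreadPoolExecutor(max_workers=2) as pool_exec:
            tasks = [pool_exec.submit(get_html, t) for t in url_times]
            for future in as_completed(tasks):
                print("get {} page".format(future.result()))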
    

    10.8 Multiprocessing vs. Multithreading

    import time
    from concurrent.futures import ThreadPoolExecutor, as_completed
    from concurrent.futures import ProcessPoolExecutor
    # multiprocess programming
    # Use multiprocessing for CPU-bound work and multithreading for IO-bound work;
    # switching between processes is more expensive than switching between threads.

    # 1. For CPU-bound work, multiprocessing beats multithreading
    # def fib(n):
    #     if n<=2:
    #         return 1
    #     return fib(n-1)+fib(n-2)
    #
    # if __name__ == "__main__":
    #     with ThreadPoolExecutor(3) as executor:
    #         all_task = [executor.submit(fib, (num)) for num in range(25,40)]
    #         start_time = time.time()
    #         for future in as_completed(all_task):
    #             data = future.result()
    #             print("exe result: {}".format(data))
    #
    #         print("last time is: {}".format(time.time()-start_time))
    
    # 2. For IO-bound work, multithreading beats multiprocessing
    def random_sleep(n):
        time.sleep(n)
        return n
    
    if __name__ == "__main__":
        with ProcessPoolExecutor(3) as executor:
            all_task = [executor.submit(random_sleep, num) for num in [2]*30]
            start_time = time.time()
            for future in as_completed(all_task):
                data = future.result()
                print("exe result: {}".format(data))
    
            print("last time is: {}".format(time.time()-start_time))
    

    10.9 Multiprocess Programming with multiprocessing

    # import os
    # # fork() is only available on Linux/Unix
    # pid = os.fork()
    # print("bobby")  # printed by both the parent and the child
    # if pid == 0:
    #   print('child process {}, parent process: {}.'.format(os.getpid(), os.getppid()))
    # else:
    #   print('in the parent process; the child pid is {}.'.format(pid))
    
    
    import multiprocessing
    
    # multiprocess programming
    import time
    def get_html(n):
        time.sleep(n)
        print("sub_progress success")
        return n
    
    
    if __name__ == "__main__":
        # progress = multiprocessing.Process(target=get_html, args=(2,))
        # print(progress.pid)
        # progress.start()
        # print(progress.pid)
        # progress.join()
        # print("main progress end")
    
        # use a process pool (multiprocessing.Pool)
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        # result = pool.apply_async(get_html, args=(3,))
        #
        # # wait for all submitted tasks to complete
        # pool.close()
        # pool.join()
        #
        # print(result.get())
    
        #imap
        # for result in pool.imap(get_html, [1,5,3]):
        #     print("{} sleep success".format(result))
    
        for result in pool.imap_unordered(get_html, [1,5,3]):
            print("{} sleep success".format(result))
    

    10.10 Inter-process Communication: Queue, Pipe, and Manager

    import time
    from multiprocessing import Process, Queue, Pool, Manager, Pipe
    
    
    # def producer(queue):
    #     queue.put("a")
    #     time.sleep(2)
    #
    # def consumer(queue):
    #     time.sleep(2)
    #     data = queue.get()
    #     print(data)
    #
    # if __name__ == "__main__":
    #     queue = Queue(10)
    #     my_producer = Process(target=producer, args=(queue,))
    #     my_consumer = Process(target=consumer, args=(queue,))
    #     my_producer.start()
    #     my_consumer.start()
    #     my_producer.join()
    #     my_consumer.join()
    
    # communication via shared global variables
    # shared global variables do not work across processes (each process gets its own copy);
    # they are only usable between threads
    
    
    # def producer(a):
    #     a += 100
    #     time.sleep(2)
    #
    # def consumer(a):
    #     time.sleep(2)
    #     print(a)
    #
    # if __name__ == "__main__":
    #     a = 1
    #     my_producer = Process(target=producer, args=(a,))
    #     my_consumer = Process(target=consumer, args=(a,))
    #     my_producer.start()
    #     my_consumer.start()
    #     my_producer.join()
    #     my_consumer.join()
    
    # multiprocessing.Queue cannot be used with a Pool of worker processes
    # for communication between Pool workers, use the queue from Manager (Manager().Queue())
    
    # def producer(queue):
    #     queue.put("a")
    #     time.sleep(2)
    #
    # def consumer(queue):
    #     time.sleep(2)
    #     data = queue.get()
    #     print(data)
    #
    # if __name__ == "__main__":
    #     queue = Manager().Queue(10)
    #     pool = Pool(2)
    #
    #     pool.apply_async(producer, args=(queue,))
    #     pool.apply_async(consumer, args=(queue,))
    #
    #     pool.close()
    #     pool.join()
    
    # inter-process communication through a Pipe
    # a Pipe performs better than a Queue (less locking, but only two endpoints)
    
    # def producer(pipe):
    #     pipe.send("bobby")
    #
    # def consumer(pipe):
    #     print(pipe.recv())
    #
    # if __name__ == "__main__":
    #     recevie_pipe, send_pipe = Pipe()
    #     # a Pipe can only connect two processes
    #     my_producer= Process(target=producer, args=(send_pipe, ))
    #     my_consumer = Process(target=consumer, args=(recevie_pipe,))
    #
    #     my_producer.start()
    #     my_consumer.start()
    #     my_producer.join()
    #     my_consumer.join()
    
    def add_data(p_dict, key, value):
        p_dict[key] = value
    
    if __name__ == "__main__":
        progress_dict = Manager().dict()
        from queue import PriorityQueue
    
        first_progress = Process(target=add_data, args=(progress_dict, "bobby1", 22))
        second_progress = Process(target=add_data, args=(progress_dict, "bobby2", 23))
    
        first_progress.start()
        second_progress.start()
        first_progress.join()
        second_progress.join()
    
        print(progress_dict)
  • Original post: https://www.cnblogs.com/lianhaifeng/p/13519832.html