• Python mini-demo 01: thread pool + multiprocessing for CPU-intensive work


    Background:

    The company had a small project, roughly as follows:

      Server A continuously pushes messages onto a queue; each message mainly carries the URL of a video. Server B continuously pops messages off the queue, clips the video, and finally saves the clipped video to a cloud server. I was mainly responsible for implementing Server B.

    Implementation approach:

      1 Thread pool + multiprocessing

        Requirement 1: the main process must run as a daemon.
        Requirement 2: use a thread pool that caps the number of concurrently running workers; each thread calls subprocess.Popen to run commands such as wget, ffprobe, and ffmpeg to process the video (see the sketch after this list).

      2 The message queue is implemented as a Redis list (a minimal wrapper sketch appears at the end of this section).

      3 After the main thread fetches a message from the queue, it hands the message to an idle worker thread from the pool (here and below, any non-main thread is called a worker thread). The worker thread does some logical processing on the message, then spawns a process to clip the video, and finally uploads the result.

        Requirement 3: so that the daemon, on receiving SIGINT, can finish the workers currently in flight before shutting down, without losing any queued data, the main process must only fetch data from the queue when an idle worker is available.
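
    A minimal sketch of the worker-side handler from requirement 2. The message shape, scratch paths, and ffmpeg arguments here are illustrative assumptions; only the subprocess.Popen pattern is taken from the post:

    # Hypothetical pool-thread handler: download the video, then clip it.
    # Message shape, file paths, and ffmpeg arguments are assumptions.
    import json
    import subprocess


    def clip_video(data):
        msg = json.loads(data)  # assumed: {"url": ..., "start": ..., "duration": ...}
        src, dst = "/tmp/src.mp4", "/tmp/clip.mp4"  # assumed scratch paths
        commands = [
            ["wget", "-q", "-O", src, msg["url"]],
            ["ffmpeg", "-y", "-ss", str(msg["start"]), "-t", str(msg["duration"]),
             "-i", src, "-c", "copy", dst],
        ]
        for cmd in commands:
            # The CPU-heavy work runs in the child process; the pool thread
            # just blocks in wait(), so the GIL is not a bottleneck here.
            if subprocess.Popen(cmd).wait() != 0:
                raise RuntimeError("command failed: %s" % " ".join(cmd))
        # ... upload dst to cloud storage ...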

    That is roughly the whole picture.

    Most of the resources go into downloading and processing the videos, and the number of concurrently running workers (worker threads) stays small (typically set to the number of CPU cores).

    Of the three requirement points above, requirement 2 is straightforward, so it is not discussed further.
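
    The post does not show the Redis-list wrapper behind the video_info_queue.bpop(...) calls used in entry.py below; a minimal sketch consistent with those calls might look like this (the key name and connection details are assumptions):

    # Hypothetical wrapper over a Redis list, matching the bpop(timeout)
    # calls in entry.py; key name and connection settings are assumptions.
    import redis


    class RedisListQueue(object):
        def __init__(self, client, key):
            self.client = client  # a redis.StrictRedis instance
            self.key = key

        def push(self, data):
            # Server A side: append a message to the tail of the list.
            self.client.rpush(self.key, data)

        def bpop(self, timeout):
            # Server B side: block for up to `timeout` seconds for a message.
            item = self.client.blpop(self.key, timeout=timeout)
            return item[1] if item else None


    video_info_queue = RedisListQueue(redis.StrictRedis(), "video_info")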

    Implementation

    Requirement 1 implementation:

    # -*- coding: utf8 -*-
    import os
    import sys
    import time
    import signal
    import traceback
    
    
    # from shadowsocks
    def write_pid_file(pid_file, pid):
        import fcntl
        import stat
    
        try:
            fd = os.open(pid_file, os.O_RDWR | os.O_CREAT,
                         stat.S_IRUSR | stat.S_IWUSR)
        except OSError:
            traceback.print_exc()
            return -1
        flags = fcntl.fcntl(fd, fcntl.F_GETFD)
        assert flags != -1
        flags |= fcntl.FD_CLOEXEC
        r = fcntl.fcntl(fd, fcntl.F_SETFD, flags)
        assert r != -1
        # There is no platform independent way to implement fcntl(fd, F_SETLK, &fl)
        # via fcntl.fcntl. So use lockf instead
        try:
            fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB, 0, 0, os.SEEK_SET)
        except IOError:
            r = os.read(fd, 32)
            if r:
                print('already started at pid %s' % r.decode())
            else:
                print('already started')
            os.close(fd)
            return -1
        os.ftruncate(fd, 0)
        os.write(fd, str(pid).encode('utf-8'))
        return 0
    
    
    def freopen(f, mode, stream):
        oldf = open(f, mode)
        oldfd = oldf.fileno()
        newfd = stream.fileno()
        os.close(newfd)
        os.dup2(oldfd, newfd)
    
    
    def daemon_start(settings, main_process_handler):
        def handle_exit(signum, _):
            if signum == signal.SIGTERM:
                sys.exit(0)
            sys.exit(1)
    
        signal.signal(signal.SIGINT, handle_exit)
        signal.signal(signal.SIGTERM, handle_exit)
        pid = os.fork()
        assert pid != -1
    
        # Parent: wait (at most 3 seconds) for the child's signal, then exit
        if pid:
            time.sleep(3)
            sys.exit(0)
    
        print("child has forked")
        # child signals its parent to exit
        ppid = os.getppid()
        pid = os.getpid()
        if write_pid_file(settings.PID_FILE, pid) != 0:
            os.kill(ppid, signal.SIGINT)
            sys.exit(1)
    
        # become the session leader, detaching from the controlling terminal
        os.setsid()
    
        signal.signal(signal.SIGHUP, signal.SIG_IGN)
    
        print('started')
        os.kill(ppid, signal.SIGTERM)
    
        # umask 022: newly created files default to rw-r--r--
        os.umask(0o022)
        sys.stdin.close()
        try:
            freopen(settings.DEBUG_LOG_PATH, 'a', sys.stdout)
            freopen(settings.DEBUG_LOG_PATH, 'a', sys.stderr)
        except IOError:
            traceback.print_exc()
            sys.exit(1)
    
        main_process_handler()
    
    
    def daemon_stop(pid_file):
        import errno
        try:
            with open(pid_file) as f:
                buf = f.read()
            if not buf:
                print('not running')
                return
        except IOError as e:
            traceback.print_exc()
            if e.errno == errno.ENOENT:
                print("not running")
                # always exit 0 if we are sure daemon is not running
                return
            sys.exit(1)
        pid = int(buf)
        if pid > 0:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError as e:
                if e.errno == errno.ESRCH:
                    print('not running')
                    # always exit 0 if we are sure daemon is not running
                    return
                traceback.print_exc()
                sys.exit(1)
        else:
            print('pid is not positive: %d' % pid)
            sys.exit(1)
    
        # sleep for maximum 300s
        for i in range(0, 100):
            try:
                # query for the pid
                os.kill(pid, 0)
            except OSError as e:
                # not found the process
                if e.errno == errno.ESRCH:
                    break
            time.sleep(3)
            print("waiting for all threads finished....")
        else:
            print('timed out when stopping pid %d' % pid)
            sys.exit(1)
        print('stopped')
        os.unlink(pid_file)
    
    
    def main():
        args = sys.argv[1:]
        assert len(args) == 2
        if args[0] not in ["stop", "start"]:
            print("only supported: [stop | start]")
            return
        if args[1] not in ["dev", "local", "prod"]:
            print("only supported: [dev | local | prod]")
            return
        from globals import set_settings, initialize_redis
        set_settings(args[1])
        initialize_redis()
        from globals import settings
        import entry
    
        if args[0] == "stop":
            print("stopping...")
            daemon_stop(settings.PID_FILE)
        elif args[0] == "start":
            print("starting...")
            daemon_start(settings, entry.run)
    
    
    if __name__ == "__main__":
        main()
    daemon.py
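
    Assuming the globals and entry modules it imports are in place, the daemon is driven like this:

        $ python daemon.py start dev   # fork, write the pid file, redirect output to the log
        $ python daemon.py stop dev    # read the pid file, send SIGTERM, wait for workers to drain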

    Requirement 3 implementation:

    For the thread pool I used Python's futures module (concurrent.futures), which provides a ready-made thread-pool mechanism. A few words on how its ThreadPoolExecutor class implements the pool:

      1 Each instance has a _work_queue attribute, a Queue object that stores the submitted tasks.

      2 Every call to the instance's submit method puts a task into _work_queue and, as long as the thread count has not yet reached max_workers, spawns another worker thread.

      3 All of the pool's worker threads keep fetching tasks from _work_queue and executing them. The worker threads' daemon attribute is set to True (an implementation detail that may vary across Python versions).
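
    A quick way to see points 1-3 in action (note that _threads and _work_queue are private attributes, so the exact behavior depends on the CPython version):

    # Poke at ThreadPoolExecutor internals: five tasks, but only two threads.
    import time
    from concurrent.futures import ThreadPoolExecutor

    pool = ThreadPoolExecutor(max_workers=2)
    for _ in range(5):
        pool.submit(time.sleep, 1)

    print(len(pool._threads))        # -> 2: thread count is capped at max_workers
    print(pool._work_queue.qsize())  # -> ~3: remaining tasks wait in the internal Queue
    pool.shutdown()

    With that in mind, entry.py: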

    # -*- coding: utf8 -*-
    import json
    import traceback
    import signal
    import sys
    import time
    from threading import Lock
    from concurrent.futures import ThreadPoolExecutor
    from .globals import settings, video_info_queue, abnormal_video_info_queue
    
    
    
    
    def handler(data):
        # business logic for one message: download the video, clip it, upload it
        pass
    
    
    running_futures_count = 0
    
    
    def run():
        global running_futures_count
        count_lock = Lock()
    
        pool = ThreadPoolExecutor(max_workers=settings.MAX_WORKER)
        try:
            def reduce_count(_):
                global running_futures_count
                with count_lock:
                    running_futures_count -= 1
    
            def handle_exit(_, __):
                print("got exit signal")
                # stop accepting new tasks, but let the in-flight futures finish
                pool.shutdown(wait=False)
                while True:
                    if running_futures_count == 0:
                        sys.exit(0)
                    time.sleep(1)
                    print("now running futures count is %s, please wait" % running_futures_count)
    
            def handle_data(data):
                global running_futures_count
                with count_lock:
                    running_futures_count += 1
                future = pool.submit(handler, data)
                future.add_done_callback(reduce_count)
    
            signal.signal(signal.SIGINT, handle_exit)
            signal.signal(signal.SIGTERM, handle_exit)
    
            # Outer loop: keep the main thread alive until the pool is shut down.
            while not pool._shutdown:
                # Inner loop: only pull from Redis while the pool's pending-task
                # queue holds fewer tasks than max_workers, i.e. a worker is free
                # (requirement 3).
                while not pool._shutdown and (len(pool._work_queue.queue) < pool._max_workers):
                    data = video_info_queue.bpop(20)
                    if data:
                        handle_data(data)
                    else:
                        # the main queue is empty; fall back to the retry queue
                        data = abnormal_video_info_queue.bpop(1)
                        print("video_info_queue is empty, got %s from abnormal_video_info_queue" % data)
                        if data:
                            handle_data(data)
                time.sleep(5)
                print("all workers are busy; waiting before submitting more")
        finally:
            pool.shutdown(wait=False)
    entry.py

    The heart of it is the nested while loop: the outer loop keeps the main thread alive until the pool is shut down, and the inner loop only pulls messages from Redis while the pool's internal queue holds fewer pending tasks than max_workers, i.e. while there is (roughly) an idle worker. That is exactly what requirement 3 asks for: once SIGINT arrives, nothing new is taken off the queue, and the in-flight workers are allowed to finish.

    Pitfalls & takeaways:

      1 In Python, only the main thread can handle signals. If you block the main thread with a worker thread's join() and that thread runs for a long time, signals may go unhandled for just as long, so prefer setting worker threads' daemon attribute to True.

      2 Queue is built on top of deque, and CPython's deque is a doubly linked list (of blocks) underneath. Why a linked structure instead of a list? See the references.

      3 Learned how to make a process run as a daemon:

        1 where the pid file comes from

        2 the concepts of processes and process groups

        3 catching signals

        4 the dup2 and fcntl functions

      4 When a process created with Popen() uses PIPE for the child's stdin/stdout/stderr and the parent then calls wait(), the parent can hang if the child writes too much data to one of those pipes (see the sketch after this list). The explanation is in the references.

      5 Where do the environment variables go when you run a script with sudo? See the references.

      6 Python's weakref module turns out to be very useful.
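
      A sketch of pitfall 4 and the standard fix (the ffprobe command and file name are placeholders):

    # If the child writes more than the OS pipe buffer (~64 KB on Linux) holds,
    # it blocks on write; if the parent is meanwhile blocked in wait(), neither
    # side can make progress and the parent appears to hang.
    import subprocess

    proc = subprocess.Popen(["ffprobe", "-show_frames", "video.mp4"],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # BAD:  proc.wait() here may deadlock once a pipe fills up.
    # GOOD: communicate() keeps draining both pipes while waiting for exit.
    out, err = proc.communicate()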

    References:

      1 http://blog.sina.com.cn/s/blog_4da051a60102uyvg.html

      2 https://toutiao.io/posts/zr31ak/preview

      3 https://www.cnblogs.com/chybot/p/5176118.html

      4 https://stackoverflow.com/questions/5045771/python-how-to-prevent-subprocesses-from-receiving-ctrl-c-control-c-sigint

      5 http://siwind.iteye.com/blog/1753517

      6 https://www.jianshu.com/p/646d1d09fc53

      7 https://stackoverflow.com/questions/46598710/how-to-use-pipes-and-redirects-using-os-execv-if-possible

      8 http://xiaorui.cc/2017/02/22/%E4%B8%8D%E8%A6%81%E7%B2%97%E6%9A%B4%E7%9A%84%E9%94%80%E6%AF%81python%E7%BA%BF%E7%A8%8B/

      9 the shadowsocks source code

      
