os.fork
该方法仅支持Linux/Unix
import os

# Demo of os.fork(): only available on Unix-like systems (Linux/macOS).
print('Current Process %s start ...' % os.getpid())
pid = os.fork()  # Clone the current process: returns 0 in the child, the child's pid in the parent.
if pid < 0:
    # Defensive only: in Python, a failed fork() raises OSError rather than returning < 0.
    print('error in fork ')
elif pid == 0:
    # Child branch: report our own pid and our parent's pid.
    print('I am child process %s and my parent process is %s' % (os.getpid(), os.getppid()))
else:
    # Parent branch. Bug fix: the original printed os.getppid() (the *grandparent*)
    # as "I" and os.getpid() (the parent itself) as the "child". The parent is
    # os.getpid(); the child is the pid returned by fork().
    print('I %s created a child process %s' % (os.getpid(), pid))
multiprocessing
进程池 pool
from multiprocessing import Pool
import os, time, random

def run_task(name):
    """Worker task: announce start, sleep 0-3 s to simulate work, announce end."""
    print('Task %s (pid = %s) is running...' % (name, os.getpid()))
    time.sleep(random.random() * 3)
    print('Task %s end.' % name)

if __name__ == '__main__':
    # Bug fix: original message read 'Curret process'.
    print('Current process %s.' % os.getpid())
    p = Pool(processes=3)  # Create a pool of 3 worker processes.
    for i in range(5):
        p.apply_async(run_task, args=(i,))  # Submit tasks asynchronously; 5 tasks share 3 workers.
    print('Waiting for all subprocesses done...')
    p.close()  # No more tasks may be submitted after close().
    p.join()   # Block until every submitted task has finished.
    print('All subprocesses done.')
进程间通信
Queue
from multiprocessing import Queue, Process
import os, time, random

def proc_write(q, urls):
    """Writer process body: push each url onto the shared queue, pausing 0-1 s between puts."""
    # Bug fix: original message was misspelled 'writig'.
    print('Process (%s) is writing...' % os.getpid())
    for url in urls:
        q.put(url)  # Enqueue for the reader process.
        print('Put %s to queue...' % url)
        time.sleep(random.random())

def proc_read(q):
    """Reader process body: consume urls forever; the parent terminates this process."""
    # Bug fix: original message said '(mis-spelled) writig' in the *reader* too.
    print('Process (%s) is reading...' % os.getpid())
    while True:
        url = q.get(True)  # Block until an item is available.
        print('Get %s from queue.' % url)

if __name__ == '__main__':
    q = Queue()  # Message queue shared between writer and reader processes.
    # Two writer processes and one reader process.
    proc_writer1 = Process(target=proc_write, args=(q, ['url_1', 'url_2', 'url_3', 'url_4']))
    proc_writer2 = Process(target=proc_write, args=(q, ['url_5', 'url_6', 'url_7', 'url_8']))
    proc_reader = Process(target=proc_read, args=(q,))
    # Start the writers, filling the queue.
    proc_writer1.start()
    proc_writer2.start()
    # Start the reader, draining the queue.
    proc_reader.start()
    # Wait for both writers to finish.
    proc_writer1.join()
    proc_writer2.join()
    # proc_read loops forever, so forcibly terminate it once the writers are done.
    proc_reader.terminate()
Pipe
import multiprocessing, random, os, time

def proc_send(pipe, urls):
    """Sender process body: announce and send each url through the pipe, pausing 0-1 s between sends."""
    pid = os.getpid()  # pid is constant for this process; look it up once
    for item in urls:
        print('Process %s send: %s ' % (pid, item))
        pipe.send(item)  # push the url to the other end of the pipe
        time.sleep(random.random())

def proc_recv(pipe):
    """Receiver process body: consume messages forever; the parent terminates this process."""
    pid = os.getpid()
    while True:
        message = pipe.recv()  # block until a message arrives
        print('Process %s rev: %s' % (pid, message))
        time.sleep(random.random())

if __name__ == '__main__':
    # Pipe() returns two connected ends; it is duplex, so either end can send or receive.
    conn_a, conn_b = multiprocessing.Pipe()
    urls = ['url_' + str(i) for i in range(10)]
    sender = multiprocessing.Process(target=proc_send, args=(conn_a, urls))
    receiver = multiprocessing.Process(target=proc_recv, args=(conn_b,))
    sender.start()
    receiver.start()
    sender.join()         # wait until every url has been sent
    receiver.terminate()  # proc_recv loops forever, so kill it
代码摘自 《Python爬虫开发与项目实战》-- 范传辉