#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""Demonstrate threading.Semaphore by throttling a toy crawler.

A Semaphore is a lock that limits how many threads may be inside a section
at the same time.  (Compare file access: writing is normally restricted to a
single thread, while several readers may be allowed at once.)
"""

import threading
import time


class HtmlSpider(threading.Thread):
    """Worker thread that simulates downloading one page.

    The permit is acquired by whoever creates the spider; the spider hands it
    back when ``run`` finishes, whether it succeeded or not.

    Args:
        url: Address this worker is responsible for. It is only stored; no
            real request is made.
        sem: Semaphore whose permit is released when ``run`` ends.
        delay: Seconds to sleep in order to simulate the download.
    """

    def __init__(self, url, sem, delay=2):
        super().__init__()
        self.url = url
        self.sem = sem
        self.delay = delay

    def run(self):
        """Simulate the download, then give the permit back."""
        try:
            time.sleep(self.delay)
            print('got html text success')
        finally:
            # Release even when the work above raises; otherwise the permit
            # is lost and the producer eventually blocks forever.
            self.sem.release()


class UrlProducer(threading.Thread):
    """Thread that starts one ``HtmlSpider`` per URL, throttled by ``sem``.

    Args:
        sem: Semaphore bounding how many spiders may run concurrently.
        count: Number of URLs (and therefore spiders) to produce.
        delay: Simulated download time, in seconds, passed to each spider.
    """

    def __init__(self, sem, count=20, delay=2):
        super().__init__()
        self.sem = sem
        self.count = count
        self.delay = delay

    def run(self):
        """Acquire a permit for each URL and start a spider that releases it."""
        for i in range(self.count):
            # Every acquire() decrements the semaphore's counter; once it
            # reaches zero this call blocks until a spider calls release().
            self.sem.acquire()
            html_thread = HtmlSpider(
                'http://www.baidu.com/{}'.format(i), self.sem, self.delay
            )
            try:
                html_thread.start()
            except Exception:
                # The spider never ran, so it will never release the permit
                # that was taken on its behalf; return it here.
                self.sem.release()
                raise


if __name__ == '__main__':
    # Allow at most 3 spiders to run concurrently.
    sem = threading.Semaphore(3)
    url_producer = UrlProducer(sem)
    url_producer.start()
got html text success got html text success got html text success got html text successgot html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success got html text success
每隔约 2 秒输出一组(3 条)‘got html text success’,因为信号量一次只允许 3 个线程并发执行。
Semaphore底层是用Condition实现的