s15day34 进程
内容回顾:
1. GIL锁
2. 进程和线程的区别?
第一:
进程是cpu资源分配的最小单元。
线程是cpu计算的最小单元。
第二:
一个进程中可以有多个线程。
第三:
对于Python来说他的进程和线程和其他语言有差异,是有GIL锁。
GIL锁保证一个进程中同一时刻只有一个线程被cpu调度。
注意:IO密集型操作可以使用多线程;计算密集型可以使用多进程;
3. Lock和RLock
4. 线程池
5. threading.local
6. 常用方法
7. 面向对象补充:
class Foo(object):
    """Store every attribute assignment in an internal ``info`` dict."""

    def __init__(self):
        # Go through object.__setattr__ directly: our own __setattr__ needs
        # ``info`` to exist already, so it cannot be used to create it.
        object.__setattr__(self, 'info', {})

    def __setattr__(self, key, value):
        """Record ``obj.key = value`` in ``info`` instead of ``__dict__``."""
        self.info[key] = value

    def __getattr__(self, item):
        """Resolve a name that normal lookup did not find via ``info``.

        Prints the requested name, then returns the stored value.

        Raises:
            AttributeError: if ``item`` was never assigned.
        """
        print(item)
        if item == 'info':
            # ``info`` itself is missing (instance built without __init__);
            # fail here instead of recursing through ``self.info`` forever.
            raise AttributeError(item)
        try:
            return self.info[item]
        except KeyError:
            # hasattr() and getattr(obj, name, default) only understand
            # AttributeError, so translate the dict miss.
            raise AttributeError(item) from None


obj = Foo()
obj.name = 'alex'
print(obj.name)
今日内容:
1. 进程
2. 数据共享
3. 锁
4. 进程池
5. 模块(爬虫)
- requests
- bs4(beautifulsoup)
6. 协程
内容详细:
1. 进程
- 进程间数据不共享
import multiprocessing

# Module-level list used to show that processes do not share data.
data_list = []


def task(arg):
    """Append ``arg`` to this process's ``data_list`` and print the list."""
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten processes that each call ``task``, then wait for them."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # Swap in threading.Thread(target=task, args=(i,)) to compare the
        # output with the threaded version.
        p.start()
        workers.append(p)
    # Join explicitly so run() returns only after every child has finished.
    for p in workers:
        p.join()


if __name__ == '__main__':
    run()
- 常用功能:
- join
- daemon
- name
- multiprocessing.current_process()
- multiprocessing.current_process().ident/pid
- 类继承方式创建进程
import multiprocessing


class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the process object this code is executing in."""
        print('当前进程', multiprocessing.current_process())


def run():
    """Start two ``MyProcess`` workers and wait for both to finish."""
    p1 = MyProcess()
    p1.start()
    p2 = MyProcess()
    p2.start()
    # Join explicitly so run() returns only after both children are done.
    p1.join()
    p2.join()


if __name__ == '__main__':
    run()
2. 进程间数据共享
Queue:
linux:
import multiprocessing

# Queue created at module level and handed to every child through args.
q = multiprocessing.Queue()


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


def run():
    """Start ten producers, print the ten queued values, then reap them."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()
        workers.append(p)
    # One get() per producer: an unbounded ``while True`` loop would block
    # forever once the last value has been consumed.
    for _ in workers:
        v = q.get()
        print(v)
    for p in workers:
        p.join()


# Guarded so that importing this module does not start processes.
if __name__ == '__main__':
    run()
windows:
import multiprocessing


def task(arg, q):
    """Put ``arg`` on the queue ``q``."""
    q.put(arg)


if __name__ == '__main__':
    # The queue is built under the main guard and passed to each child
    # explicitly through ``args``.
    q = multiprocessing.Queue()
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()
        workers.append(p)
    # Exactly one value arrives per child; reading more would block forever.
    for _ in workers:
        v = q.get()
        print(v)
    for p in workers:
        p.join()
Manager:(*)
Linux:
import multiprocessing

# Manager-backed dict used to share data between processes.
m = multiprocessing.Manager()
dic = m.dict()


def task(arg):
    """Store 100 under ``arg`` in the shared ``dic``."""
    dic[arg] = 100


def run():
    """Start ten writers, wait for them, then print the shared values."""
    workers = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        p.start()
        workers.append(p)
    # join() replaces the manual input('>>>') pause: the values are printed
    # as soon as every child is done, without relying on the user to wait.
    for p in workers:
        p.join()
    print(dic.values())


if __name__ == '__main__':
    run()
windows:
import multiprocessing
import time


def task(arg, dic, delay=2):
    """Sleep ``delay`` seconds, then store 100 under ``arg`` in ``dic``.

    Args:
        arg: key to write.
        dic: mapping to write into (a Manager dict when run as a script).
        delay: seconds to sleep before writing; defaults to 2.
    """
    time.sleep(delay)
    dic[arg] = 100


if __name__ == '__main__':
    m = multiprocessing.Manager()
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # join() blocks until each child exits; this replaces a loop that
    # polled is_alive() continuously and kept a CPU core busy while waiting.
    for p in process_list:
        p.join()
    print(dic)
3. 进程锁
import time
import threading
import multiprocessing
lock = multiprocessing.RLock()


def task(arg, delay=2):
    """Announce entry, then hold ``lock`` while sleeping and printing ``arg``.

    Args:
        arg: value printed while the lock is held.
        delay: seconds to sleep inside the critical section; defaults to 2.
    """
    print('鬼子来了')
    # ``with`` releases the lock even when the body raises; a bare
    # acquire()/release() pair would leave it held and block other workers.
    with lock:
        time.sleep(delay)
        print(arg)


if __name__ == '__main__':
    # NOTE(review): the module-level lock reaches the children only when they
    # inherit it (fork start method). With spawn, each child re-imports this
    # module and gets its own lock, so pass the lock through ``args`` there.
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.start()
    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.start()
    p1.join()
    p2.join()
为什么要加锁?
4. 进程池
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(arg, delay=2):
    """Sleep ``delay`` seconds (2 by default), then print ``arg``."""
    time.sleep(delay)
    print(arg)


if __name__ == '__main__':
    # The context manager shuts the pool down on exit, waiting for all ten
    # submitted jobs and releasing the worker processes.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
5. 初识爬虫:
安装:
pip3 install requests
pip3 install beautifulsoup4
问题:
找不到内部指令?
方式一:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
方式二:
C:\Users\Administrator\AppData\Local\Programs\Python\Python36\Scripts
pip3 install requests
示例:
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Simulate a browser sending a request. Under the hood this amounts to:
#   sk = socket.socket()   # create a socket
#   sk.connect(...)        # connect to the chouti site
#   sk.sendall('...')
#   sk.recv(...)
def task(url):
    """Download ``url`` and print the text content of the returned HTML."""
    print(url)
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # Without a timeout a stalled server would block this worker forever.
        timeout=10,
    )
    # Show the text of the downloaded page.
    soup = BeautifulSoup(r1.text, 'html.parser')
    print(soup.text)
    # Example of pulling each entry's title and link out of the page:
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)


def run():
    """Fetch pages 1 through 49 of the hot list with five worker threads."""
    # The context manager waits for all downloads and shuts the pool down.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
相关:
a. 以上示例进程和线程哪个好?
- 线程好
b. requests模块模拟浏览器发送请求
- 本质 requests.get(...):
- 创建socket客户端
- 连接 【阻塞】
- 发送请求
- 接收响应【阻塞】
- 断开连接
c. 线程和进程池
重点总结:
1. 进程 *****
- windows
- linux
2. 进程数据共享 *****
- Queue
- Manager
3. 进程锁 ***
4. 进程池 *****
5. 爬虫(进程池/线程池的应用)
# by luffycity.com
"""
面向对象补充
"""
"""
class Foo(object):
def __init__(self):
self.info = {}
def __setitem__(self, key, value):
self.info[key] = value
def __getitem__(self, item):
return self.info.get(item)
obj = Foo()
obj['x'] = 123
print(obj['x'])
"""
from flask import globals
# class Foo(object):
#
# def __init__(self):
# object.__setattr__(self, 'info', {}) # 在对象中设置值的本质
#
# def __setattr__(self, key, value):
# self.info[key] = value
#
# def __getattr__(self, item):
# print(item)
# return self.info[item]
#
# obj = Foo()
# obj.name = 'alex'
# print(obj.name)
# Build the list 0..9999 in one step instead of appending in a loop.
# NOTE(review): the original indentation was lost; this assumes the print
# ran once, after the loop — confirm.
v = list(range(10000))
print(v)
View Code
# by luffycity.com
import multiprocessing
import threading
# ##################### 进程间的数据不共享 #####################
"""
data_list = []
def task(arg):
data_list.append(arg)
print(data_list)
def run():
for i in range(10):
p = multiprocessing.Process(target=task,args=(i,))
# p = threading.Thread(target=task,args=(i,))
p.start()
if __name__ == '__main__':
run()
"""
# ##################### 进程常用功能 #####################
"""
import time
def task(arg):
time.sleep(2)
print(arg)
def run():
print('111111111')
p1 = multiprocessing.Process(target=task,args=(1,))
p1.name = 'pp1'
p1.start()
print('222222222')
p2 = multiprocessing.Process(target=task, args=(2,))
p2.name = 'pp2'
p2.start()
print('333333333')
if __name__ == '__main__':
run()
"""
# ##################### 通过继承方式创建进程 #####################
class MyProcess(multiprocessing.Process):
    """Process created by inheritance: the work lives in ``run``."""

    def run(self):
        """Print the process object this method is executing in."""
        print('当前进程', multiprocessing.current_process())


def run():
    """Launch two ``MyProcess`` instances and block until both exit."""
    p1 = MyProcess()
    p1.start()
    p2 = MyProcess()
    p2.start()
    # Join explicitly so the caller resumes only after both children finish.
    p1.join()
    p2.join()


if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import threading
import queue
import time
# ##################### 进程间的数据共享:multiprocessing.Queue #####################
"""
q = multiprocessing.Queue()
def task(arg,q):
q.put(arg)
def run():
for i in range(10):
p = multiprocessing.Process(target=task, args=(i, q,))
p.start()
while True:
v = q.get()
print(v)
run()
"""
# ##################### 进程间的数据共享:Manager #####################
"""
def task(arg,dic):
time.sleep(2)
dic[arg] = 100
if __name__ == '__main__':
m = multiprocessing.Manager()
dic = m.dict()
process_list = []
for i in range(10):
p = multiprocessing.Process(target=task, args=(i,dic,))
p.start()
process_list.append(p)
while True:
count = 0
for p in process_list:
if not p.is_alive():
count += 1
if count == len(process_list):
break
print(dic)
# ...
"""
# ##################### 进程间的数据其他电脑 #####################
"""
def task(arg,dic):
pass
if __name__ == '__main__':
while True:
# 连接上指定的服务器
# 去机器上获取url
url = 'adfasdf'
p = multiprocessing.Process(target=task, args=(url,))
p.start()
"""
# by luffycity.com
import time
import threading
import multiprocessing
lock = multiprocessing.RLock()


def task(arg, delay=2):
    """Print a banner, then sleep and print ``arg`` while holding ``lock``.

    Args:
        arg: value printed inside the critical section.
        delay: seconds to sleep while the lock is held; defaults to 2.
    """
    print('鬼子来了')
    # Using the lock as a context manager guarantees release on any exit
    # path; with acquire()/release() an exception would leave it held.
    with lock:
        time.sleep(delay)
        print(arg)


if __name__ == '__main__':
    # NOTE(review): children see this module-level lock only when it is
    # inherited (fork start method). Under spawn each child builds its own
    # lock on import, so hand the lock over via ``args`` on such platforms.
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.start()
    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.start()
    p1.join()
    p2.join()
# by luffycity.com
import time
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def task(arg, delay=2):
    """Wait ``delay`` seconds (default 2) and then print ``arg``."""
    time.sleep(delay)
    print(arg)


if __name__ == '__main__':
    # Leaving the ``with`` block shuts the pool down after the ten submitted
    # jobs complete, so no worker processes are left behind.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
# by luffycity.com
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
# Simulate a browser sending a request. Under the hood this amounts to:
#   sk = socket.socket()   # create a socket
#   sk.connect(...)        # connect to the chouti site
#   sk.sendall('...')
#   sk.recv(...)
def task(url):
    """Fetch ``url`` and print the text extracted from its HTML."""
    print(url)
    r1 = requests.get(
        url=url,
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'
        },
        # Bound the wait: with no timeout a silent server hangs the worker.
        timeout=10,
    )
    # Show the text of the downloaded page.
    soup = BeautifulSoup(r1.text, 'html.parser')
    print(soup.text)
    # Example of pulling each entry's title and link out of the page:
    # content_list = soup.find('div',attrs={'id':'content-list'})
    # for item in content_list.find_all('div',attrs={'class':'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title,target_url)


def run():
    """Download pages 1 through 49 of the hot list on five threads."""
    # Exiting the ``with`` block waits for every download and frees the pool.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
# by luffycity.com
import multiprocessing
import time
def task(arg, dic, delay=2):
    """Sleep ``delay`` seconds, then store 100 under ``arg`` in ``dic``.

    Args:
        arg: key to write.
        dic: mapping to write into (a Manager dict when run as a script).
        delay: seconds to sleep before writing; defaults to 2.
    """
    time.sleep(delay)
    dic[arg] = 100


if __name__ == '__main__':
    m = multiprocessing.Manager()
    # Must be a Manager dict: a plain ``{}`` is copied into each child, so
    # the children's writes would never reach this process.
    dic = m.dict()
    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)
    # Wait for every writer before reading the shared result.
    for p in process_list:
        p.join()
    print(dic)
    print('end')