from concurrent.futures import ThreadPoolExecutor
import requests
import re
import uuid
# Shared thread pool driving all async stages (index fetch -> detail fetch -> save).
# NOTE(review): 200 workers is very aggressive for one host — confirm the target
# site tolerates this level of concurrency.
pool = ThreadPoolExecutor(200)
# 1. Request helper
def get_page(url):
    """Send a GET request to *url* and return the full requests.Response.

    A timeout is set so a stalled connection cannot hang one of the pool's
    worker threads forever (requests waits indefinitely by default).
    May raise requests.RequestException (incl. Timeout) on network failure.
    """
    response = requests.get(url, timeout=30)
    return response
# 2. Parse the home page and collect the video ID numbers
def parse_index(response):
    """Extract every video id from the index page's anchor tags.

    Matches ``<a href="video_<id>"...>`` links in ``response.text`` and
    returns the captured ids as a list of strings.
    """
    link_pattern = re.compile('<a href="video_(.*?)".*?>', re.S)
    return link_pattern.findall(response.text)
# 3. Parse the detail page and extract the real video URL
def parse_detail(res):
    """Callback for a detail-page future: pull the real video URL and queue it.

    *res* is a concurrent.futures.Future whose result is a requests.Response.
    On success, submits an async download of the video and chains save_movie
    as the next callback; returns the extracted URL, or None when the page
    contains no ``srcUrl`` (previously this raised IndexError and silently
    killed the callback).
    """
    response = res.result()
    matches = re.findall('srcUrl="(.*?)"', response.text, re.S)
    if not matches:
        # Anti-scraping page or layout change — nothing to download.
        print(f'srcUrl not found in {response.url}, skipping')
        return None
    movie_detail_url = matches[0]
    print(f'往视频链接: {movie_detail_url}发送请求...')
    # Asynchronously request the real video URL; hand the result to save_movie.
    pool.submit(get_page, movie_detail_url).add_done_callback(save_movie)
    return movie_detail_url
# 4. Receive the downloaded video bytes and persist them to disk
def save_movie(res):
    """Callback for a video-download future: write the bytes to a local file.

    *res* is a concurrent.futures.Future whose result is a requests.Response
    holding the raw video data; the file is named with a fresh UUID.
    """
    video_response = res.result()
    stem = str(uuid.uuid4())
    print(f'{stem}.mp4视频开始保存...')
    with open(f'{stem}.mp4', 'wb') as outfile:
        outfile.write(video_response.content)
    print('视频下载完毕!')
if __name__ == '__main__':
    # 1. Fetch the home page.
    index_response = get_page('https://www.pearvideo.com/')
    # 2. Parse it for all video ids.
    id_list = parse_index(index_response)
    print(id_list)
    # 3. Build each detail-page URL and fan the fetches out to the pool.
    for video_id in id_list:  # renamed from `id` — don't shadow the builtin
        print(video_id)
        detail_url = f'https://www.pearvideo.com/video_{video_id}'
        # Submit the detail-page fetch; parse_detail runs as the completion
        # callback and chains the actual video download + save.
        pool.submit(get_page, detail_url).add_done_callback(parse_detail)
    # Explicitly wait for every queued fetch/save to finish before exiting.
    pool.shutdown(wait=True)