import requests
import os
from lxml import etree
# Category listing page that is scraped for video entries.
url = 'https://www.pearvideo.com/category_5'
# Base headers sent with every request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0'
}
# Endpoint returning JSON metadata for one video; the content id is appended.
target = "https://www.pearvideo.com/videoStatus.jsp?contId="
# Directory the downloaded .mp4 files are written to.
output_dir = './images'
# (connect, read) timeout in seconds so a stalled server cannot hang the run.
request_timeout = (10, 60)
# Number of bytes written to disk per streamed chunk.
chunk_size = 64 * 1024

# Translation table mapping characters that are unsafe in file names
# (path separators, Windows-reserved punctuation, control characters) to "_".
_UNSAFE_CHARS = {ord(ch): "_" for ch in '\\/:*?"<>|'}
_UNSAFE_CHARS.update({code: "_" for code in range(32)})


def safe_filename(title):
    """Return *title* stripped of surrounding whitespace and made file-name safe.

    Path separators, Windows-reserved characters and control characters are
    replaced with ``_`` so the title cannot escape ``output_dir`` or make
    ``open()`` fail. The result may be empty if *title* was blank.
    """
    return title.strip().translate(_UNSAFE_CHARS)


def real_video_url(src_url, cont_id):
    """Return *src_url* with the leading token of its file name replaced.

    The file name (text after the last ``/``) has everything before its first
    ``-`` swapped for ``cont-<cont_id>``. Only the file name is touched, so a
    ``-`` in the host or directory part, or a repeated token elsewhere in the
    URL, is left alone.

    NOTE(review): presumably the leading token is a timestamp placeholder and
    the ``cont-<id>`` form is the playable address -- confirm against the site.
    """
    directory, slash, file_name = src_url.rpartition("/")
    _token, dash, remainder = file_name.partition("-")
    return directory + slash + "cont-" + cont_id + dash + remainder


def list_videos():
    """Fetch the category page and return its video entries.

    Returns:
        A list of dicts with keys ``"url_num"`` (content id taken from the
        link's href) and ``"name"`` (sanitized title plus ``.mp4``).

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an HTTP error status.
    """
    response = requests.get(url=url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    tree = etree.HTML(response.text)
    if tree is None:
        # Nothing parseable came back; treat it as an empty listing.
        return []

    videos = []
    for li in tree.xpath('//ul[@id="listvideoListUl"]/li'):
        hrefs = li.xpath('./div/a/@href')
        titles = li.xpath('./div/a/div[2]/text()')
        if not hrefs or not titles:
            # List items without a link or a title cannot be downloaded.
            continue
        url_num = hrefs[0].replace('video_', "")
        # Fall back to the content id when the title sanitizes to nothing.
        name = (safe_filename(titles[0]) or url_num) + '.mp4'
        videos.append({
            "url_num": url_num,
            "name": name,
        })
    return videos


def download_video(video):
    """Download one video described by *video* into ``output_dir``.

    Args:
        video: dict with ``"url_num"`` and ``"name"`` as produced by
            ``list_videos``.

    Returns:
        The path of the written file.

    Raises:
        requests.RequestException: on network failures or HTTP error statuses.
        KeyError, TypeError, ValueError: if the status response is not the
            expected JSON structure.
        OSError: if the file cannot be written.
    """
    cont_id = video['url_num']
    # Per-request copy so the module-level headers are never mutated.
    request_headers = dict(headers)
    request_headers['Referer'] = 'https://www.pearvideo.com/video_' + cont_id

    status = requests.get(
        url=target + cont_id,
        headers=request_headers,
        timeout=request_timeout,
    )
    status.raise_for_status()
    src_url = status.json()['videoInfo']['videos']['srcUrl']

    video_url = real_video_url(src_url, cont_id)
    print(video_url)

    # Save locally under the video's title; stream the body in chunks so the
    # whole file is never held in memory at once.
    path = os.path.join(output_dir, video["name"])
    with requests.get(
        video_url,
        headers=request_headers,
        stream=True,
        timeout=request_timeout,
    ) as response:
        response.raise_for_status()
        with open(path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    return path


def main():
    """Scrape the category page and download every listed video."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(output_dir, exist_ok=True)

    video_list1 = list_videos()
    print(video_list1)

    for dic_data in video_list1:
        try:
            download_video(dic_data)
        except (requests.RequestException, KeyError, TypeError,
                ValueError, OSError) as exc:
            # One broken or withdrawn video should not abort the whole run.
            print("Failed to download {}: {!r}".format(dic_data["name"], exc))


if __name__ == "__main__":
    main()