#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @Time : 2020/12/21 上午8:51
# @Author : SR
# @Email : srcoder@1163.com
# @File : spider.py
# @Software: PyCharm
import os
import requests
from multiprocessing.pool import ThreadPool
class SpiderMovieFromChenYu:
    """Download the numbered ``.ts`` segments of one movie and merge them.

    Segments are fetched one per call to :meth:`get_ts` (safe to fan out over
    a thread pool), failed segment numbers are collected in
    ``fail_ts_list``, :meth:`check_ts` retries them, and :meth:`get_video`
    concatenates the downloaded segments into a single movie file.
    """

    # URL template of one segment; ``%03d`` is filled with the segment number.
    TS_URL_TEMPLATE = 'https://sina.com-h-sina.com/20180815/9998_f9aa34bf/1000k/hls/c0cdc4673f4%03d.ts'
    # File name of the merged movie inside ``save_movie_path``.
    MOVIE_FILE_NAME = '明日的我与昨日你的约会.mp4'

    def __init__(self, save_ts_path, save_movie_path, fail_ts_list=None, ):
        """Store the target directories and the request headers.

        Args:
            save_ts_path: Directory in which the ``.ts`` segments are stored.
            save_movie_path: Directory in which the merged movie is written.
            fail_ts_list: Optional list of segment numbers that still have to
                be (re)downloaded. The list object is used as-is, so the
                caller sees updates; a fresh list is created when omitted.
        """
        self.save_ts_path = save_ts_path
        self.save_movie_path = save_movie_path
        # A ``[]`` default would be shared by every instance of the class.
        self.fail_ts_list = [] if fail_ts_list is None else fail_ts_list
        self.headers = {
            'Referer': 'http://www.chenyutv.com/',
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
        }

    def mkdir_directory(self):
        """Create the segment and movie directories if they do not exist."""
        # makedirs also creates missing parent directories, which os.mkdir
        # refuses to do.
        os.makedirs(self.save_ts_path, exist_ok=True)
        os.makedirs(self.save_movie_path, exist_ok=True)

    def get_ts(self, number, flag=False):
        """Download segment ``number`` unless it is already on disk.

        Args:
            number: Segment number substituted into ``TS_URL_TEMPLATE``.
            flag: True when this call is a retry of a previously failed
                segment; on success the number is removed from
                ``fail_ts_list``.

        Network and file-system errors are not raised; the segment number is
        recorded in ``fail_ts_list`` instead. A non-200 response is reported
        and the segment is left as it was (not downloaded).
        """
        play_url = self.TS_URL_TEMPLATE % number
        ts_name = play_url.rsplit('/', 1)[-1]  # file name of the segment
        ts_path = os.path.join(self.save_ts_path, ts_name)

        if os.path.exists(ts_path):
            # Already downloaded. On a retry the number must still leave the
            # failure list, otherwise check_ts would never terminate.
            if flag:
                self._forget_failure(number)
            return

        # Download into a side file first so that an interrupted write never
        # leaves a truncated segment that looks complete.
        part_path = ts_path + '.part'
        try:
            with requests.session() as session:
                response = session.get(play_url, headers=self.headers, timeout=60)
            if response.status_code != 200:
                print("%s:下载失败,HTTP状态码 %s" % (number, response.status_code))
                return
            with open(part_path, 'wb') as f:
                f.write(response.content)
            os.replace(part_path, ts_path)
        except (requests.RequestException, OSError) as e:
            print("%s:下载失败 (%s)" % (number, e))
            # Remember the segment so that check_ts can retry it.
            if number not in self.fail_ts_list:
                self.fail_ts_list.append(number)
            return

        if flag:
            self._forget_failure(number)

    def _forget_failure(self, number):
        """Remove ``number`` from ``fail_ts_list`` if it is present."""
        if number in self.fail_ts_list:
            self.fail_ts_list.remove(number)

    def check_ts(self, max_rounds=None):
        """Retry failed segments, then merge all segments into the movie.

        Args:
            max_rounds: Maximum number of retry passes over ``fail_ts_list``.
                ``None`` (the default) retries until the list is empty. When
                the limit is reached with segments still failing, they are
                reported and the movie is NOT merged.
        """
        print("开始检查:")
        print(self.fail_ts_list)
        rounds = 0
        while self.fail_ts_list:
            if max_rounds is not None and rounds >= max_rounds:
                print("ts 文件下载失败: %s" % self.fail_ts_list)
                return
            rounds += 1
            # Iterate over a snapshot: get_ts removes entries from the list,
            # and mutating a list while iterating it skips elements.
            for number in list(self.fail_ts_list):
                self.get_ts(number, True)
                if number not in self.fail_ts_list:
                    print("%s:下载完毕" % number)
            print(self.fail_ts_list)
        print("ts 文件下载完成!")
        self.get_video()  # all segments are present; build the movie file

    def get_video(self):
        """Concatenate every ``.ts`` file in ``save_ts_path`` into the movie.

        Segments are ordered by name length and then by name, so that
        equally prefixed names sort by their numeric suffix even beyond the
        zero-padded range (``...999.ts`` before ``...1000.ts``). An existing
        movie file is overwritten. Nothing is written when there are no
        segments.
        """
        ts_names = [name for name in os.listdir(self.save_ts_path)
                    if name.endswith('.ts')]
        if not ts_names:
            return
        ts_names.sort(key=lambda name: (len(name), name))

        movie_path = os.path.join(self.save_movie_path, self.MOVIE_FILE_NAME)
        # Open the output once in write mode: append mode duplicated the
        # whole movie whenever the merge was run a second time.
        with open(movie_path, 'wb') as movie:
            for name in ts_names:
                with open(os.path.join(self.save_ts_path, name), 'rb') as segment:
                    movie.write(segment.read())
                print("%s:写入完成" % name)
if __name__ == '__main__':
    # Script entry point: ask for the segment range and the target
    # directories, download all segments in parallel, then retry the failed
    # ones and merge everything into the movie file.
    min_number = int(input('请输入ts的起始数字>>:').strip())
    # +1 because the user enters an inclusive end and range() is exclusive.
    max_number = int(input('请输入ts的结尾数字>>:').strip()) + 1
    save_ts_path = input('请输入ts保存文件路径>>:').strip()
    save_movie_path = input('请输入视频保存文件路径>>:').strip()
    spider = SpiderMovieFromChenYu(save_ts_path, save_movie_path)
    spider.mkdir_directory()
    pool = ThreadPool(100)
    try:
        pool.map(spider.get_ts, range(min_number, max_number))
    finally:
        # Always release the worker threads, even if a download raised.
        pool.close()
        pool.join()
    # Retry whatever failed and build the movie. The original called
    # get_ts() without its required argument here, which raised TypeError
    # and meant the movie was never produced.
    spider.check_ts()