• 爬取ts类型视频文件并且合并成mp4文件


    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    # @Time : 2020/12/21 上午8:51
    # @Author : SR
    # @Email : srcoder@1163.com
    # @File : spider.py
    # @Software: PyCharm
    
    import os
    
    import requests
    
    from multiprocessing.pool import ThreadPool
    
    
    class SpiderMovieFromChenYu:
        """Download numbered ``.ts`` video segments and merge them into one file.

        Typical use: call :meth:`mkdir_directory`, then :meth:`get_ts` for every
        segment number, then :meth:`check_ts` to retry the failures and merge.

        Attributes:
            save_ts_path: Directory the individual ``.ts`` segments are saved to.
            save_movie_path: Directory the merged video is written to.
            fail_ts_list: Segment numbers whose download failed and must be retried.
            headers: HTTP headers (``Referer`` and ``user-agent``) sent with
                every segment request.
        """

        # URL template of one segment; ``%03d`` is filled with the segment number.
        PLAY_URL_TEMPLATE = 'https://sina.com-h-sina.com/20180815/9998_f9aa34bf/1000k/hls/c0cdc4673f4%03d.ts'
        # File name of the merged video inside ``save_movie_path``.
        MOVIE_NAME = '明日的我与昨日你的约会.mp4'

        def __init__(self, save_ts_path, save_movie_path, fail_ts_list=None):
            """Store the target directories and the initial list of failed segments.

            Args:
                save_ts_path: Directory for the downloaded ``.ts`` segments.
                save_movie_path: Directory for the merged video file.
                fail_ts_list: Optional list of segment numbers already known to
                    have failed. The list object is used as-is (not copied).
                    ``None`` creates a fresh list per instance; a mutable default
                    would be shared by every instance.
            """
            self.save_ts_path = save_ts_path
            self.save_movie_path = save_movie_path
            self.fail_ts_list = [] if fail_ts_list is None else fail_ts_list

            self.headers = {
                'Referer': 'http://www.chenyutv.com/',

                'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
            }

        def mkdir_directory(self):
            """Create the segment and movie directories if they do not exist yet.

            Missing parent directories are created as well.
            """
            for path in (self.save_ts_path, self.save_movie_path):
                os.makedirs(path, exist_ok=True)

        def get_ts(self, number, flag=False):
            """Download segment ``number`` unless its file is already on disk.

            A request error, a non-200 response or a write error records
            ``number`` in ``fail_ts_list`` (once) so it can be retried later.

            Args:
                number: Segment number substituted into ``PLAY_URL_TEMPLATE``.
                flag: ``True`` when this call is a retry of a failed segment;
                    if the segment is on disk afterwards, ``number`` is removed
                    from ``fail_ts_list``.
            """
            play_url = self.PLAY_URL_TEMPLATE % number
            ts_number = play_url.split('/')[-1]  # file name of the segment
            ts_file = os.path.join(self.save_ts_path, ts_number)

            if not os.path.exists(ts_file):  # skip segments that are already downloaded
                error = None
                try:
                    response = requests.get(play_url, headers=self.headers, timeout=60)
                    if response.status_code == 200:
                        # Write to a temporary name first so an interrupted write
                        # never leaves a truncated file that looks complete.
                        part_file = ts_file + '.part'
                        with open(part_file, 'wb') as f:
                            f.write(response.content)
                        os.replace(part_file, ts_file)
                    else:
                        error = 'HTTP %s' % response.status_code
                except (requests.RequestException, OSError) as e:
                    error = e

                if error is not None:
                    print("%s:下载失败 (%s)" % (number, error))
                    # Record the failure only once per segment number.
                    if number not in self.fail_ts_list:
                        self.fail_ts_list.append(number)
                    return

            # The segment is on disk here (just downloaded or found earlier), so a
            # retried segment can leave the failure list.
            if flag and number in self.fail_ts_list:
                self.fail_ts_list.remove(number)

        def check_ts(self, max_rounds=10):
            """Retry every failed segment, then merge the segments into the video.

            Args:
                max_rounds: Maximum number of passes over ``fail_ts_list``.
                    ``None`` retries until the list is empty, which never ends
                    if a segment is permanently unavailable.

            The merge (:meth:`get_video`) only runs when no failed segment is
            left; otherwise the remaining segment numbers are printed.
            """
            print("开始检查:")
            print(self.fail_ts_list)
            rounds = 0
            while self.fail_ts_list and (max_rounds is None or rounds < max_rounds):
                rounds += 1
                # Iterate over a snapshot: get_ts() removes items from the list.
                for number in list(self.fail_ts_list):
                    self.get_ts(number, True)
                    if number not in self.fail_ts_list:
                        print("%s:下载完毕" % number)
                    print(self.fail_ts_list)

            if self.fail_ts_list:
                print("以下 ts 文件下载失败, 未合并视频:")
                print(self.fail_ts_list)
                return

            print("ts 文件下载完成!")
            self.get_video()  # all segments are present: build the video file

        def get_video(self):
            """Append every ``.ts`` segment, in numeric order, to the movie file.

            The output is opened in append mode, so an existing movie file with
            the same name is extended rather than replaced. Nothing is created
            when there are no segments.
            """
            ts_list = [name for name in os.listdir(self.save_ts_path)
                       if name.endswith('.ts')]
            if not ts_list:
                return
            # Sort by length first: "%03d" stops zero-padding at 1000, so a plain
            # string sort would put segment 1000 before segment 999.
            ts_list.sort(key=lambda name: (len(name), name))

            movie_file = os.path.join(self.save_movie_path, self.MOVIE_NAME)
            with open(movie_file, 'ab') as f2:
                for ts in ts_list:
                    with open(os.path.join(self.save_ts_path, ts), 'rb') as f1:
                        f2.write(f1.read())
                    print("%s:写入完成" % ts)
    
    
    if __name__ == '__main__':
        # Interactive entry point: ask for the segment range and the target
        # directories, download all segments concurrently, then retry the
        # failures and merge everything into the video file.
        min_number = int(input('请输入ts的起始数字>>:').strip())
        # +1 because range() excludes its upper bound, while the user enters the
        # number of the last segment to download.
        max_number = int(input('请输入ts的结尾数字>>:').strip()) + 1
        save_ts_path = input('请输入ts保存文件路径>>:').strip()
        save_movie_path = input('请输入视频保存文件路径>>:').strip()

        spider = SpiderMovieFromChenYu(save_ts_path, save_movie_path)
        spider.mkdir_directory()
        pool = ThreadPool(100)
        pool.map(spider.get_ts, range(min_number, max_number))
        pool.close()
        pool.join()

        # The original called spider.get_ts() here without the required segment
        # number, which raises TypeError. check_ts() is the step that re-downloads
        # the failed segments and then builds the video.
        spider.check_ts()
  • 相关阅读:
    poj 1475 Pushing Boxes 推箱子(双bfs)
    poj 1806 Frequent values(RMQ 统计次数) 详细讲解
    poj 2846 Repository
    poj Ping pong LA 4329 (树状数组统计数目)
    POJ 1962-Corporative Network (并查集)
    hdu 2217 Visit
    nyoj304 节能
    与R纠缠的两件事——rownames和子集--转载
    七步精通Python机器学习--转载
    win10专业版激活(亲测可用)
  • 原文地址:https://www.cnblogs.com/SR-Program/p/14171353.html
Copyright © 2020-2023  润新知