# -*- coding: utf-8 -*-
"""Scrape an anime's episode pages on http://www.yhdm.tv/ and find the video files.

Created on Fri Aug 28 17:21:10 2020

@author: Mto

Site: http://www.yhdm.tv/
Goal: obtain the video files.
Aug 28: the basic functionality is implemented.
"""
import re

import requests
from bs4 import BeautifulSoup

# Root of the site; episode hrefs found on a show page are appended to it.
BASE_URL = 'http://www.yhdm.tv'

# Browser-like User-Agent sent with every request.
USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) '
              'Gecko/20100101 Firefox/67.0')

# Seconds to wait for the server before giving up, so a stalled connection
# cannot hang the script forever (requests has no timeout by default).
REQUEST_TIMEOUT = 30

# Upper bound on the number of episodes processed by main().
MAX_EPISODES = 3

# Matches the leading part of a ``data-vid`` value, from ``https`` up to the
# last ``.mp4``.
MP4_URL_PATTERN = re.compile(r'^https.*\.mp4')


def getHTML(url: str) -> requests.Response:
    """Fetch a page from the site and return the response.

    Args:
        url: Address of the page to request.

    Returns:
        The ``requests`` response, with its text encoding forced to UTF-8.
    """
    header = {
        'Host': 'www.yhdm.tv',
        'User-agent': USER_AGENT,
    }
    r = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    return r


def GetMp4HTML(url: str) -> requests.Response:
    """Fetch a video URL and return the response.

    Unlike getHTML, no ``Host`` header is sent, and the HTTP status code of
    the response is printed.

    Args:
        url: Address of the video file to request.

    Returns:
        The ``requests`` response, with its text encoding forced to UTF-8.
    """
    header = {
        'User-agent': USER_AGENT,
    }
    r = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    r.encoding = 'utf-8'
    print(r.status_code)
    return r


def processHTML(r: requests.Response) -> BeautifulSoup:
    """Parse the text of a response with bs4's ``html.parser``.

    Args:
        r: Response whose ``text`` is parsed.

    Returns:
        The parsed document.
    """
    return BeautifulSoup(r.text, 'html.parser')


def GetLinksAndTitle(url1: str, title: list, links: list) -> None:
    """Collect episode titles and page links from a show page.

    The episode anchors are read from the first element on the page whose
    ``style`` attribute is ``display:block``. Results are appended to the
    caller's lists in page order; nothing is returned.

    Args:
        url1: Address of the show page.
        title: List that receives the text of each episode anchor.
        links: List that receives the absolute URL of each episode page.

    Raises:
        ValueError: If the page has no ``display:block`` element.
    """
    soup = processHTML(getHTML(url1))

    # The heading is only informational, so a page without a plain-text <h1>
    # must not abort the scrape.
    heading = soup.h1
    if heading is not None and heading.string is not None:
        print('要下载的动漫为:' + heading.string)

    containers = soup.find_all(style="display:block")
    if not containers:
        raise ValueError('no episode list found on ' + url1)

    # href=True skips anchors that carry no link at all.
    for anchor in containers[0].find_all('a', href=True):
        title.append(anchor.text)
        links.append(BASE_URL + anchor['href'])


def getmp4(link: str) -> str:
    """Extract the video file URL from an episode page.

    The URL is taken from the ``data-vid`` attribute of the first
    ``div#playbox`` element on the page.

    Args:
        link: Address of the episode page.

    Returns:
        The part of ``data-vid`` matched by MP4_URL_PATTERN.

    Raises:
        ValueError: If the page has no ``div#playbox`` element, or its
            ``data-vid`` value does not match MP4_URL_PATTERN.
    """
    soup = processHTML(getHTML(link))
    players = soup.select('div#playbox')
    if not players:
        raise ValueError('no div#playbox element found on ' + link)

    # A missing attribute is treated like a non-matching one.
    data_vid = players[0].attrs.get('data-vid', '')
    mo = MP4_URL_PATTERN.search(data_vid)
    if mo is None:
        raise ValueError('no https .mp4 URL in data-vid on ' + link)
    return mo.group()


def download(mp4link: str, title: str) -> None:
    """Report the video URL for an episode without fetching the file.

    Args:
        mp4link: URL of the video file.
        title: Episode title, used in the printed message.
    """
    # NOTE: the real download is intentionally disabled (see the printed
    # message); the original code is kept below for reference.
    # r = GetMp4HTML(mp4link)
    print(mp4link)
    print(title + '模拟访问成功,不下了,放过那个可怜的服务器吧')
    # try:
    #     with open(title+'.mp4','wb') as f:
    #         f.write(r.content)
    #     print(title+'下载成功')
    # except:
    #     print(title+'下载失败')


def main() -> None:
    """Process up to MAX_EPISODES episodes of the hard-coded show page."""
    title = []
    links = []
    url = 'http://www.yhdm.tv/show/4790.html'
    GetLinksAndTitle(url, title, links)
    # Slicing keeps this safe when the show has fewer episodes than the cap.
    for name, link in zip(title[:MAX_EPISODES], links[:MAX_EPISODES]):
        mp4link = getmp4(link)
        download(mp4link, name)


if __name__ == '__main__':
    main()