python requests库 爬取视频
一、总结
一句话总结:
爬取视频和爬取图片的操作比较类似:通过设置请求中的 stream 参数,可以选择一次性把整个视频作为一个块下载(stream=False,默认),或者以流的方式分块下载(stream=True)。
# Download a video and display the download progress.
import requests

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36",
}
url = "https://video.pearvideo.com/mp4/adshort/20200709/cont-1684816-15252785_adpkg-ad_hd.mp4"

# stream=True postpones fetching the body until iter_content() is consumed;
# the `with` block guarantees the connection is released afterwards.
with requests.get(url, headers=headers, stream=True) as response:
    print(response.status_code)
    print(response.headers.get('content-length'))
    # Fail early instead of saving an HTTP error page as v.mp4.
    response.raise_for_status()

    # The header may be absent (e.g. chunked transfer encoding); 0 means
    # "total size unknown", in which case no percentage is printed.
    content_size = int(response.headers.get('content-length', 0))

    # Do not touch response.text / response.content here: either one would
    # read the entire body into memory and defeat the purpose of streaming.
    downloaded = 0
    with open("v.mp4", "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
            # Count the bytes actually received; the last chunk is usually
            # shorter than chunk_size, so counting chunks would overshoot 100%.
            downloaded += len(chunk)
            if content_size:
                rate = downloaded / content_size
                print("下载进度:{0:%}".format(rate))

print("下载完成")
1、爬虫如何获取视频的大小?
用响应头里面的 content-length 字段即可,即 response.headers['content-length'];它的值是字符串,表示响应体的字节数,使用前需要用 int() 转换。注意:服务器使用分块传输(chunked)等方式时可能不返回该字段,此时无法预先得知视频大小。
二、python requests库 爬取视频
转自或参考:
import requests


def download(url, filename='v.mp4'):
    """Stream the resource at *url* into a local file.

    Args:
        url: Address of the video to download.
        filename: Path of the output file; defaults to 'v.mp4'.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with requests.get(url, stream=True) as r:
        # Fail early instead of saving an HTTP error page as the video file.
        r.raise_for_status()
        print('开始下载。。。')
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
    print('下载结束')


def download_level2(url, filename='v.mp4'):
    """Stream the resource at *url* into a local file, printing progress.

    Progress is derived from the number of bytes actually received, so it
    ends at exactly 100% even when the last chunk is shorter than the
    requested chunk size. When the server does not send a content-length
    header the total size is unknown and no progress is printed.

    Args:
        url: Address of the video to download.
        filename: Path of the output file; defaults to 'v.mp4'.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        print('开始下载。。。')
        # 0 stands for "size unknown" (header missing, e.g. chunked transfer).
        content_size = int(r.headers.get('content-length', 0))
        received = 0
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
                received += len(chunk)
                if content_size:
                    loaded = received / content_size
                    print(loaded)
                    print('已下载{0:%}'.format(loaded))
    print('下载结束')


if __name__ == '__main__':
    URL = 'http://tb-video.bdstatic.com/tieba-smallvideo-transcode/3853363_adac7ec8907890797b3970e570aba43a_140b8b74a014_3.mp4'
    # Plain download:
    # download(URL)
    # Download with progress output:
    download_level2(URL)