• Python 爬虫 Vimeo视频下载链接


    python vimeo_d.py https://vimeo.com/228013581  

    在 https://vimeo.com/ 上看到稀罕的视频时,按照上面的命令格式,在脚本名后加上视频的观看地址运行,即可获得视频下载链接。支持三种方式爬取下载链接。

     (为了凑够150字+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++) 

     1 #coding:utf-8
     2 #sample url = 'https://vimeo.com/228013581'
     3 
     4 
     5 import requests
     6 import json
     7 import sys
     8 import re
     # Python 2 only: force the implicit str/unicode conversion codec to UTF-8 so
     # the Chinese format strings below can be combined with unicode values.
     # reload(sys) brings back sys.setdefaultencoding, which the site module
     # removes at interpreter start-up.  Python 3 has no builtin reload() and
     # already defaults to UTF-8, so the hack is skipped there.
     if sys.version_info[0] < 3:
         reload(sys)
         sys.setdefaultencoding('utf-8')
    11 
    12 headers ={
    13     'Accept-Encoding':'gzip, deflate, br',
    14     'Accept-Language':'zh-CN,zh;q=0.8,en;q=0.6',
    15     'Connection':'keep-alive',
    16     'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36'
    17 }
    18 
    def process_1(cmd, url, headers, timeout=30):
        """Print the download entries returned by ``?action=load_download_config``.

        Sends a GET request to ``<url>?action=load_download_config`` marked as an
        XMLHttpRequest.  On HTTP 200 the JSON body is parsed and, for every entry
        under its ``files`` key, the entry's ``download_name`` and
        ``download_url`` are printed, followed by the part of the name before
        the first underscore.  Any other status code prints nothing.

        Args:
            cmd: Unused; kept so all ``process_*`` functions share one signature.
            url: Video page URL, e.g. ``https://vimeo.com/228013581``.
            headers: Base request headers.  They are copied, so the caller's
                dict is never modified.
            timeout: Seconds to wait for the server before requests raises.

        Raises:
            requests.RequestException: On network failure or timeout.
            ValueError: If a 200 response body is not valid JSON.
        """
        # Work on a copy: writing Host / X-Requested-With into the shared dict
        # would leak them into every later request made with the same headers.
        request_headers = dict(headers)
        request_headers.update({
            'Host': 'vimeo.com',
            'Referer': '%s' % url,
            'X-Requested-With': 'XMLHttpRequest',
        })

        r = requests.get('%s?action=load_download_config' % url,
                         headers=request_headers, timeout=timeout)
        if r.status_code != 200:
            return

        # A response without a "files" list is treated as "nothing to download".
        files = r.json().get('files') or []
        for index, entry in enumerate(files, 1):
            print("第%s条Download信息" % index)
            print('标题: %s' % entry['download_name'])
            print('链接: %s' % entry['download_url'])
            print(entry['download_name'].split('_')[0])
    35 
    def process_2(cmd, url, headers, timeout=30):
        """Print the links savevideo.me returns for ``url``.

        Posts ``url`` as form data to ``http://savevideo.me/get/``.  On HTTP 200
        the response HTML is searched for ``<a href="...">`` elements whose link
        text starts with ``D`` (presumably the "Download" links - confirm against
        the live page) and the list of their ``href`` values is printed.  Any
        other status code prints nothing.

        Args:
            cmd: Unused; kept so all ``process_*`` functions share one signature.
            url: Video page URL to resolve.
            headers: Base request headers.  They are copied, so the caller's
                dict is never modified.
            timeout: Seconds to wait for the server before requests raises.

        Raises:
            requests.RequestException: On network failure or timeout.
        """
        # Copy before adding the site-specific headers so they do not leak into
        # requests made later with the same base dict.
        request_headers = dict(headers)
        request_headers.update({
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'savevideo.me',
            'Origin': 'http://savevideo.me',
            'Referer': 'http://savevideo.me/',
        })

        r = requests.post('http://savevideo.me/get/', headers=request_headers,
                          data={'url': url}, timeout=timeout)
        if r.status_code != 200:
            return

        # Single-quoted raw string: the pattern itself contains double quotes.
        links = re.findall(r'<a.*?href="(.*?)">D.*?</a>', r.text, re.S)
        print(links)
    48 
    def process_3(cmd, url, headers, timeout=30):
        """Print the progressive MP4 streams listed in the player config.

        Fetches the page at ``url``, extracts the first ``config_url`` value
        found after a ``<script>`` tag, fetches that URL and reads
        ``request.files.progressive`` and ``video.title`` from its JSON.  The
        title is printed once, then width, height and URL for every stream.
        Nothing is printed if either request returns a non-200 status or no
        ``config_url`` is found in the page.

        Args:
            cmd: Unused; kept so all ``process_*`` functions share one signature.
            url: Video page URL, e.g. ``https://vimeo.com/228013581``.
            headers: Unused; both requests are sent with requests' defaults.
            timeout: Seconds to wait for each request before requests raises.

        Raises:
            requests.RequestException: On network failure or timeout.
            ValueError: If the config response body is not valid JSON.
            KeyError: If the config JSON lacks one of the expected keys.
        """
        page = requests.get(url, timeout=timeout)
        if page.status_code != 200:
            return

        # Single-quoted raw string: the pattern itself contains double quotes.
        matches = re.findall(r'<script>.*?"config_url.*?"(http.*?)".*?',
                             page.text, re.S)
        if not matches:
            # No embedded config URL in the page; nothing to report.
            return

        # The URL is embedded as a JSON string, where "/" is escaped as "\/".
        config_url = matches[0].replace('\\/', '/')

        config_response = requests.get(config_url, timeout=timeout)
        if config_response.status_code != 200:
            return

        config = config_response.json()
        streams = config['request']['files']['progressive']
        print('标题:%s' % config['video']['title'])
        for index, stream in enumerate(streams, 1):
            print("第%s条Download信息" % index)
            print("质量%s*%s(mp4)" % (stream['width'], stream['height']))
            print("链接%s" % (stream['url']))
    69 
    70 
    if __name__ == '__main__':
        import sys

        # Usage: python vimeo_d.py <video page url>
        if len(sys.argv) < 2:
            sys.stderr.write('usage: python vimeo_d.py <video-url>\n')
            sys.exit(1)

        # Build the arguments directly; there is nothing dynamic to eval().
        cmd, url = 'parse', sys.argv[1]

        # The three strategies are independent, so a network failure in one
        # must not prevent the remaining ones from being tried.
        for strategy in (process_1, process_2, process_3):
            try:
                strategy(cmd, url, headers)
            except requests.RequestException as exc:
                sys.stderr.write('%s failed: %s\n' % (strategy.__name__, exc))
  • 相关阅读:
    String
    【CLR】奇妙的String
    【Siverlight
    【WPF】ContentControl Style定义与使用出现问题后 -- 引发的思考
    WPF 简介
    《Java从入门到失业》第一章:计算机基础知识(1.1):二进制和十六进制
    《Java从入门到失业》第五章:继承与多态(5.8-5.10):多态与Object类
    《Java从入门到失业》第五章:继承与多态(5.1-5.7):继承
    《Java从入门到失业》第四章:类和对象(4.6):类路径
    《Java从入门到失业》第四章:类和对象(4.5):包
  • 原文地址:https://www.cnblogs.com/cutesnow/p/7338176.html
Copyright © 2020-2023  润新知