• 下载抖音无水印视频工具 python webdriver 2020.09.02


    由于抖音更新了系统,之前的版本已无法下载视频,下面是最新的更正版本。以前可以用 wget 下载,现在改为用 aria2c 来下载。

    # coding=utf-8
    
    from selenium import webdriver
    import time, re, os, sys
    reload(sys)
    sys.setdefaultencoding("utf-8")
    
    if len(sys.argv)==1:
        print 'input url'
        sys.exit()
    url = sys.argv[1]
    
    def main():
        options = webdriver.ChromeOptions()
        options.headless = True
        options.add_argument('log-level=3')
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        options.add_argument('user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25')
    
        chrome_driver = 'D:softChromePortable84.0.4147.105AppChrome-binchromedriver.exe'
        b = webdriver.Chrome(executable_path = chrome_driver,options=options)
        print 'Downloading page ...'
        b.get(url)
        time.sleep(1)
        page_source = b.page_source
        # print page_source
        b.quit()
    
        src = re.findall( r'<video class="video-player--..... hide--....." src="(.*?)"', page_source,  re.S)
        src2 = ""
        if src:
            src = "".join(src)
            print src
            src2 = src.replace('playwm','play')
            print src2
        else:
            print "No match src"
        userTitle = re.findall( r'<p class="desc--.....">(.*?)</p>', page_source,  re.S)
        if userTitle:
            userTitle = "".join(userTitle)
            print userTitle
        else:
            print "No match user Title"
        name = re.findall( r'<p class="author-name--.....">@(.*?)</p>', page_source,  re.S)
        if name:
            name = "".join(name)
            name = name.replace('@','')
            print name
        else:
            print "No match name"
        uid = re.findall( r'<p class="unique_id--.....">(.*?)</p>', page_source,  re.S)
        if uid:
            uid = "".join(uid)
            uid = uid.replace('抖音号:','')
            uid = uid.replace(' ','')
            print uid
        else:
            print "No match uid"
    
        print "downloading video aria2c"
        cmd = 'aria2c --user-agent="user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25" '+'"'+ src2 + '" -o "' + uid + '_' + name+'_'+userTitle+'.mp4"'
        cmd = cmd.decode('utf8').encode('gb2312')
        val = os.system(cmd)
        print val
    
    # Run the download only when this file is executed as a script, not when
    # it is imported as a module.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    CentOS yum 安装svn1.8
    js 替换掉汉字 和替换非汉字 比较时间JS
    PhpStorm 10 破解
    html中link的用法
    CSS3:nth-child()伪类选择器
    提示的小三角
    css 高度自适应
    <input type="file" />浏览时只显示指定文件类型
    MySQL查询表内重复记录
    扒站小工具
  • 原文地址:https://www.cnblogs.com/nlsoft/p/13605181.html
Copyright © 2020-2023  润新知