下载抖音无水印视频工具 python webdriver 2020.09.02

由于抖音更新了系统，之前的版本已无法下载视频，下面是最新的更正版本。以前可以用 wget 下载，现在改为用 aria2c 来下载。

# coding=utf-8

from selenium import webdriver
import time, re, os, sys
# Python 2 only: setdefaultencoding is not reachable on the already-imported
# sys module, so the module is reloaded first. Switching the default to UTF-8
# lets the implicit str <-> unicode conversions further down handle the
# Chinese text instead of failing on non-ASCII bytes.
reload(sys)
sys.setdefaultencoding("utf-8")

# The page URL is taken from the first command-line argument; when the script
# is started with no arguments it prints a hint and exits.
if len(sys.argv) != 1:
    url = sys.argv[1]
else:
    print('input url')
    sys.exit()

# Chrome switch that sets a mobile user agent. The same text (including the
# leading "user-agent=") is what the original aria2c command passed as the
# value of --user-agent, so it is reused verbatim for both.
_MOBILE_UA_OPTION = ('user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) '
                     'AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 '
                     'Mobile/10A5376e Safari/8536.25')


def _first_match(pattern, text):
    """Return the first captured group of *pattern* in *text*, or '' if absent."""
    found = re.search(pattern, text, re.S)
    return found.group(1) if found else ''


def _safe_filename(text):
    """Replace characters that Windows rejects in file names with '_'."""
    return re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', text)


def main():
    """Fetch the page at the module-level ``url`` and download its video.

    The page is rendered in headless Chrome with a mobile user agent, the
    video address, description, author name and account id are pulled out of
    the rendered HTML with regular expressions, ``playwm`` in the video
    address is replaced by ``play``, and the result is downloaded with
    ``aria2c`` as ``<uid>_<name>_<title>.mp4``.

    Each extracted value (or a "No match ..." message) is printed, followed by
    the aria2c exit status. Nothing is returned. When no video address is
    found the download step is skipped.

    This is Python 2 code: it relies on the UTF-8 default encoding set at
    module level and on ``str.decode``.
    """
    import subprocess  # local import: only the download step needs it

    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument('log-level=3')
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_argument(_MOBILE_UA_OPTION)

    # NOTE(review): this path contains no separators, so the backslashes were
    # presumably lost when the script was published -- restore the real
    # chromedriver location before running.
    chrome_driver = 'D:softChromePortable84.0.4147.105AppChrome-binchromedriver.exe'
    b = webdriver.Chrome(executable_path=chrome_driver, options=options)
    try:
        print('Downloading page ...')
        b.get(url)
        time.sleep(1)  # fixed pause before reading the rendered page
        page_source = b.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        b.quit()

    # Each class name is matched with five wildcard characters after "--".
    # Only the first match is used: joining several matches would glue
    # multiple URLs or names into one unusable string.
    src = _first_match(r'<video class="video-player--..... hide--....." src="(.*?)"', page_source)
    src2 = ""
    if src:
        print(src)
        src2 = src.replace('playwm', 'play')
        print(src2)
    else:
        print("No match src")

    userTitle = _first_match(r'<p class="desc--.....">(.*?)</p>', page_source)
    if userTitle:
        print(userTitle)
    else:
        print("No match user Title")

    name = _first_match(r'<p class="author-name--.....">@(.*?)</p>', page_source)
    if name:
        name = name.replace('@', '')
        print(name)
    else:
        print("No match name")

    uid = _first_match(r'<p class="unique_id--.....">(.*?)</p>', page_source)
    if uid:
        uid = uid.replace('抖音号：', '').replace(' ', '')
        print(uid)
    else:
        print("No match uid")

    if not src2:
        # Without a video address there is nothing to hand to aria2c.
        return

    print("downloading video aria2c")
    filename = _safe_filename(uid + '_' + name + '_' + userTitle + '.mp4')
    # An argument list (no shell) keeps quotes or shell metacharacters in the
    # page-supplied title from breaking or hijacking the command.
    args = ['aria2c', '--user-agent=' + _MOBILE_UA_OPTION, src2, '-o', filename]
    # Encode to GB2312 as the original command string was (presumably the
    # local Windows code page); characters GB2312 cannot represent are dropped
    # instead of raising UnicodeEncodeError.
    args = [arg.decode('utf8').encode('gb2312', 'ignore') for arg in args]
    try:
        val = subprocess.call(args)
    except OSError as exc:
        # Raised when the aria2c executable cannot be started at all.
        print('aria2c could not be started: %s' % exc)
        return
    print(val)

# Run the download only when the file is executed as a script.
if __name__ == '__main__':
    main()

相关阅读:
CentOS yum 安装svn1.8
js 替换掉汉字和替换非汉字比较时间JS
PhpStorm 10 破解
html中link的用法
CSS3:nth-child()伪类选择器
提示的小三角
css 高度自适应
<input type="file" />浏览时只显示指定文件类型
MySQL查询表内重复记录
扒站小工具

原文地址：https://www.cnblogs.com/nlsoft/p/13605181.html