由于抖音更新了系统,之前的版本已无法下载视频。下面是最新的修正版本:以前用 wget 下载,现在改为用 aria2c 下载。
# coding=utf-8
"""Download a Douyin video given the URL of its share page.

Usage: python script.py <url>

The page is rendered in headless Chrome with a mobile user agent, the video
URL, description, author name and Douyin ID are pulled out of the rendered
HTML with regular expressions, and the video is fetched with ``aria2c`` into
``<uid>_<name>_<title>.mp4`` in the current directory.
"""
from selenium import webdriver
import time, re, os, sys
import subprocess

PY2 = sys.version_info[0] == 2
if PY2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8 so
    # the non-ASCII literal below can be mixed with unicode page text.
    reload(sys)
    sys.setdefaultencoding("utf-8")

if len(sys.argv) == 1:
    print('input url')
    sys.exit()
url = sys.argv[1]

MOBILE_USER_AGENT = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) '
    'AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 '
    'Mobile/10A5376e Safari/8536.25'
)

# NOTE(review): this path contains no directory separators -- they appear to
# have been lost when the script was pasted. Point it at the real
# chromedriver.exe before running.
CHROME_DRIVER = 'D:softChromePortable84.0.4147.105AppChrome-binchromedriver.exe'

# Encoding used for command-line arguments on Python 2.
CONSOLE_ENCODING = 'gb2312'

# Characters that cannot appear in a Windows file name (plus control chars).
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _fetch_page_source(page_url):
    """Return the rendered HTML of *page_url* using headless Chrome."""
    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument('log-level=3')
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    options.add_argument('user-agent=' + MOBILE_USER_AGENT)
    browser = webdriver.Chrome(executable_path=CHROME_DRIVER, options=options)
    try:
        print('Downloading page ...')
        browser.get(page_url)
        time.sleep(1)  # give the page a moment to render the player
        return browser.page_source
    finally:
        # Always shut the browser down, even if loading the page failed.
        browser.quit()


def _extract(pattern, html):
    """Return all matches of *pattern* in *html* joined together, or ''."""
    return "".join(re.findall(pattern, html, re.S))


def _safe_filename(text):
    """Replace characters that are illegal in Windows file names with '_'."""
    return _UNSAFE_FILENAME_CHARS.sub('_', text)


def _console_bytes(text):
    """Encode *text* for the command line, dropping unencodable characters."""
    if isinstance(text, bytes):
        text = text.decode('utf8')
    return text.encode(CONSOLE_ENCODING, 'ignore')


def main():
    """Scrape the page at the module-level ``url`` and download its video.

    Prints each extracted field (or a "No match ..." message) and finally the
    exit code of aria2c. Returns None. Nothing is downloaded when no video
    URL is found on the page.
    """
    page_source = _fetch_page_source(url)

    src2 = ""
    src = _extract(
        r'<video class="video-player--..... hide--....." src="(.*?)"',
        page_source)
    if src:
        print(src)
        # Presumably 'playwm' is the watermarked stream and 'play' the clean
        # one -- the script swaps them before downloading.
        src2 = src.replace('playwm', 'play')
        print(src2)
    else:
        print("No match src")

    userTitle = _extract(r'<p class="desc--.....">(.*?)</p>', page_source)
    if userTitle:
        print(userTitle)
    else:
        print("No match user Title")

    name = _extract(r'<p class="author-name--.....">@(.*?)</p>', page_source)
    if name:
        name = name.replace('@', '')
        print(name)
    else:
        print("No match name")

    uid = _extract(r'<p class="unique_id--.....">(.*?)</p>', page_source)
    if uid:
        uid = uid.replace('抖音号:', '')
        uid = uid.replace(' ', '')
        print(uid)
    else:
        print("No match uid")

    if not src2:
        # Without a video URL there is nothing for aria2c to fetch.
        return

    print("downloading video aria2c")
    out_name = _safe_filename(uid + '_' + name + '_' + userTitle) + '.mp4'
    # NOTE(review): the header value deliberately keeps the original
    # "user-agent=" prefix; it looks like a copy/paste slip, but it is what
    # the script has always sent -- confirm before changing it.
    args = [
        'aria2c',
        '--user-agent=user-agent=' + MOBILE_USER_AGENT,
        src2,
        '-o', out_name,
    ]
    if PY2:
        # Python 2 passes byte strings to the process; encode explicitly so
        # non-ASCII file names survive and unencodable characters (emoji)
        # are dropped instead of raising.
        args = [_console_bytes(arg) for arg in args]
    try:
        # Argument list (no shell) so page-derived text cannot break quoting.
        val = subprocess.call(args)
    except OSError as exc:
        print("failed to start aria2c: %s" % exc)
        return
    print(val)


if __name__ == '__main__':
    main()