• 抖音爬虫


    import requests
    import time
    import re
    import json
    import pandas as pd
    # Request headers passed to every requests.get call in this script;
    # the only field set is the 'user-agent' value.
    headers= {'user-agent': 'mobile'}
    def _fetch_json(url, timeout):
        """GET *url* with the module-level ``headers`` and return the parsed JSON body."""
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        return json.loads(response.content.decode('utf-8'))


    def douyin_Spyder(id, url2, timeout=30):
        """Print a user's profile counters, download their videos and export stats.

        The function performs three steps:

        1. Requests the user endpoint for ``id`` and prints the follower count
           and total likes found under the ``'user'`` key of the response.
        2. Requests ``url2`` and, for every entry of its ``'aweme_list'``,
           downloads the video file to a hard-coded desktop folder.
        3. Writes one row per video (ID, title, share link, comment/like/share
           counts) to a dated Excel file on the desktop.

        Args:
            id: User id substituted into the ``user_id`` query parameter.
            url2: URL whose JSON response contains an ``'aweme_list'`` array.
            timeout: Seconds passed to each ``requests.get`` call as its
                timeout; defaults to 30.

        Raises:
            requests.HTTPError: If any request returns an HTTP error status.
            KeyError: If a response lacks one of the expected JSON keys.
            IndexError: If a video entry has an empty play-address list.
        """
        user_url = 'http://aweme.snssdk.com/aweme/v1/user/?user_id={}&retry_type=retry_http&iid=59238161664&device_id=62578609382&ac=wifi&channel=aweGW&aid=1128&app_name=aweme&version_code=230&version_name=2.3.0&device_platform=android&ssmix=a&device_type=CHM-TL00H&device_brand=Honor&language=zh&os_api=19&os_version=4.4.4&uuid=745270478576539&openudid=589e358ee90e53&manifest_version_code=230&resolution=720*1280&dpi=320&update_version_code=2302&_rticket=1548395034447&ts={}&as=a1659a843a314c425a4355&cp=a518ca55a1a04624e1gkoo&mas=0141e7dcb9b69675674bffb55a194f1c3facaccc2c86ac4c2cc62c'.format(id, time.time())
        user_info = _fetch_json(user_url, timeout)
        # The video list is fetched exactly once (the original requested it twice).
        feed = _fetch_json(url2, timeout)

        print('******主页数据******')
        print('粉丝数:', user_info['user']['follower_count'])
        print('获赞数:', user_info['user']['total_favorited'])

        aweme_list = feed['aweme_list']
        print('******视频区数据******')

        # Separate names so the ``id`` parameter is not rebound to a list.
        aweme_ids = []
        titles = []
        share_links = []
        comment_counts = []
        digg_counts = []
        share_counts = []

        for i, aweme in enumerate(aweme_list):
            play_urls = aweme['video']['play_addr']['url_list']
            # Keep the original preference for the second address, but fall
            # back to the first one when only a single URL is returned.
            video = play_urls[1] if len(play_urls) > 1 else play_urls[0]
            print('视频地址:', video)

            res = requests.get(video, headers=headers, timeout=timeout)
            # Do not save an HTTP error page under a .mp4 name.
            res.raise_for_status()
            with open(r'C:/Users/Administrator/Desktop/B站视频/迪丽热巴'+str(i)+'.mp4', 'wb') as f:
                f.write(res.content)

            stats = aweme['statistics']
            comment_counts.append(stats['comment_count'])
            digg_counts.append(stats['digg_count'])
            share_counts.append(stats['share_count'])
            share_links.append(aweme['share_url'])
            aweme_ids.append(aweme['aweme_id'])
            titles.append(aweme['desc'])

        df = pd.DataFrame({'ID': aweme_ids, '标题': titles, '链接地址': share_links, '评论数': comment_counts, '点赞数': digg_counts, '转发量': share_counts})
        df = df.set_index('ID')
        # strftime without a time tuple formats the current local time.
        tim = time.strftime('%Y-%m-%d')
        df.to_excel('C:/Users/Administrator/Desktop/'+str(tim)+'-7.xlsx')
    
    
    if __name__ == '__main__':
        # Ask for the video-list URL on stdin and run the scraper for the
        # fixed user id.
        douyin_Spyder(79302973596, input('url:'))
  • 相关阅读:
    需求变更的种类及应对方式
    SQL Server中连接远程表、查询其它服务器的数据、导入或导出到其它Sql Server服务器数据
    在IE9中MSWC.BrowserType组件无法识别Cookie的问题
    优秀软件的几个重要标准
    对待代码的态度反应着对待自己的态度
    应对企业不断变化的系统
    在SQL中插入®特殊字符
    如何让领导认识到测试的重要性,在沟通时要注意的几点
    男人要补肾,强肾健脑对能持久做程序
    你可能不知道的Visual Studio 2010使用技巧(VS2010的秘密)
  • 原文地址:https://www.cnblogs.com/snackpython/p/10329204.html
Copyright © 2020-2023  润新知