• 获取微博广告博文数据


    import requests
    import json
    import pandas as pd
    import time
    import re 
    
    # HTTP headers sent with every request made by get_ad().
    # NOTE(review): both values look like placeholders; presumably a real
    # browser User-Agent and a valid session Cookie must be filled in.
    headers = {
        'User-Agent': 'XXXX',
        'Cookie': 'XXX',
    }
    
    def get_ad(page):
        """Fetch one page of the Weibo feed and return the ad posts on it.

        Args:
            page: Page index substituted into the ``since_id`` query
                parameter of the request URL. The page number recorded in
                each result is ``page + 1``.

        Returns:
            A list of dicts, one per card whose ``mblog.from_cateid`` is one
            of the ad categories. Each dict holds the author's uid and
            screen name, the card's 1-based position on the page, the page
            number, the post text with HTML tags removed, and the repost /
            comment / like counts. The list is empty when the response has
            no cards or none of them is an ad.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails
                or does not complete within the timeout.
            ValueError: If the response body is not valid JSON.
        """
        url = 'https://m.weibo.cn/api/container/getIndex?containerid=102803&openApp=0&since_id={}'.format(page)

        # A timeout keeps one stalled connection from hanging the whole run.
        res = requests.get(url, headers=headers, timeout=10)
        data = json.loads(res.text)

        # Responses without a data/cards payload yield no ads instead of
        # raising KeyError.
        cards = (data.get('data') or {}).get('cards') or []

        # Compiled once per call rather than once per matching card.
        tag_pattern = re.compile(r'<.*?>')
        ad_categories = ('Brand', 'Sfst', 'FanstopExtend', 'Wax')

        all_ad = []
        for rank, card in enumerate(cards, start=1):
            # Cards without an 'mblog' entry are not posts; skip them.
            mblog = card.get('mblog')
            if not mblog:
                continue
            if mblog.get('from_cateid') not in ad_categories:
                continue

            user = mblog['user']
            result = {}
            result['uid'] = user['id']
            result['昵称'] = user['screen_name']
            result['排名'] = rank
            result['出现页数'] = page+1
            # Remove HTML markup from the post body.
            result['博文'] = tag_pattern.sub('', mblog['text'])
            result['转发数'] = mblog['reposts_count']
            result['评论数'] = mblog['comments_count']
            result['点赞数'] = mblog['attitudes_count']
            all_ad.append(result)
        return all_ad
    
    # Collect ad posts from the first 50 feed pages, then write them to a
    # timestamped Excel workbook in the current directory.
    all_data = []
    
    for page_index in range(50):
        # Pause one second before each request.
        time.sleep(1)
        # Progress output: number of ads collected so far.
        print(len(all_data))
        page_ads = get_ad(page_index)
        if not page_ads:
            continue
        all_data.extend(page_ads)
    
    timestamp = time.strftime("%Y%m%d%H%M%S")
    output_name = 'result' + timestamp + '.xlsx'
    df1 = pd.DataFrame(all_data)
    df1.to_excel(output_name, index=False)
    print('done')
  • 相关阅读:
    类的多重继承
    实例属性和类属性
    协程
    nginx安装与配置
    Linux系统优化及状态监控
    MongoDb安全配置:简单的身份认证
    MongoDB YAML格式的配置文件
    yum使用,使用rpm指令安装rpm,使用dpkg指令安装deb
    MongoDB默认配置
    被锐速加防火墙坑了一下。。。
  • 原文地址:https://www.cnblogs.com/Erick-L/p/9057386.html
Copyright © 2020-2023  润新知