• 微博爬取数据


    import requests
    from urllib.parse import urlencode
    from pyquery import PyQuery as pq
    #from pymongo import MongoClient

    # Endpoint of the mobile Weibo JSON API; query parameters are appended to it.
    base_url = 'https://m.weibo.cn/api/container/getIndex?'

    # Request headers sent with every API call.
    headers = {
        # The server uses the Host value to decide which site the request targets.
        'Host': 'm.weibo.cn',
        # Page the request claims to originate from.
        'Referer': 'https://m.weibo.cn/u/2830678474',
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.36'
        ),
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Number of pages the script requests when run directly.
    max_page = 10


    def get_page(page, timeout=10):
        """Fetch one page of the user's timeline from the mobile Weibo JSON API.

        Args:
            page: Page number sent as the ``page`` query parameter.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the script indefinitely.

        Returns:
            A ``(payload, page)`` tuple, where ``payload`` is the decoded JSON
            body, or ``None`` when the request fails, the status code is not
            200, or the body is not valid JSON.
        """
        params = {
            'type': 'uid',
            'value': '2830678474',
            'containerid': '1076032830678474',
            'page': page,
        }
        url = base_url + urlencode(params)  # build the full request URL
        print("拼接完成的url", url)
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            if response.status_code == 200:
                return response.json(), page
        except (requests.RequestException, ValueError) as e:
            # RequestException covers connection errors and timeouts;
            # ValueError covers a body that cannot be decoded as JSON.
            print('Error', e.args)
        # Explicit failure result: callers must check for None before unpacking.
        return None


    def parse_page(json, page: int):
        """Yield one summary dict per post found in an API response payload.

        Args:
            json: Decoded response body (a dict); a falsy value yields nothing.
            page: Page number the payload was fetched for.

        Yields:
            Dicts with the keys ``id``, ``text`` (the post text passed through
            PyQuery's ``.text()``, or ``''`` when the post has no text),
            ``attitudes``, ``comments`` and ``reposts``.
        """
        if not json:
            return
        print("json************json", json)
        # Tolerate a payload whose 'data' or 'cards' entry is missing or null
        # instead of failing on a chained .get() / enumerate(None).
        cards = (json.get('data') or {}).get('cards') or []
        for index, card in enumerate(cards):
            # NOTE(review): the second card of the first page is always skipped;
            # presumably it is not a regular post -- confirm this is still needed
            # now that cards without 'mblog' are skipped below.
            if page == 1 and index == 1:
                continue
            mblog = card.get('mblog')
            if not mblog:
                # No post attached to this card, so there is nothing to extract.
                continue
            raw_text = mblog.get('text')
            yield {
                'id': mblog.get('id'),
                # Only hand real markup to pq(); missing/empty text becomes ''.
                'text': pq(raw_text).text() if raw_text else '',
                'attitudes': mblog.get('attitudes_count'),
                'comments': mblog.get('comments_count'),
                'reposts': mblog.get('reposts_count'),
            }


    # def save_to_mongo(result):
    # if collection.insert(result):
    # print('Saved to Mongo')


    if __name__ == '__main__':
        # Fetch each page in turn and print every post extracted from it.
        for page in range(1, max_page + 1):
            fetched = get_page(page)
            if fetched is None:
                # get_page returns None on a failed request; unpacking it
                # would raise TypeError, so skip this page instead.
                continue
            payload, page_no = fetched
            for result in parse_page(payload, page_no):
                print(result)
                # save_to_mongo(result)
  • 相关阅读:
    [bzoj1500][luogu2042][cogs339][codevs1758]维修数列(维护数列)
    无旋treap的简单思想以及模板
    [hdu2036]改革春风吹满地
    (treap)[bzoj3224][洛谷3369][cogs1829]Tyvj 1728 普通平衡树
    [bzoj3875][Ahoi2014]骑士游戏
    [bzoj1433][ZJOI2009]假期的宿舍
    <struct、union、enum>差异
    LeetCode(50) Pow(x,n)
    LeetCode(49)Group Anagrams
    LeetCode(48)Rotate Image
  • 原文地址:https://www.cnblogs.com/yuanjia8888/p/9999745.html
Copyright © 2020-2023  润新知