• 模拟ajax请求爬取微博


    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time    : 2018/9/26 10:26
    # @Author  : Sa.Song
    # @Desc    : 抓取崔庆才微博, 模拟ajax请求
    # @File    : weiBo.py
    # @Software: PyCharm
    
    import requests
    from urllib.parse import urlencode
    from pyquery import PyQuery as pq
    # Request headers sent with every API call. The X-Requested-With value
    # marks the request as an XMLHttpRequest (Ajax) call.
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Referer': 'https://m.weibo.cn/u/2830678474',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
        ),
        'X-Requested-With': 'XMLHttpRequest',
    }

    # Endpoint prefix; the url-encoded query string is appended after the '?'.
    base_url = 'https://m.weibo.cn/api/container/getIndex?'
    
    def get_page(page):
        """Fetch one page of the timeline from the mobile Ajax endpoint.

        Args:
            page: Page number, sent as the ``page`` query parameter.

        Returns:
            The decoded JSON body when the server answers with HTTP 200,
            otherwise ``None`` (non-200 status, network error, timeout, or a
            body that is not valid JSON).
        """
        query = {
            'type': 'uid',
            'value': '2830678474',
            'containerid': '1076032830678474',
            'page': page,
        }
        url = base_url + urlencode(query)
        try:
            # Without a timeout a stalled connection would block the whole
            # crawl indefinitely.
            response = requests.get(url=url, headers=headers, timeout=10)
            if response.status_code == 200:
                return response.json()
            # Report the failure instead of dropping the page silently.
            print('报错:', 'HTTP', response.status_code)
        except (requests.RequestException, ValueError) as e:
            # RequestException covers connection/timeout errors; ValueError
            # covers a response body that cannot be decoded as JSON.
            print('报错:', e)
        return None
    
    def parse_message(json):
        """Extract the id and plain text of every post in one response page.

        Args:
            json: Decoded response from ``get_page``, or ``None``/empty when
                the request failed.

        Returns:
            A list of ``{'id': ..., 'text': ...}`` dicts, one per card that
            carries an ``mblog`` entry. The list is also printed. It is empty
            when the response is missing or has no ``data``/``cards``.
        """
        weibo_data = []
        # A response without a `data` object or a `cards` list would otherwise
        # raise AttributeError/TypeError; treat it as an empty page instead.
        cards = ((json or {}).get('data') or {}).get('cards') or []
        for card in cards:
            mblog = card.get('mblog')
            if mblog is None:
                # Cards without an `mblog` entry carry no post; skip them.
                continue
            raw_text = mblog.get('text')
            weibo_data.append({
                'id': mblog.get('id'),
                # The text is markup; pq(...).text() reduces it to plain text.
                # pq() is only called when there is markup to parse.
                'text': pq(raw_text).text() if raw_text else '',
            })
        print(weibo_data)
        return weibo_data
    
    if __name__ == '__main__':
        # Walk page numbers 0 through 99, fetching each page and printing the
        # posts parsed from it.
        for page_no in range(100):
            parse_message(get_page(page_no))
  • 相关阅读:
    UVA 1001 Say Cheese
    UVa 821 Page Hopping
    UVA 1569 Multiple
    UVA 1395 Slim Span
    UVA 12219 Common Subexpression Elimination
    UVA 246 10-20-30
    Mysql基本操作
    浅析关键字static
    面试回答技巧
    五个程序员好习惯
  • 原文地址:https://www.cnblogs.com/ss-py/p/9706722.html
Copyright © 2020-2023  润新知