• 微博根据关键字搜索爬虫


    1.登录获取cookies
    2.cookie字符串转cookies字典

    # -*- coding: utf-8 -*-
    # TODO cookies_str转cookies_dic
    # @Date    : 2022/4/22 9:38
    # @Author  : layman
    # NOTE(review): session cookies are pasted inline here; consider loading them
    # from an environment variable or an untracked file instead.
    cookies_str = "SINAGLOBAL=462092313429110.737.1648189947190; login_sid_t=799d349cdfsd25759903d131ca6fd0ad0; cross_origin_proto=SSL; _s_tentry=weibo.com; Apache=8348613412866.332.1650589816565; ULV=1650589816569:2:1:1:8348613412866.332.1650589816565:1648189947200; SUB=_2A25PZnDJDeRhGeFN6VUW-S_Kyj6IHXVsEuUBrDV8PUNbmtAKLUL6kW9NQFh55mlCd6g7TuU659NR2F5DNWShYC_i; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WF4kv-4n5KEAdq3XeiQfdqc5JpX5KzhUgL.FoM0eoMN1K2ceKz2dJLoI7LbIgUjqPL_qgRt; ALF=1682125848; SSOLoginState=1650589849; wvr=6; webim_unReadCount=%7B%22time%22%3A1650589853165%2C%22dm_pub_total%22%3A9%2C%22chat_group_client%22%3A0%2C%22chat_group_notice%22%3A0%2C%22allcountNum%22%3A32%2C%22msgbox%22%3A0%7D; PC_TOKEN=0d19237494; WBStorage=4d96c54e|undefined"


    def cookies_str_to_dict(raw):
        """Convert a ``name=value; name=value`` cookie header string to a dict.

        Each pair is split on its FIRST ``=`` only, so a value that itself
        contains ``=`` (base64 padding, nested ``k=v`` payloads) is kept whole
        instead of being truncated to the text after the last ``=``.

        Args:
            raw: Cookie string as copied from a browser's request headers.

        Returns:
            dict mapping cookie name to cookie value. Empty segments are
            skipped; a segment without ``=`` maps its name to ``''``.
        """
        parsed = {}
        for segment in raw.split(';'):
            segment = segment.strip()
            if not segment:
                # Tolerate a trailing ';' or an empty input string.
                continue
            name, _, value = segment.partition('=')
            parsed[name] = value
        return parsed


    cookies_dic = cookies_str_to_dict(cookies_str)

    print(cookies_dic)
    
    

    3.爬取收集

    # -*- coding: utf-8 -*-
    # TODO 微博查询
    # @Date    : 2022/4/22 9:12
    # @Author  : layman
    import json
    import time
    
    import pandas as pd
    import pymysql
    import requests
    from lxml import etree
    
    # Request headers sent with every search request (referer + browser UA).
    headers = {
        'referer': 'https://s.weibo.com/user?q=%E5%AE%9C%E6%98%8C&Refer=weibo_user',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
    }
    # Logged-in session cookies as a dict, in the form requests expects.
    # NOTE(review): credentials are hard-coded; consider loading them from
    # outside the source file.
    cookies = {'SINAGLOBAL': '462092384310.737.1648189947190', 'login_sid_t': '799d349cf324w903d131ca6fd0ad0',
               'cross_origin_proto': 'SSL', 'PC_TOKEN': 'c797273222', '_s_tentry': 'weibo.com',
               'Apache': '8348613412866.332.1650589816565',
               'ULV': '1650589816569:2:1:1:8348613412866.332.1650589816565:1648189947200',
               'SUB': '_2A25PZnDJDeRhewrN6VUW-S_Kyj6IHXVsEuUBrDV8PUNbmtAKLUL6kW9NQFh55mlCd6g7TuU659NR2F5DNWShYC_i',
               'SUBP': '0033WrSXqPxfM725Ws9jqgMF55529P9D9WF4kv-4n5KEAdq3XeiQfdqc5JpX5KzhUgL.FoM0eoMN1K2ceKz2dJLoI7LbIgUjqPL_qgRt',
               'ALF': '1682125848', 'SSOLoginState': '1650589849', 'wvr': '6',
               'webim_unReadCount': '%7B%22time%22%3A1650589853165%2C%22dm_pub_total%22%3A9%2C%22chat_group_client%22%3A0%2C%22chat_group_notice%22%3A0%2C%22allcountNum%22%3A32%2C%22msgbox%22%3A0%7D',
               'WBStorage': '4d96c54e|undefined'}
    db = pymysql.connect(host='localhost', port=3306,
                         user='root', passwd='root', db='wxb', charset='utf8')

    cursor = db.cursor()

    # Parameterized insert; built once because it does not change per row.
    insert_sql = "INSERT INTO weibo(user_name, official, user_fans) VALUES (%s,%s,%s)"


    def _joined_text(node, path):
        """Return the text nodes matched by ``path`` under ``node``, concatenated."""
        return ''.join(node.xpath(path))


    try:
        for page in range(1, 51):
            try:
                # Send the prepared headers along with the cookies, and bound
                # the wait so a stalled connection cannot hang the crawl.
                resp = requests.get(
                    url=f'https://s.weibo.com/user?q=%E5%AE%9C%E6%98%8C&Refer=weibo_user&page={page}',
                    headers=headers, cookies=cookies, timeout=10)
            except requests.RequestException as exc:
                print(f'page {page}: request failed: {exc}')
                continue
            finally:
                # Throttle between requests whether or not this one succeeded.
                time.sleep(1)

            html = etree.HTML(resp.text)
            feed = html.xpath('//*[@id="pl_user_feedList"]') if html is not None else []
            if not feed:
                # No result container on this page; skip instead of relying on
                # a swallowed IndexError.
                print(f'page {page}: user list not found, skipping')
                continue

            # Extract the three fields per result card. Zipping three
            # independently flattened lists would pair a name with another
            # user's data as soon as one card lacks a field.
            for card in feed[0].xpath('./div[*]'):
                user_name = _joined_text(card, './div[2]/div/a[1]/text()')
                if not user_name:
                    continue
                official = _joined_text(card, './div[2]/p[2]/text()')
                user_fans = _joined_text(card, './div[2]/p[3]/span[2]/a/text()')
                print(official)
                if not official.strip():
                    official = '非官微'
                values = (user_name, official, user_fans)
                try:
                    cursor.execute(insert_sql, values)
                    db.commit()
                except pymysql.MySQLError as exc:
                    # Best effort: report the failed row, undo it, keep crawling.
                    db.rollback()
                    print(f'page {page}: insert failed for {user_name!r}: {exc}')
    finally:
        # Release the database resources even if the crawl is interrupted.
        cursor.close()
        db.close()
    
    
  • 相关阅读:
    OSI模型白话
    并发
    初始化与清理
    多线程
    recyclerview Adapter
    recyclerview刷新
    surfaceview
    viewgroup绘制流程
    view配置
    项目遇到的问题
  • 原文地址:https://www.cnblogs.com/shun998/p/16186209.html
Copyright © 2020-2023  润新知