• Scrape information on all Huya (huya.com) live-stream rooms that are online in the same time window


    import requests
    from openpyxl import Workbook
    from bs4 import BeautifulSoup
    from tqdm import tqdm


    class LiveSpider:
        def super_spider(self):
            # Browser-like headers; Huya's cache.php endpoint expects an AJAX-style request
            headers = {
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                              "AppleWebKit/537.36 (KHTML, like Gecko) "
                              "Chrome/85.0.4183.102 Safari/537.36",
                "x-requested-with": "XMLHttpRequest"
            }
            # Fetch the category overview page to estimate how many game categories (gameId values) exist
            response = requests.get("https://www.huya.com/g", headers=headers)
            soup = BeautifulSoup(response.text, "html.parser")
            for gameId in tqdm(range(1, len(soup.find_all("li")))):
                # One workbook per game category
                workbook = Workbook()
                sheet = workbook.active
                # Header row: category, room name, room id (uid), streamer nickname, description, viewer count
                sheet.append(["直播类型", "直播房间名", "房间号", "主播昵称", "直播介绍", "人流量"])
                params = {
                    "m": "LiveList",
                    "do": "getLiveListByPage",
                    "gameId": gameId,
                    "tagAll": "0",
                    "page": "1"
                }
                # First request only reads the total number of pages for this category
                response = requests.get("https://www.huya.com/cache.php", params=params, headers=headers)
                total_pages = int(response.json()["data"]["totalPage"])
                for page in range(1, total_pages + 1):
                    params["page"] = str(page)
                    live_response = requests.get("https://www.huya.com/cache.php", params=params, headers=headers)
                    # Parse the JSON once per page instead of once per room
                    for live in live_response.json()["data"]["datas"]:
                        row = [live["gameFullName"], live["roomName"], live["uid"], live["nick"],
                               live["introduction"], live["totalCount"]]
                        try:
                            sheet.append(row)
                        except Exception:
                            # Skip rows openpyxl cannot write (e.g. illegal characters)
                            pass
                workbook.save(f"huya_live_{gameId}.xlsx")


    spider = LiveSpider()
    spider.super_spider()
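The spider writes one workbook per game category. As a quick sanity check, the saved data can be read back with openpyxl — a minimal sketch, assuming a file such as huya_live_1.xlsx (a hypothetical example of the huya_live_<gameId>.xlsx pattern used above) already exists in the working directory:

    from openpyxl import load_workbook

    # Hypothetical filename; substitute any huya_live_<gameId>.xlsx produced by the spider
    workbook = load_workbook("huya_live_1.xlsx")
    sheet = workbook.active
    print(f"{sheet.max_row - 1} rooms recorded")  # subtract the header row
    # Show the first few data rows (row 1 is the header)
    for row in sheet.iter_rows(min_row=2, max_row=6, values_only=True):
        print(row)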
  • Original post: https://www.cnblogs.com/liuyuchao/p/14017154.html