• 爬取饿了么区域商家分布


    这个简单,主要是熟悉一下数据格式的保存与基本的反反爬处理。

    https://mainsite-restapi.ele.me/v2/pois?

    extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby

    import json
    import os
    import urllib.parse
    import urllib.request

    from openpyxl import Workbook
    from openpyxl import load_workbook

    # Output path for the Excel workbook holding keyword search results.
    # NOTE(review): the original path lost its backslashes when pasted into the
    # blog ("C:UsersuyDesktop..."); reconstructed as a raw string below —
    # confirm the actual directory layout before running.
    keywordExcel = r"C:\Users\uy\Desktop\py3爬虫\饿了么\keyword.xlsx"  # 关键字检索外卖地点保存路径

    # District keywords to search; each one becomes its own worksheet.
    keywords = ["江干", "滨江"]  # 关键字集合

    def reqsetting():
        """Build the base urllib Request for the ele.me POI search API.

        Returns an ``urllib.request.Request`` whose URL is the bare API root;
        the caller appends the url-encoded query parameters. The browser-like
        headers (UA, Referer, Cookie, ...) are what gets the request past the
        site's basic anti-scraping checks.
        """
        # Old endpoint kept for reference:
        # weburl = "https://mainsite-restapi.ele.me/v2/pois?"
        weburl = "https://www.ele.me/restapi/v2/pois?"
        # Example of a full query string the caller will append:
        # extras%5B%5D=count&geohash=wx4g0bmjetr7&keyword=%E6%9C%9D%E9%98%B3&limit=20&type=nearby

        webheaders = {
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8",
            "Connection": "keep-alive",
            "Cookie": "ubt_ssid=ptvjtf67i9lr4uovi39wbvo83ty0239q_2019-02-18; _utrace=824a5a0d3496a33d798248e92c3d152f_2019-02-18; cna=PZ7vFIAQHgECAXueJlYerufe; track_id=1550466556|da0ddc135f632adfcaaeb3e72f35543e485d9b3b484492f856|898bc9f8ba51522ed41a4bd2fb7e039f; isg=BAIC-M_e6rep9_ZrR37SKPuYUwikeyfVgYwZokwaGXUon6kZNGPV_Qe-S5vjz36F",
            # BUG FIX: Host must match the host of weburl. The old value
            # "mainsite-restapi.ele.me" was left over from the commented-out
            # endpoint and contradicted the URL actually requested.
            "Host": "www.ele.me",
            "Origin": "https://www.ele.me",
            "Referer": "https://www.ele.me/home/",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36"
        }
        return urllib.request.Request(url=weburl, headers=webheaders)
    

    def write2Excel(jsondata, title):
        """Append one worksheet named *title* filled with the POI records in *jsondata*.

        Re-opens the workbook at ``keywordExcel`` if it already exists (so each
        keyword accumulates as its own sheet), otherwise creates it. Each record
        in *jsondata* is a dict with the keys listed in the header row.
        """
        fileName = keywordExcel
        # Reuse the existing workbook so sheets from earlier keywords survive.
        if os.path.exists(fileName):
            wb = load_workbook(fileName)
        else:
            wb = Workbook()

        ws = wb.create_sheet(title)
        ws.append(["ID", "城市", "geohash", "名称", "地址", "商家总数", "经度", "纬度", "request_id", "short_address"])

        # Column widths tuned to typical field lengths. (The original set column
        # "A" to 10.0 and then immediately overwrote it with 30.0; the dead first
        # assignment is dropped here.)
        widths = {"A": 30.0, "B": 10.0, "C": 18.0, "D": 20.0, "E": 50.0,
                  "F": 10.0, "G": 10.0, "H": 10.0, "I": 25.0, "J": 40.0}
        for col, width in widths.items():
            ws.column_dimensions[col].width = width

        # Iterate the records directly instead of indexing via range(len(...)).
        for row in jsondata:
            ws.append([row["id"], row["city"], row["geohash"], row["name"], row["address"], row["count"],
                       row["longitude"], row["latitude"], row["request_id"], row["short_address"]])
        wb.save(fileName)
    

    if __name__ == '__main__':  # 程序运行入口 — BUG FIX: was `if name == 'main':`, a NameError
        # Start fresh: remove any workbook left over from a previous run.
        if os.path.exists(keywordExcel):
            os.remove(keywordExcel)
        req = reqsetting()
        newUrl = req.get_full_url()
        for keyword in keywords:  # 遍历关键字集合,构造不同的请求参数,附加到URL 请求上
            params = urllib.parse.urlencode({
                "extras[]": "count",
                "geohash": "wtmknpnr9yy3",  # fixed search center; only the keyword varies
                "keyword": keyword,
                "limit": "20",
                "type": "nearby",
            })
            req.full_url = newUrl + params  # rebuild the full request URL
            print(req.full_url)
            # BUG FIX: pass the Request object, not the bare URL string —
            # urlopen(req.full_url) silently dropped every header built in
            # reqsetting(). Also close the response deterministically.
            with urllib.request.urlopen(req) as webpage:
                contentBytes = webpage.read().decode("utf-8")
            jsondata = json.loads(contentBytes)  # parse the JSON payload
            write2Excel(jsondata, keyword)  # one sheet per keyword
  • 相关阅读:
    springboot整合mybatis增删改查(一):项目创建
    springboot结合开源editor.md集成markdonw编辑器
    springboot发送邮件
    史上最全web.xml配置文件元素详解
    一套简约漂亮的响应式博客园主题皮肤分享给你们(二)
    一套简约漂亮的响应式博客园主题皮肤分享给你们(一)
    IDEA中项目统一编码格式设置
    windows上安装Gradle并配置环境变量
    linux自学(九)之开始centos学习,安装数据库MariaDB
    linux自学(七)之开始centos学习,安装jdk
  • 原文地址:https://www.cnblogs.com/WhiteCoder/p/10520550.html
Copyright © 2020-2023  润新知