• 百度地图爬取数据


    # -*- coding:utf-8 -*-
    import requests
    import re
    import xlwt
    import demjson
    import time
    import json

    class get_location():
        """Look up place names through Baidu Maps and record panorama availability.

        Workflow implemented by the methods below:

        1. ``get_html`` downloads a URL and returns its text.
        2. ``parse`` turns a JSON list of named places into Baidu Place-search
           URLs, collected in ``self.li``.
        3. ``red_dot_parse`` reads the coordinates out of a Place-search
           response and asks ``Judge`` to probe the panorama endpoint for each.
        4. ``Judge`` prints the places that return a JPEG and passes the others
           to ``excel_write``, which appends them to an ``.xls`` sheet.
        """

        # (keyword, region prefix) pairs checked against every place name, in
        # this order.  A name produces one URL per keyword it contains, so a
        # name matching several keywords is queued several times.
        _KEYWORD_REGION_PREFIXES = (
            ("区", "宝鸡市"),
            ("街道", "宝鸡市"),
            ("县", ""),
            ("镇", "宝鸡市"),
            ("公园", "宝鸡市"),
            ("法门寺", "宝鸡市"),
        )

        # Upper bound on attempts for one URL; the previous implementation
        # retried non-200 responses recursively with no limit.
        _MAX_ATTEMPTS = 5

        # Seconds to wait for a response before giving up on a request.
        _REQUEST_TIMEOUT = 10

        def __init__(self, output_path=r'C:Users85740Desktopget_ip.xls'):
            """Set up request headers, result containers and the output workbook.

            :param output_path: file the workbook is saved to by ``excel_write``.
                NOTE(review): the default is kept exactly as found; it looks
                like a Windows path whose backslashes were lost - confirm and
                pass an explicit path if so.
            """
            self.i = 0          # next free row in the sheet
            self.li = []        # Place-search URLs produced by parse()
            self.dict1 = {}     # panorama URL -> place name
            # NOTE(review): API key is hard-coded in the source.
            self.li_ak = 'U0QGae7viQsN0yLBirGsRD90XI0tlcGO'
            self.headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36',
                'Cookie': 'JSESSIONID=15BF07D8D4F4515A5C2247D5606AECB2'
            }
            self.output_path = output_path
            self.f = xlwt.Workbook(encoding='utf-8')
            self.sheet1 = self.f.add_sheet(u'sheet1', cell_overwrite_ok=True)

        def get_html(self, url):
            """Download ``url`` and return the response body as text.

            Non-200 responses are retried after a short pause, up to
            ``_MAX_ATTEMPTS`` attempts in total.

            :param url: address to request
            :return: the decoded body, or ``None`` when the request raised or
                every attempt returned a non-200 status
            """
            status = None
            for _ in range(self._MAX_ATTEMPTS):
                try:
                    res = requests.get(url, headers=self.headers,
                                       timeout=self._REQUEST_TIMEOUT)
                except Exception as e:  # report the reason and give up on this URL
                    print("问题是", e)
                    return None
                if res.status_code == 200:
                    res.encoding = res.apparent_encoding
                    return res.text
                status = res.status_code
                time.sleep(0.1)
            print("问题是", "HTTP {} {}".format(status, url))
            return None

        def parse(self, response, url_location_detail):
            """Queue one Place-search URL per keyword found in each place name.

            :param response: JSON text holding a list of objects with a
                ``name`` key, e.g. ``[{"id":610300,"lat":34.36784,
                "lng":107.24291,"name":"宝鸡市"}]``
            :param url_location_detail: base URL of the search endpoint, to
                which the query string is appended
            :return: ``None``; the URLs are appended to ``self.li``
            """
            if not response:
                # get_html() returns None on failure; nothing to parse then.
                print("问题是", "empty response")
                return
            for item in json.loads(response):
                name = item['name']
                for keyword, region_prefix in self._KEYWORD_REGION_PREFIXES:
                    if keyword in name:
                        # e.g. .../search?query=坪头镇&region=宝鸡市坪头镇&output=json&ak=...
                        self.li.append(
                            url_location_detail + "?query=" + name
                            + "&region=" + region_prefix + name
                            + "&output=json&ak=" + self.li_ak
                        )

        def red_dot_parse(self, response):
            """Build a panorama URL for every located search result and check it.

            Each result that carries both ``lat`` and ``lng`` is registered in
            ``self.dict1`` (panorama URL -> name) and handed to ``Judge``.
            Results without a usable location are skipped.  A failure while
            checking one place is reported and does not stop the others.

            :param response: JSON text of a Place-search response
            """
            if not response:
                return
            response_json = json.loads(response)
            view_url = "http://api.map.baidu.com/panorama/v2?ak={}&width=512&height=256&location={},{}&fov=180"
            # "results" may be absent, e.g. when the API answers with an error.
            for place in response_json.get("results") or []:
                name = place.get("name")
                location = place.get('location')
                if not location or 'lat' not in location or 'lng' not in location:
                    continue
                detail_view_url = view_url.format(
                    self.li_ak, location['lng'], location['lat'])
                self.dict1[detail_view_url] = name
                try:
                    self.Judge(detail_view_url)
                except Exception as e:  # best effort per place, but say why
                    print("问题是", e)

        def Judge(self, detail_view_url):
            """Request a panorama URL and record the place if no image comes back.

            :param detail_view_url: panorama URL previously registered in
                ``self.dict1``
            :raises KeyError: if the URL was never registered in ``self.dict1``
            """
            # Look the name up by the key it was stored under; the URL reported
            # by the response object is not guaranteed to be the same string.
            name = self.dict1[detail_view_url]
            res = requests.get(detail_view_url, headers=self.headers,
                               timeout=self._REQUEST_TIMEOUT)
            content_type = res.headers.get('Content-Type', '')
            if content_type.split(';')[0].strip() == 'image/jpeg':
                print("有全景的地方", name)
            else:
                # Decoding the body keeps the earlier behaviour: a body that is
                # not JSON raises here and the place is not recorded.
                res.json()
                # NOTE(review): the source lost its indentation, so whether this
                # write belongs inside the else branch or after it cannot be
                # told from here - confirm against the intended output.
                self.excel_write(name)

        def excel_write(self, text_ip):
            """Append ``text_ip`` to column 0 of the sheet and save the workbook.

            :param text_ip: value written to the next free row
            """
            self.sheet1.write(self.i, 0, text_ip)
            self.i += 1
            self.f.save(self.output_path)  # saved after every row

    if __name__ == '__main__':
        # Entry point: fetch the place catalogue, build the Place-search URLs,
        # then check every search result against the panorama endpoint.
        url = "http://imapway.cn:8098/bjzs/video/getVideoCatalog"
        a = get_location()
        url_location_detail = 'http://api.map.baidu.com/place/v2/search'
        catalog_html = a.get_html(url)
        # get_html() returns None when the request fails; skip parsing then
        # instead of handing None to json.loads().
        if catalog_html is not None:
            a.parse(catalog_html, url_location_detail)
        for url in a.li:
            search_html = a.get_html(url)
            if search_html is not None:
                a.red_dot_parse(search_html)  # extract the marker coordinates

  • 相关阅读:
    Python--前端之HTML
    Python--MySql(主键的创建方式、存储引擎、存储过程、索引、pymsql)
    python--MySql(外键约束、多表查询(*****))
    python--MySql 表记录的操作
    python--MySql
    Python--线程队列(queue)、multiprocessing模块(进程对列Queue、管道(pipe)、进程池)、协程
    Python--同步锁(互斥锁)、死锁(状态)、递归锁、信号量、Event对象
    Python--多线程、多进程常用概念
    Python--基础之socket编程
    ubuntu 安装 flashplayer
  • 原文地址:https://www.cnblogs.com/yuanjia8888/p/9831060.html
Copyright © 2020-2023  润新知