# -*- coding: UTF-8 -*-
"""Crawl AMap (Gaode) place-search POIs for business/residential buildings.

Workflow:
  1. Read the adcodes belonging to the target city codes from the AMap
     city-code workbook (``FEXCEL``).
  2. Query the ``place/text`` endpoint for every adcode and every POI type
     code, using worker threads.
  3. Retry the adcodes that failed, then write everything to a CSV file.

API keys are read from ``key_pool.pool`` and rotated whenever the service
rejects a request.
"""
import csv
import math
import random
import sys
import threading
import time
from time import ctime, sleep

import requests
import xlrd

# AMap citycode values of the cities to crawl.
target_citycode_list = ['010', '021', '020', '0755']

# {citycode: {adcode: {'name': ..., 'adcode': ...}}}
adcode_dic = {}
FEXCEL = '高德地图API_城市编码对照表.xlsx'
# NOTE(review): xlrd >= 2.0 only reads .xls workbooks; opening this .xlsx
# file presumably requires xlrd < 2.0 -- confirm the pinned version.
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[1]
nrows = table.nrows
ncols = table.ncols
for row_index in range(nrows):
    row = table.row_values(row_index)
    # Columns read here: 0 = name, 1 = adcode, 2 = citycode.
    name_, adcode, citycode = row[0], row[1], row[2]
    if citycode in target_citycode_list:
        adcode_dic.setdefault(citycode, {})[adcode] = {
            'name': name_,
            'adcode': adcode,
        }

# Flat list of every adcode that has to be requested.
REQUEST_LIST = [
    info['adcode']
    for districts in adcode_dic.values()
    for info in districts.values()
]
REQUEST_LEN = len(REQUEST_LIST)

# Number of adcodes handled by each worker thread.
EACH_THREAD_REQUEST_NUM = 1
# Upper bound on the number of result pages requested per query.
MAX_PAGINATION = 100
# After every QPS requests the caller sleeps QPS_TIME_UNIT seconds.
QPS = 50
QPS_TIME_UNIT = 1
# Seconds to wait for an HTTP response before treating the request as failed.
REQUEST_TIMEOUT = 10
# http://lbs.amap.com/api/webservice/guide/tools/info
INFOCODE_OK = '10000'

KEY_POOL_LIST = []
touse_key = ''


def dynamic_write_pool_file():
    """Reload the API key pool from ``key_pool.pool`` into ``KEY_POOL_LIST``.

    Despite its name this function only *reads* the pool file.  On every
    non-blank line the second space-separated field is taken as the key;
    lines without such a field are reported and skipped.  A message is
    printed telling whether the pool differs from the previous one.

    Raises:
        OSError: if the pool file cannot be opened.
    """
    global KEY_POOL_LIST
    file_name_key_pool = 'key_pool.pool'
    previous_keys = KEY_POOL_LIST
    fresh_keys = []
    with open(file_name_key_pool, 'r', encoding='utf-8') as pool_file:
        for line in pool_file:
            if not line.strip():
                continue
            try:
                fresh_keys.append(line.split(' ')[1].split()[0])
            except IndexError:
                print('key_pool: skipping malformed line %r' % line)
    # Swap the list in one assignment so readers never see a half-filled pool.
    KEY_POOL_LIST = fresh_keys
    if previous_keys == fresh_keys:
        print(time.time(), '-old')
    else:
        print(time.time(), '66POOL-new')


dynamic_write_pool_file()

URL_TYPE = 'http://restapi.amap.com/v3/place/text'
# Number of records requested per page.
OFFSET_NUM = 24
OFFSET = '&offset=%s' % (OFFSET_NUM)
CITYLIMIT = '&citylimit=true'
# NOTE(review): unused; the AMap parameter is presumably spelled
# "extensions" -- confirm before appending this to a request URL.
EXTENTION = '&extention=all'

# POI type codes (code / category / sub-category / name):
# 120000  commercial & residential  related                related
# 120100  commercial & residential  industrial park        industrial park
# 120200  commercial & residential  buildings              building related
# 120201  commercial & residential  buildings              office building
# 120202  commercial & residential  buildings              industrial building
# 120203  commercial & residential  buildings              mixed commercial/residential building
# 120300  commercial & residential  residential area       residential area
# 120301  commercial & residential  residential area       villa
# 120302  commercial & residential  residential area       residential quarter
# 120303  commercial & residential  residential area       dormitory
# 120304  commercial & residential  residential area       community center
# AMap returns at most 1000 records per query, so each type code is
# requested on its own, at the finest granularity.
POI_TYPES_LIST = ['120000', '120100', '120200', '120201', '120202', '120203',
                  '120300', '120301', '120302', '120303', '120304']

# Cheap request used only to check whether a key is still accepted.
# NOTE(review): "OFFSET" is upper-case here while the real queries use
# "offset" -- confirm whether the service honours it.
URL_FOR_CHANGE_KEY = 'http://restapi.amap.com/v3/place/text?key=%s&types=060100&city=010&OFFSET=1'
change_key_qps = 0


def _key_is_usable(key):
    """Return True when a probe request made with *key* answers INFOCODE_OK."""
    global change_key_qps
    change_key_qps += 1
    if change_key_qps % QPS == 0:
        sleep(QPS_TIME_UNIT)
    try:
        response = requests.get(URL_FOR_CHANGE_KEY % (key),
                                timeout=REQUEST_TIMEOUT)
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        print('requests.get(url)', exc)
        return False
    return isinstance(payload, dict) and payload.get('infocode') == INFOCODE_OK


def change_key():
    """Select a working API key from the pool and store it in ``touse_key``.

    The pool file is reloaded first.  Keys other than the current one are
    probed in random order; the current key is probed last, as a fallback.

    Raises:
        SystemExit: with the message ``'NOInvalidKEY'`` when no key in the
            pool passes the probe.
    """
    global touse_key
    dynamic_write_pool_file()
    # AMap does not strictly follow its own QPS / daily-quota policy, so its
    # return codes cannot be used to schedule keys; pick randomly and probe.
    current_key = touse_key
    candidates = [key for key in KEY_POOL_LIST if key != current_key]
    random.shuffle(candidates)
    if current_key in KEY_POOL_LIST:
        candidates.append(current_key)
    for key in candidates:
        if _key_is_usable(key):
            touse_key = key
            return
    sys.exit('NOInvalidKEY')


requests_counter = 0
# Adcodes still waiting for a fully successful crawl.  This is a copy, so
# removing finished adcodes never mutates REQUEST_LIST.
todo_list = list(REQUEST_LIST)
# {adcode: [[poi, ...], [poi, ...]]} -- one inner list per fetched page.
tosupply_dic = {}


def _fetch_json(url):
    """GET *url* and return the decoded JSON object, or None on any failure."""
    global requests_counter
    if requests_counter % QPS == 0:
        sleep(QPS_TIME_UNIT)
    requests_counter += 1
    try:
        payload = requests.get(url, timeout=REQUEST_TIMEOUT).json()
    except (requests.RequestException, ValueError) as exc:
        print('requests.get(url)', exc)
        return None
    return payload if isinstance(payload, dict) else None


def _fetch_type_pages(request, poi_type):
    """Fetch every result page of one adcode / POI type combination.

    Returns a list with one ``pois`` list per page, or None when any request
    of the series failed or was rejected by the service.
    """
    url = '%s?key=%s&city=%s&types=%s%s%s' % (
        URL_TYPE, touse_key, request, poi_type, OFFSET, CITYLIMIT)
    first_page = _fetch_json(url)
    if first_page is None or first_page.get('infocode') != INFOCODE_OK:
        return None
    try:
        total = int(first_page.get('count', 0))
    except (TypeError, ValueError):
        return None
    page_count = min(math.ceil(total / OFFSET_NUM), MAX_PAGINATION)
    # The request without an explicit page number already is page 1.
    pages = [first_page.get('pois') or []]
    for page in range(2, page_count + 1):
        url_ = '%s&page=%s' % (url, page)
        print(url_)
        payload = _fetch_json(url_)
        if payload is None or payload.get('infocode') != INFOCODE_OK:
            return None
        pages.append(payload.get('pois') or [])
    return pages


def supply_dic(request):
    """Crawl all POI types of one adcode and record the outcome.

    On full success the fetched pages replace ``tosupply_dic[request]`` and
    the adcode is removed from ``todo_list``.  If any type failed, the adcode
    stays in (or is added to) ``todo_list`` for a later retry and the partial
    result is kept only when nothing was stored for that adcode before.

    Args:
        request: the adcode to query (sent as the ``city`` parameter).
    """
    if not touse_key:
        change_key()
    collected_pages = []
    failed = False
    for poi_type in POI_TYPES_LIST:
        pages = _fetch_type_pages(request, poi_type)
        if pages is None:
            failed = True
            # The key may have been rejected; rotate before the next type.
            change_key()
            continue
        collected_pages.extend(pages)

    if failed:
        if request not in todo_list:
            todo_list.append(request)
        if collected_pages:
            tosupply_dic.setdefault(request, collected_pages)
        return
    tosupply_dic[request] = collected_pages
    try:
        todo_list.remove(request)
    except ValueError:
        pass  # Already removed, e.g. by an earlier successful attempt.


# Retrying stops once at most this many adcodes are still pending.
MAX_EXCEPTION_URL_NUM = 0
# Maximum number of retry passes over the pending adcodes.
MAX_RETRY_ROUNDS = 10


def deal_exception_list():
    """Retry the adcodes left in ``todo_list``.

    Passes are repeated until no more than ``MAX_EXCEPTION_URL_NUM`` adcodes
    remain or ``MAX_RETRY_ROUNDS`` passes have been made, so a permanently
    failing adcode cannot keep the program retrying forever.
    """
    rounds_done = 0
    print(todo_list)
    while len(todo_list) > MAX_EXCEPTION_URL_NUM:
        if rounds_done >= MAX_RETRY_ROUNDS:
            print('giving up on', todo_list)
            return
        rounds_done += 1
        # Iterate over a snapshot: supply_dic() mutates todo_list.
        for pending_adcode in list(todo_list):
            supply_dic(pending_adcode)
        print(todo_list)


class MyThread(threading.Thread):
    """Thread that calls ``func(args)``, passing ``args`` as ONE argument."""

    def __init__(self, func, args, name=''):
        threading.Thread.__init__(self)
        self.name = name
        self.func = func
        self.args = args

    def run(self):
        self.func(self.args)


# Column order of the generated CSV file.
CSV_FIELDS = ('id', 'name', 'type', 'typecode', 'biz_type', 'address',
              'location', 'tel', 'distance', 'biz_ext', 'pname', 'cityname',
              'adname', 'shopid', 'shopinfo', 'poiweight')


def _supply_batch(adcodes):
    """Run supply_dic() for every adcode of one worker's batch."""
    for batch_adcode in adcodes:
        supply_dic(batch_adcode)


def _poi_row(poi):
    """Convert one POI dict into a CSV row; missing fields become ''."""
    row = []
    for field in CSV_FIELDS:
        value = poi.get(field, '')
        if field == 'location' and isinstance(value, str):
            # Keep "lng lat" in a single cell without an embedded comma.
            value = value.replace(',', ' ')
        row.append(value)
    return row


def main():
    """Crawl every adcode with worker threads, retry failures, write the CSV."""
    print('starting at:', ctime())
    if not touse_key:
        # Select a key once up front so the workers do not all probe at start.
        change_key()

    batch_size = max(1, EACH_THREAD_REQUEST_NUM)
    threads_list = [
        MyThread(_supply_batch, REQUEST_LIST[start:start + batch_size],
                 supply_dic.__name__)
        for start in range(0, REQUEST_LEN, batch_size)
    ]
    # The main process exits only after all non-daemon threads have finished.
    for worker in threads_list:
        worker.daemon = False
        worker.start()
    # Wait for all threads to finish.
    for worker in threads_list:
        worker.join()

    deal_exception_list()

    FGEN = 'GEN_GD_business_building.csv'
    # newline='' lets the csv module control line endings; csv.writer quotes
    # any value that contains commas, quotes or line breaks.
    with open(FGEN, 'w', encoding='utf-8-sig', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(CSV_FIELDS)
        for pages in tosupply_dic.values():
            for pois in pages:
                writer.writerows(_poi_row(poi) for poi in pois)


if __name__ == '__main__':
    main()