import xlrd
from xlrd import xldate_as_tuple
import json
import requests
import datetime
import openpyxl
import os
import time
from bs4 import BeautifulSoup
import requests
import random
#第一步Execl中数据的获取
class ExcelData():
def __init__(self, data_path, sheetname):
self.data_path = data_path
self.sheetname = sheetname
self.data = xlrd.open_workbook(self.data_path)
self.table = self.data.sheet_by_name(self.sheetname)
self.keys = self.table.row_values(0)
self.rowNum = self.table.nrows
self.colNum = self.table.ncols
def readExcel(self):
datas = []
for i in range(1, self.rowNum):
sheet_data = {}
for j in range(self.colNum):
c_type = self.table.cell(i,j).ctype
c_cell = self.table.cell_value(i, j)
if c_type == 2 and c_cell % 1 == 0:
c_cell = int(c_cell)
elif c_type == 3:
date = datetime.datetime(*xldate_as_tuple(c_cell,0))
c_cell = date.strftime('%Y/%d/%m %H:%M:%S')
elif c_type == 4:
c_cell = True if c_cell == 1 else False
sheet_data[self.keys[j]] = c_cell
datas.append(sheet_data)
return datas
#第二步对获取出Execl表中的数据匹配百度地图api获取经纬度
def get_dim(addr,tag,city,province):
url = f'http://api.map.baidu.com/place/v2/search?query={addr}&tag={tag}®ion={city}'
f'&output=json&ak=你的ak'.format(addr,tag,city)
response = requests.get(url)
data = json.loads(response.text)
# print(data)
# print(data.get('results'))
try:
if len(data.get('results'))>0:
lat = data.get('results')[0].get('location').get('lat')
lng = data.get('results')[0].get('location').get('lng')
return str(lng)+','+str(lat)
else:
url1 = f'http://api.map.baidu.com/place/v2/search?query={city}&tag={tag}®ion={city}'
f'&output=json&ak=你的ak'.format(addr, tag, city)
response = requests.get(url1)
data1 = json.loads(response.text)
if len(data1.get('results')) > 0:
lat1 = data1.get('results')[0].get('location').get('lat')
lng1 = data1.get('results')[0].get('location').get('lng')
return str(lng1)+','+str(lat1)
else:
url2 = f'http://api.map.baidu.com/place/v2/search?query={province}&tag={tag}'
f'®ion={province}&output=json&ak=你的ak'.format(
addr, tag, city)
response = requests.get(url2)
data2 = json.loads(response.text)
if len(data2.get('results')) > 0:
lat2 = data2.get('results')[0].get('location').get('lat')
lng2 = data2.get('results')[0].get('location').get('lng')
return str(lng2)+','+str(lat2)
else:
return ""
except :
return ""
#
# #将获取到的代理IP存到ip_list中并返回列表
# def get_ip_list():
# # 随机代理获取的网站
# url = 'http://www.xicidaili.com/nn/'
# headers = {
# 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
# }
# web_data = requests.get(url, headers=headers)
# soup = BeautifulSoup(web_data.text, 'lxml')
# ips = soup.find_all('tr')
# ip_list = []
# for i in range(1, len(ips)):
# ip_info = ips[i]
# tds = ip_info.find_all('td')
# ip_list.append(tds[1].text + ':' + tds[2].text)
# return ip_list
#
# #从代理IP列表中随机取出一个IP并返回
# def get_random_ip(ip_list):
# proxy_list = []
# for ip in ip_list:
# proxy_list.append('http://' + ip)
# proxy_ip = random.choice(proxy_list)
# proxies = {'http': proxy_ip}
# return proxies
if __name__ == "__main__":
#代理IP的使用
# ip_list = get_ip_list()
#获取文件夹名称以及路径
path = datePath
os.chdir(path)
date = datetime.datetime.now().strftime("%Y%m%d")
data_path = "经纬度{}.xlsx".format(date)
# print(data_path)
print("收到输入,开始执行经纬度获取!")
sheetname = "Sheet1"
get_data = ExcelData(data_path, sheetname)
datas = get_data.readExcel()
# print(datas)
print(len(datas))
flag = 1
#第三步存储到Execl表中固定位置
for data in datas:
try:
# ip = get_random_ip(ip_list)
addr = data.get('项目地址')
#对地址进行去“#”处理,否则有的地址带有的话获取不到数据
addr = addr.replace("#",'')
addr = addr.replace(",",'')
tag = data.get('用途')
city = data.get("城市")
province = data.get("省份")
if(addr):
print(addr)
# print(get_dim(addr,tag,city))
co = get_dim(addr,tag,city,province)
wb = openpyxl.load_workbook(data_path)
ws = wb['Sheet1']
ws.cell(row=flag + 1, column=6).value = co
wb.save(data_path)
wb.close()
flag += 1
if(co):
print(co)
# time.sleep(0.5)
except():
continue
# print(datas)
print("执行完毕,共",flag,"行数据!",'继续下一步操作转移数据...')
get_data = ExcelData(data_path, "Sheet1")
datas = get_data.readExcel()
# print(datas)
lista = []
for i in datas:
lista.append(i.get("地图坐标"))
# print(lista)
wb = openpyxl.load_workbook('{}.xlsx'.format(date))
ws = wb.worksheets[0]
for index, row in enumerate(ws.rows):
if index == 0:
row[23].value = '地图坐标'
else:
row[23].value = lista[index - 1]
wb.save('{}.xlsx'.format(date))
print("经纬度操作全部完成!")