版本
Elasticsearch 5.5.0
Python 3.7
说明
使用 Python 查询 Elasticsearch 中存储的状态数据,并用 pandas 将查询结果整理后导出为 Excel 文件。
code
# -*- coding: utf-8 -*-
# @Time : 2019/7/22 10:41
# @Author : Skyell Wang
# @FileName: es_data_get.py
from elasticsearch import Elasticsearch
import pandas as pd
import os
def elastic_data(vin):
    """Look up one VIN in Elasticsearch and return its first matching document.

    Runs a ``term`` query on the ``vin`` field and restricts the returned
    ``_source`` to the ``vin`` and ``odometer`` fields. Only the first hit is
    used, even though up to 1000 hits are requested.

    :param vin: value matched against the ``vin`` field of the indexed documents.
    :return: the ``_source`` dict of the first hit, with ``odometer`` set to 0
        when the document has no such field; ``None`` when nothing matches.
    """
    # Connect to the ES cluster (a new client is created on every call).
    es = Elasticsearch(["ip"],
                       http_auth=('elastic', 'password'),
                       port=9200)
    # Exact-match query on the vin field.
    body = {
        "query": {
            "term": {
                "vin": vin
            }
        }
    }
    # Only fetch the vin and odometer fields of each document.
    para = {
        "_source": "vin,odometer"
    }
    # Run the search. The index name was missing its opening quote in the
    # original, which made the whole file a syntax error.
    query = es.search(index='ddfsdfd', doc_type='dfsf',
                      size=1000, body=body, params=para)
    results = query['hits']['hits']

    # No hit: report it and return None explicitly instead of falling through
    # to results[0], which would raise IndexError on an empty list.
    if not results:
        print('vin数据不存在', vin)
        return None

    source = results[0]['_source']
    # Documents without a total-mileage value get a default odometer of 0.
    if 'odometer' not in source:
        source['odometer'] = 0
    print(source)
    return source
if __name__ == "__main__":
    # NOTE(review): this literal contains no path separators, so it presumably
    # lost its backslashes somewhere along the way. It is kept verbatim here;
    # confirm the real directory before running.
    path = "E:MyCodeML_InActiondata_analysisdata_do"
    gc5_vin = "GC5_vin.csv"
    gc5_vin_path = os.path.join(path, gc5_vin)
    excel_file_name = 'GC5_data.xlsx'
    excel_path = os.path.join(path, excel_file_name)

    # Load only the vehicle_identifier column from the CSV file.
    df_data = pd.read_csv(gc5_vin_path, usecols=['vehicle_identifier'])

    # Query ES once per identifier. Iterating the column directly avoids
    # label-based indexing through range(len(...)). Results that are not
    # dicts are skipped.
    vin_data_list = []
    for vin in df_data['vehicle_identifier']:
        data_gc5 = elastic_data(vin)
        if isinstance(data_gc5, dict):
            vin_data_list.append(data_gc5)

    # Turn the collected dicts into a DataFrame and write it to the Excel file.
    vin_data_df = pd.DataFrame(vin_data_list)
    vin_data_df.to_excel(excel_path, index=False)
    print("任务已完成!")