#!/usr/bin/python
"""Export one MD5 digest per document of an Elasticsearch index.

The script scrolls through every document of the ``full_sight`` index and
appends, for each document, a JSON object ``{"<_id>": "<md5 hex digest>"}``
followed by a single space to ``test.text``.  Progress is printed to stdout
as ``<_id>============><running count>``.
"""
import argparse
import hashlib
import json
import os
import sys
import threading
import time

from elasticsearch import Elasticsearch
from elasticsearch import helpers

host_list = [
    {"host": "1.58.55.11", "port": 9200},
    {"host": "1.58.55.12", "port": 9200},
    {"host": "1.58.55.13", "port": 9200},
]

# Number of documents requested per search/scroll page.
size = 1000


def _document_md5(hit):
    """Return the MD5 hex digest of a single hit serialized as JSON.

    A fresh hash object is created for every document; sharing one object
    across documents would make each digest cover all documents seen so far.
    """
    serialized = json.dumps(hit)
    return hashlib.md5(serialized.encode("utf-8")).hexdigest()


def _iter_hits(es, index='full_sight', page_size=size, keep_alive='1m'):
    """Yield every hit of *index*, starting with the initial search page.

    Args:
        es: Elasticsearch client used for the search and scroll requests.
        index: Name of the index to read.
        page_size: Number of hits requested per page.
        keep_alive: Scroll keep-alive value passed with every request (the
            scroll parameter must be given on each scroll call).

    Yields:
        The hit dictionaries found under ``['hits']['hits']`` of each page.
    """
    response = es.search(index=index, scroll=keep_alive, size=page_size)
    scroll_id = response['_scroll_id']
    try:
        while True:
            hits = response['hits']['hits']
            # An empty page marks the end of the scroll, so no page count
            # derived from ``hits.total`` is required.
            if not hits:
                break
            for hit in hits:
                yield hit
            response = es.scroll(scroll_id=scroll_id, scroll=keep_alive)
            # Always continue with the id from the most recent response.
            scroll_id = response['_scroll_id']
    finally:
        # Release the server-side scroll context instead of waiting for the
        # keep-alive to expire.
        es.clear_scroll(scroll_id=scroll_id)


def main():
    """Write the ``{_id: md5}`` record of every document to ``test.text``."""
    es = Elasticsearch(host_list)
    # Open the output file once rather than once per document.
    with open("test.text", "a") as f:
        for num, hit in enumerate(_iter_hits(es), start=1):
            k = hit["_id"]
            f.write(json.dumps({k: _document_md5(hit)}))
            f.write(" ")
            print(k, num, sep="============>")


if __name__ == "__main__":
    main()