网上的大部分教程都讲到了 Elasticsearch 使用 scroll 游标的方法，但使用后往往没有清除游标。这会导致打开的 scroll 数量超过最大限制而报错，因此应该在任务结束时手动清理 scroll（否则只能等到设定的时间过后，游标才会自动清理）。
from elasticsearch import Elasticsearch def main(): es = Elasticsearch([***], http_auth = ('***', '****'), port = *** ) query = *** page = es.search( index= ** *, scroll = '2m', size = 1000, body = {"query": query}) sid = page['_scroll_id'] sid_list = [sid] scroll_size_max = page['hits']['total']['value'] cnt = 0 while cnt < scroll_size_max: for info in page['hits']['hits']: # do something cnt += 1 page = es.scroll(scroll_id=sid, scroll='2m') sid = page['_scroll_id'] sid_list.append(sid) for sid_del in sid_list: es.clear_scroll(scroll_id=sid_del) if __name__ == "__main__": main()