• python读取es中的所有数据并计算md5然后进行持久化


    #!/usr/bin/python
    import threading
    import json
    import time
    from elasticsearch import Elasticsearch
    from elasticsearch import helpers
    import os
    import sys
    import argparse
    # Addresses of the Elasticsearch nodes this script connects to; every node
    # is reached on port 9200.
    host_list = [
        {"host": address, "port": 9200}
        for address in ("1.58.55.11", "1.58.55.12", "1.58.55.13")
    ]

    # Client handle used by the rest of the script for search/scroll requests.
    es = Elasticsearch(host_list)
    
    
    
    import hashlib

    size = 1000  # number of hits requested per scroll page


    def hit_md5(hit):
        """Return the hex MD5 digest of a single hit serialized as JSON.

        A new hash object is created on every call, so the digest depends only
        on ``hit`` itself and not on any record hashed earlier.
        """
        payload = json.dumps(hit)
        return hashlib.md5(payload.encode("utf-8")).hexdigest()


    def iter_hits(client, index, page_size=size, keep_alive="1m"):
        """Yield every hit of ``index``, page by page, using a scroll cursor.

        The hits returned by the initial ``search`` call are yielded first;
        after that ``scroll`` is called repeatedly until it returns an empty
        page.  Stopping on an empty page avoids computing a page count from
        ``hits.total``, whose shape is not the same in every server version.

        :param client: object exposing ``search`` and ``scroll`` like the
            ``Elasticsearch`` client created earlier in this file.
        :param index: name of the index to read.
        :param page_size: number of hits requested per page.
        :param keep_alive: scroll keep-alive passed on every request (the
            ``scroll`` argument must be supplied on each ``scroll`` call).
        """
        response = client.search(index=index, scroll=keep_alive, size=page_size)
        while True:
            hits = response["hits"]["hits"]
            if not hits:
                return
            for hit in hits:
                yield hit
            # Always continue from the scroll id of the most recent response.
            response = client.scroll(
                scroll_id=response["_scroll_id"],
                scroll=keep_alive,
            )


    def dump_md5(client, index="full_sight", out_path="test.text", page_size=size):
        """Append one ``{"<_id>": "<md5>"}`` JSON line per hit to ``out_path``.

        The output file is opened once in append mode for the whole run, and a
        progress line (document id and running count) is printed per record.

        :param client: Elasticsearch-like client handed to ``iter_hits``.
        :param index: name of the index to export.
        :param out_path: file the JSON lines are appended to.
        :param page_size: number of hits requested per scroll page.
        :return: number of records written.
        """
        count = 0
        with open(out_path, "a", encoding="utf-8") as out_file:
            for count, hit in enumerate(iter_hits(client, index, page_size), start=1):
                doc_id = hit["_id"]
                out_file.write(json.dumps({doc_id: hit_md5(hit)}))
                out_file.write("\n")
                print(doc_id, count, sep="============>")
        return count


    if __name__ == "__main__":
        dump_md5(es)
    
  • 相关阅读:
    IIS配置跨域请求
    ABP框架页面权限验证
    WPF-DataGrid增删改查不绑定数据源
    WPF-DataGrid增删改查绑定数据源
    .NET开发框架集合(长期更新)
    C# Webbrowser 常用方法及多线程调用
    Devexpress-GridLookUpEdit
    Devexpress提示框的使用
    Asp.Net MVC中Action跳转小结
    ASP.NET MVC备忘
  • 原文地址:https://www.cnblogs.com/bainianminguo/p/10718713.html
Copyright © 2020-2023  润新知