• Python 操作 Elasticsearch(ES)


    from elasticsearch import Elasticsearch
    
    # Module-level client shared by every function in this script. It is built
    # with no arguments, so the library's default connection settings apply.
    es = Elasticsearch()
    
    # Index management: delete any existing index, then create it
    def deleteInices(my_index, confirm=True):
        """Delete the index ``my_index`` if it currently exists.

        Args:
            my_index: Name of the index to remove.
            confirm: Safety switch. Pass ``False`` to turn the call into a
                no-op. Defaults to ``True``, which matches the value that
                used to be hard-coded in the condition.
        """
        if not confirm:
            return
        # Use the keyword form: elasticsearch-py 8.x accepts API arguments by
        # keyword only, and earlier clients accept ``index=`` as well.
        if es.indices.exists(index=my_index):
            print('删除之前存在的')
            es.indices.delete(index=my_index)
    
    def createIndex(my_index,my_doc):
        """Create index ``my_index`` with a mapping for document type ``my_doc``.

        The mapping declares two fields:

        * ``my_id``   -- ``integer``
        * ``my_word`` -- ``text`` with ``ik_smart`` set as both ``analyzer``
          and ``search_analyzer``

        ``ignore=400`` is passed to the create call, and the success message
        is printed afterwards regardless of what the server answered.
        """
        id_field = {"type": "integer"}
        # ik_smart is named explicitly for indexing and for searching. The
        # original author's note: without it the default analyzer is used,
        # which splits the text into single characters.
        word_field = {
            "type": "text",
            "analyzer": "ik_smart",
            "search_analyzer": "ik_smart",
        }
        # my_doc is the only variable part of the mapping (the author likens
        # it to a table name, and the properties to column names).
        doc_mapping = {"properties": {"my_id": id_field, "my_word": word_field}}
        index_body = {"mappings": {my_doc: doc_mapping}}

        es.indices.create(index=my_index, ignore=400, body=index_body)
        print('创建索引成功')
    
    def mainCreateIndex():
        """Recreate the ``word2vec_index`` index with document type ``my_doc``."""
        index_name, doc_type = "word2vec_index", "my_doc"
        # Drop any previous copy first, then create the index with its mapping.
        deleteInices(index_name)
        createIndex(index_name, doc_type)
    
    # mainCreateIndex()
    
    
    # 插入数据
    
    from elasticsearch import helpers
    
    def getAllWords(path="vocab.txt"):
        """Read the UTF-8 text file at ``path`` into ``(line_no, text)`` pairs.

        Line numbers start at 0. Each line has leading and trailing whitespace
        stripped; blank lines are kept as empty strings so the numbering
        follows the file.

        Returns:
            A list of ``(int, str)`` tuples, one per line of the file.
        """
        with open(path, "r", encoding='utf-8') as vocab_file:
            raw_lines = vocab_file.readlines()
        return [(line_no, text.strip()) for line_no, text in enumerate(raw_lines)]
    
    
    def insertData(words,my_index,my_doc,one_bulk):
        """Bulk-index ``(id, word)`` pairs into ``my_index``.

        Each pair becomes one action whose ``_source`` holds ``my_id`` and
        ``my_word``. Batching is delegated to ``helpers.bulk`` through
        ``chunk_size``. The earlier hand-rolled batches were passed to
        ``helpers.bulk`` without ``chunk_size``, so the helper re-split them
        at its own default and ``one_bulk`` was not honoured.

        Args:
            words: Sized collection of ``(my_id, my_word)`` pairs.
            my_index: Name of the target index.
            my_doc: Document type written to each action's ``_type``.
            one_bulk: Maximum number of documents per bulk request.
        """
        print("共需要插入%d条"%len(words))

        def build_actions():
            # Generate actions lazily so only one chunk is held at a time.
            for word_id, word in words:
                yield {
                    "_index": my_index,
                    "_type": my_doc,
                    "_source": {"my_id": word_id, "my_word": word},
                }

        # A non-positive one_bulk degrades to one document per request, which
        # is what the previous manual loop did in that case.
        helpers.bulk(es, build_actions(), chunk_size=max(1, one_bulk))
        print("插入数据完成")
    
    def mainInset():
        """Load the default vocabulary file and bulk-insert it into ``word2vec_index``."""
        vocabulary = getAllWords()
        insertData(vocabulary, "word2vec_index", "my_doc", one_bulk=5000)
    
    
    # mainInset()
    
    # es查询
    
    def keywordSearch(keywords1,my_index,my_doc):
        """Run a ``match`` query on the ``my_word`` field and print the hits.

        Three things are printed, in order: the raw hit list, the number of
        hits, and a list of ``(my_id, my_word)`` tuples taken from each hit's
        ``_source``. Nothing is returned.

        Args:
            keywords1: Text to match against ``my_word``.
            my_index: Index to search.
            my_doc: Accepted for symmetry with the other helpers; the query
                itself does not use it.
        """
        match_query = {"query": {"match": {"my_word": keywords1}}}

        # helpers.scan hands back a generator, so it is materialised here to
        # allow printing and counting. (The author's alternative was a direct
        # es.search(index=..., body=...) call, reading the count from
        # res["hits"]["total"].)
        hits = list(
            helpers.scan(
                client=es,
                query=match_query,
                scroll='10m',
                index=my_index,
                timeout='10m',
            )
        )
        print(hits)

        sources = (hit['_source'] for hit in hits)
        id_word_pairs = [(src['my_id'], src['my_word']) for src in sources]
        print("共查询到%d条数据"%len(hits))
        print(id_word_pairs)
    
    
    def mainSearch():
        """Search ``word2vec_index`` for the keyword "氨基酸" and print the results."""
        keywordSearch("氨基酸", "word2vec_index", "my_doc")
    
    
    # Script entry point: runs the search demo as soon as this code executes.
    mainSearch()
  • 相关阅读:
    IPv4地址被用光,IPv6将接手
    杀猪盘
    大家都应该看看这个贴子,会让你心明眼亮。 注意到这些变化了吗?中国正在发生的100个变化,越往后读越震惊!
    区块链在中国怎么练?
    区块链到底是什么样的技术呢?
    2019感恩节
    人工智能、大数据、物联网、区块链,四大新科技PK,你更看好谁?
    vue遇见的问题(2)---imported multiple times(转载)
    drf-序列化器的理解
    Django rest_framework序列化many=True参数解释
  • 原文地址:https://www.cnblogs.com/ltyc/p/16220876.html
Copyright © 2020-2023  润新知