• elastic_search 指令


    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    """ pass
    """
    
    import os
    import sys
    import jieba
    
    sys.path.append(os.path.dirname(os.path.split(os.path.realpath(__file__))[0]))
    
    from elasticsearch import Elasticsearch
    from conf.settings import FAQ_ES_CONF    # [{'host': '192.168.7.173', 'port': 9200}]
    
    
    # Demo script: rebuild the 'customer' index, load three sample documents,
    # then run one example query and print every hit.
    es_ser = Elasticsearch(FAQ_ES_CONF)

    # Start from a clean index. `ignore=404` tolerates "index does not exist"
    # on delete; `ignore=400` tolerates a bad-request answer on create.
    es_ser.indices.delete(index='customer', ignore=404)

    es_ser.indices.create(index='customer', ignore=400)

    # Field mapping for the 'round_FAQ2' doc type. 'company' is stored
    # un-analyzed, so it is only matched as a whole value.
    body = {"properties": {'about': {'type': 'string'},
                           'name': {'type': 'string'},
                           'age': {'type': 'integer'},
                           'score': {'type': 'integer'},
                           'company': {'type': 'string', 'index': 'not_analyzed'},
                           'interests': {'type': 'string'},
                           'timestamp': {'type': 'date'},
                           'id': {'type': 'integer'}}}

    es_ser.indices.put_mapping(index='customer', doc_type='round_FAQ2', body=body)

    # Sample documents. 'about' is pre-tokenised with jieba, so it is sent as a
    # list of tokens rather than a single string.
    es_ser.index(index='customer',
                 doc_type='round_FAQ2',
                 id=1,
                 body={"name": "wulangzhou",
                       "age": 25,
                       "score": [85, 75, 95],
                       "about": jieba.lcut('i like think deep'),
                       "company": 'zhangyue',
                       "interests": ["music"],
                       "timestamp": '2016'})

    es_ser.index(index='customer',
                 doc_type='round_FAQ2',
                 id=2,
                 body={"name": "yanweihong",
                       "age": 28,
                       "about": jieba.lcut('i like exercise more'),
                       "score": [90, 85, 77],
                       "company": 'zhangyue',
                       "interests": ["forestry", 'i', 'like'],
                       "timestamp": '2017'})

    # Only this document carries a 'weight' field, which is what the final
    # 'exists' query below looks for.
    es_ser.index(index='customer',
                 doc_type='round_FAQ2',
                 id=3,
                 body={"name": "liumin",
                       "age": 28,
                       "about": jieba.lcut('i like cat'),
                       "score": [80, 80, 80, 80],
                       "company": 'jindong',
                       "weight": 85,
                       "interests": ['game'],
                       "timestamp": '2016'})

    import time

    # Pause before searching -- presumably so the freshly indexed documents
    # become visible to search (TODO confirm against the index refresh settings).
    time.sleep(1)

    # Example query bodies. Each assignment overwrites the previous one, so
    # only the LAST `body` is actually sent to Elasticsearch; reorder or
    # comment out lines to try a different query.

    # multi_match with most_fields; no 'tie_breaker' (e.g. 0.2) is set here.
    body = {'query': {'multi_match': {'query': 'i like cat',
                                      'fields': ['about', 'interests'],
                                      'type': 'most_fields'}}}

    body = {'query': {'match_phrase': {'about': 'i like'}}}
    body = {'query': {'range': {'age': {'gte': 18, 'lte': 35}}}}
    body = {'query': {'match_all': {}}}

    body = {'query': {'terms': {'age': [22, 20]}}}

    body = {'query': {'exists': {'field': 'weight'}}}

    # Print every key/value pair of each hit, with a blank line between hits.
    # The parenthesised single-argument form parses on both Python 2 and 3.
    for hit in es_ser.search(index='customer', doc_type='round_FAQ2', body=body)['hits']['hits']:
        for k, v in hit.items():
            print('%s %s' % (k, v))
        print('')

    '''
    http://www.tuicool.com/articles/uAbmuaU
    match_phrase 可以看about 字段,如果该字段是string 且被设置为默认分词,可以看做是‘query_str‘ in ‘match_string’(查询字符和匹配字符都不分词进行匹配)?
    match 可以看about 字段,表示 query_str分词后中的每一个词,与match_string分词后中的所有词,看能匹配到几个(查询字符和匹配字符都进行分词匹配)。
    term 与 match_phrase 稍微有点区别
    (‘query_str‘ == ‘match_string’ ?) (不进行分词的匹配)
    multi_match 如果搭配 most_fields 表示fields中的所有字段,分词后尽量匹配多的词的和(不要带tie_breaker)
    如果搭配 best_fields 表示完全匹配的分值最高 比如 i like cat 如果全部匹配到了则分高(带tie_breaker)
    terms 与term 类似
    bool 当我们需要and or 查询的时候,可以用 bool 查询,查询条件可以嵌套 { "bool" : { "must" : [], "should" : [], "must_not" : [], } }
    '''

    def get_analyze_body(**kargs):
        """Build the Elasticsearch search body for a question lookup.

        Recognised keyword arguments:
            question:    the query text; a byte string is decoded as UTF-8.
            channel_num: optional channel number (converted with ``int``).
            version:     optional version number (converted with ``int``).

        The question is normalised and tokenised by helpers defined elsewhere
        (``replace_string``, ``jieba_cut``, ``filter_phrases``,
        ``get_right_phrases``), and each resulting phrase is weighted by
        ``get_phrases_rate``.

        Returns a dict with a ``bool`` query -- phrase matches on ``question``
        as ``should`` clauses, channel/version restrictions as ``must``
        clauses -- and ``min_score`` set to 1.
        """
        from faq.doc_idf import get_phrases_rate

        def _restriction(field, requested):
            # The value -1 is always accepted (boost 1) -- presumably it marks
            # entries valid for any channel/version; confirm against the index
            # data. A truthy requested value is accepted too, boosted to 1.5.
            options = [{'match_phrase': {field: {'query': -1,
                                                 'boost': 1}}}]
            if requested:
                options.append({'match_phrase': {field: {'query': int(requested),
                                                         'boost': 1.5}}})
            return {'bool': {'should': options}}

        text = kargs.get('question')
        if text and isinstance(text, str):
            text = text.decode('utf-8')
        text = replace_string(text)

        phrases = get_right_phrases(filter_phrases(jieba_cut(text)))
        phrase_rates = get_phrases_rate(phrases)

        # One phrase clause per extracted phrase, boosted by ten times its rate.
        phrase_clauses = [
            {'match_phrase': {'question': {'query': phrase,
                                           'boost': 10 * rate}}}
            for phrase, rate in phrase_rates.items()
        ]

        channel_clause = _restriction('channel_num', kargs.get('channel_num'))
        version_clause = _restriction('version', kargs.get('version'))

        bool_query = {'should': phrase_clauses,
                      'must': [channel_clause, version_clause]}
        return {'query': {'bool': bool_query},
                'min_score': 1}
     
  • 相关阅读:
    汉语-词语:冷静
    汉语-词语:沉着
    汉语-词语-稳重:百科
    汉语-词语:沉稳
    汉语-词语-丘壑:百科
    Struts中的常量
    算法整理(四):浅析高速排序的优化问题
    互联网+时代,是更加开放还是封闭
    UI复习练习_优酷布局
    fread与read的差别(文件io补充)
  • 原文地址:https://www.cnblogs.com/wulangzhou/p/6693458.html
Copyright © 2020-2023  润新知