• Elasticsearch搜索引擎学习笔记(五)


    搜索功能

    数据准备

      1、自定义词库

        慕课网

        慕课

        课网

        慕

        课

        网

      2、新建索引shop

      3、建立mappings

    POST        /shop/_mapping        (7.x之前的版本:/shop/_mapping/_doc)
    {
        "properties": {
            "id": {
                "type": "long"
            },
            "age": {
                "type": "integer"
            },
            "username": {
                "type": "keyword"
            },
            "nickname": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "money": {
                "type": "float"
            },
            "desc": {
                "type": "text",
                "analyzer": "ik_max_word"
            },
            "sex": {
                "type": "byte"
            },
            "birthday": {
                "type": "date"
            },
            "face": {
                "type": "text",
                "index": false
            }
        }
    }
    

      4、录入数据

    POST         /shop/_doc/1001
    
    {
        "id": 1001,
        "age": 18,
        "username": "imoocAmazing",
        "nickname": "慕课网",
        "money": 88.8,
        "desc": "我在慕课网学习java和前端,学习到了很多知识",
        "sex": 0,
        "birthday": "1992-12-24",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1002,
        "age": 19,
        "username": "justbuy",
        "nickname": "周杰棍",
        "money": 77.8,
        "desc": "今天上下班都很堵,车流量很大",
        "sex": 1,
        "birthday": "1993-01-24",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1003,
        "age": 20,
        "username": "bigFace",
        "nickname": "飞翔的巨鹰",
        "money": 66.8,
        "desc": "慕课网团队和导游坐飞机去海外旅游,去了新马泰和欧洲",
        "sex": 1,
        "birthday": "1996-01-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1004,
        "age": 22,
        "username": "flyfish",
        "nickname": "水中鱼",
        "money": 55.8,
        "desc": "昨天在学校的池塘里,看到有很多鱼在游泳,然后就去慕课网上课了",
        "sex": 0,
        "birthday": "1988-02-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1005,
        "age": 25,
        "username": "gotoplay",
        "nickname": "ps游戏机",
        "money": 155.8,
        "desc": "今年生日,女友送了我一台play station游戏机,非常好玩,非常不错",
        "sex": 1,
        "birthday": "1989-03-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1006,
        "age": 19,
        "username": "missimooc",
        "nickname": "我叫小慕",
        "money": 156.8,
        "desc": "我叫凌云慕,今年20岁,是一名律师,我在琦䯲星球做演讲",
        "sex": 1,
        "birthday": "1993-04-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1007,
        "age": 19,
        "username": "msgame",
        "nickname": "gamexbox",
        "money": 1056.8,
        "desc": "明天去进货,最近微软处理很多游戏机,还要买xbox游戏卡带",
        "sex": 1,
        "birthday": "1985-05-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1008,
        "age": 19,
        "username": "muke",
        "nickname": "慕学习",
        "money": 1056.8,
        "desc": "大学毕业后,可以到imooc.com进修",
        "sex": 1,
        "birthday": "1995-06-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1009,
        "age": 22,
        "username": "shaonian",
        "nickname": "骚年轮",
        "money": 96.8,
        "desc": "骚年在大学毕业后,考研究生去了",
        "sex": 1,
        "birthday": "1998-07-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1010,
        "age": 30,
        "username": "tata",
        "nickname": "隔壁老王",
        "money": 100.8,
        "desc": "隔壁老外去国外出差,带给我很多好吃的",
        "sex": 1,
        "birthday": "1988-07-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1011,
        "age": 31,
        "username": "sprder",
        "nickname": "皮特帕克",
        "money": 180.8,
        "desc": "它是一个超级英雄",
        "sex": 1,
        "birthday": "1989-08-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    
    {
        "id": 1012,
        "age": 31,
        "username": "super hero",
        "nickname": "super hero",
        "money": 188.8,
        "desc": "BatMan, GreenArrow, SpiderMan, IronMan... are all Super Hero",
        "sex": 1,
        "birthday": "1980-08-14",
        "face": "https://www.imooc.com/static/img/index/logo.png"
    }
    

    请求参数的查询(QueryString)

    GET     /shop/_doc/_search?q=desc:慕课网
    GET     /shop/_doc/_search?q=nickname:慕&q=age:25
    

    DSL查询

    QueryString用得很少,一旦参数复杂就难以构建,所以大多数查询都会使用DSL来完成。

    # 查询
    POST     /shop/_doc/_search
    {
        "query": {
            "match": {
                "desc": "慕课网"
            }
        }
    }
    # 判断某个字段是否存在
    {
        "query": {
            "exists": {
    	        "field": "desc"
    	    }
        }
    }

    查询所有

    GET     /shop/_doc/_search
    
    或
    
    POST     /shop/_doc/_search
    {
        "query": {
            "match_all": {}
        },
        "_source": ["id", "nickname", "age"]
    }  

    分页

    POST     /shop/_doc/_search
    {
        "query": {
            "match_all": {}
        },
        "from": 0,
        "size": 10
    }
    
    {
    	"query": {
    		"match_all": {}
    	},
    	"_source": [
    		"id",
    		"nickname",
    		"age"
    	],
    	"from": 0,
    	"size": 10
    }
    

      

    term精确搜索与match分词搜索

    term搜索的时候会把用户搜索内容,比如“慕课网强大”作为一整个关键词去搜索,而不会对其进行分词后再搜索;

    match会把用户搜索内容分词,然后再搜索

    POST     /shop/_doc/_search
    {
        "query": {
            "term": {
                "desc": "慕课网"
            }
        }
    }
    对比
    {
        "query": {
            "match": {
                "desc": "慕课网"
            }
        }
    }
    

      

    terms 多个词语匹配检索

    POST     /shop/_doc/_search
    {
        "query": {
            "terms": {
                "desc": ["慕课网", "学习", "骚年"]
            }
        }
    }
    

      

    match_phrase 短语匹配

    match:分词后只要有匹配就返回,match_phrase:分词结果必须在text字段分词中都包含,而且顺序必须相同,而且必须都是连续的。(搜索比较严格)

    slop:允许词语间跳过的数量,是“词”的数量,不是“字”的数量

    POST     /shop/_doc/_search
    {
        "query": {
            "match_phrase": {
                "desc": {
                	"query": "大学 毕业 研究生",
                	"slop": 2
                }
            }
        }
    }
    

      

    match(operator)

    operator

      or:搜索内容分词后,只要存在一个词语匹配就展示结果

      and:搜索内容分词后,都要满足词语匹配。

    POST     /shop/_doc/_search
    {
        "query": {
            "match": {
                "desc": "慕课网"
            }
        }
    }
    # 等同于
    {
        "query": {
            "match": {
                "desc": {
                    "query": "xbox游戏机",
                    "operator": "or"
                }
            }
        }
    }
    # 相当于 select * from shop where desc='xbox' or|and desc='游戏机'
    

      

    match(minimum_should_match)

    minimum_should_match

      minimum_should_match: 最低匹配精度,即至少需要匹配 [分词后的词语个数] x 百分比 个词语,计算结果向下取整。举个例子:当前属性设置为 70%,若一个用户查询检索内容分词后有10个词语,那么匹配度按照 10x70%=7,则desc中至少需要有7个词语匹配,就展示;若分词后有8个,则 8x70%=5.6,向下取整为5,则desc中至少需要有5个词语匹配,就展示。

      minimum_should_match 也能设置具体的数字,表示个数

    POST     /shop/_doc/_search
    {
        "query": {
            "match": {
                "desc": {
                    "query": "女友生日送我好玩的xbox游戏机",
                    "minimum_should_match": "60%"
                }
            }
        }
    }
    

      

    根据文档主键ids搜索

    GET /shop/_doc/1001
    
    或
    
    POST     /shop/_doc/_search
    
    {
        "query": {
            "ids": {
                "type": "_doc",
                "values": ["1001", "1010", "1008"]
            }
        }
    }
    

      

     multi_match/boost

    multi_match

      满足使用match在多个字段中进行查询的需求

    POST     /shop/_doc/_search
    {
        "query": {
            "multi_match": {
                    "query": "皮特帕克慕课网",
                    "fields": ["desc", "nickname"]
    
            }
        }
    }
    

      

    boost

      权重,为某个字段设置权重,权重越高,文档相关性得分就越高。通常来说搜索商品名称要比商品简介的权重更高。

      nickname^10 代表搜索提升10倍相关性,也就是说用户搜索的时候其实以这个nickname为主,desc为辅,nickname的匹配相关度当然要提高权重比例了。

    POST     /shop/_doc/_search
    {
        "query": {
            "multi_match": {
                    "query": "皮特帕克慕课网",
                    "fields": ["desc", "nickname^10"]
    
            }
        }
    }
    

      

    布尔查询

    可以组合多重查询

      must:查询必须匹配搜索条件,譬如 and
      should:查询匹配满足1个以上条件,譬如 or
      must_not:不匹配搜索条件,一个都不要满足

    POST     /shop/_doc/_search
    
    {
        "query": {
            "bool": {
                "must": [
                    {
                        "multi_match": {
                            "query": "慕课网",
                            "fields": ["desc", "nickname"]
                        }
                    },
                    {
                        "term": {
                            "sex": 1
                        }
                    },
                    {
                        "term": {
                            "birthday": "1996-01-14"
                        }
                    }
                ]
            }
        }
    }
    
    {
        "query": {
            "bool": {
                "should(must_not)": [
                    {
                        "multi_match": {
                            "query": "学习",
                            "fields": ["desc", "nickname"]
                        }
                    },
                    {
                    	"match": {
                    		"desc": "游戏"
                    	}	
                    },
                    {
                        "term": {
                            "sex": 0
                        }
                    }
                ]
            }
        }
    }
    
    {
        "query": {
            "bool": {
                "must": [
                    {
                    	"match": {
                    		"desc": "慕"
                    	}	
                    },
                    {
                    	"match": {
                    		"nickname": "慕"
                    	}	
                    }
                ],
                "should": [
                    {
                    	"match": {
                    		"sex": "0"
                    	}	
                    }
                ],
                "must_not": [
                    {
                    	"term": {
                    		"birthday": "1992-12-24"
                    	}	
                    }
                ]
            }
        }
    }
    

      

    为指定词语加权

    特殊场景下,某些词语可以单独加权,这样可以排得更加靠前。

    POST     /shop/_doc/_search
    {
        "query": {
            "bool": {
                "should": [
                	{
                		"match": {
                			"desc": {
                				"query": "律师",
                				"boost": 18
                			}
                		}
                	},
                	{
                		"match": {
                			"desc": {
                				"query": "进修",
                				"boost": 2
                			}
                		}
                	}
                ]
            }
        }
    }
    

      

    过滤器

    对搜索出来的结果进行数据过滤。不会到es库里去搜,不会去计算文档的相关度分数,所以过滤的性能会比较高,过滤器可以和全文搜索结合在一起使用。

    post_filter元素是一个顶层元素,只会对搜索结果进行过滤。不会计算数据的匹配度相关性分数,不会根据分数去排序,query则相反,会计算分数,也会按照分数去排序。

      query:根据用户搜索条件检索匹配记录

      post_filter:用于查询后,对结果数据的筛选

    POST     /shop/_doc/_search
    
    {
    	"query": {
    		"match": {
    			"desc": "慕课网游戏"
    		}	
        },
        "post_filter": {
    		"range": {
    			"money": {
    				"gt": 60,
    				"lt": 1000
    			}
    		}
    	}	
    }
    

      上边的关键词是“慕课网游戏”,并且过滤条件是“money”大于60且小于1000。

      现在改一下,关键词是“慕课网游戏”,并且过滤条件是“money”小于60或大于1000的,这个该怎么写呢?答案如下:

    {
        "query": {
            "bool": {
                "must": [
                    {
                        "match": {
                            "desc": "慕课网"
                        }
                    }
                ],
                "should": [
                    {
                        "range": {
                            "money": {
                                "lt": 60
                            }
                        }
                    },
                    {
                        "range": {
                            "money": {
                                "gt": 1000
                            }
                        }
                    }
                ],
                "minimum_should_match":1
            }
        }
    }
    

      

    排序

    POST     /shop/_doc/_search
    {
    	"query": {
    		"match": {
    			"desc": "慕课网游戏"
    		}
        },
        "post_filter": {
        	"range": {
        		"money": {
        			"gt": 55.8,
        			"lte": 155.8
        		}
        	}
        },
        "sort": [
            {
                "age": "desc"
            },
            {
                "money": "desc"
            }
        ]
    }
    

      由于text类型的字段会被分词,直接对它排序往往会报错,通常我们可以为这个字段增加一个额外的附属属性(子字段),类型为keyword,用于做排序。

      创建新的索引

    POST        /shop2/_mapping
    {
        "properties": {
            "id": {
                "type": "long"
            },
            "nickname": {
                "type": "text",
                "analyzer": "ik_max_word",
                "fields": {
                    "keyword": {
                        "type": "keyword"
                    }
                }
            }
        }
    }
    

      

      插入数据

    POST         /shop2/_doc
    {
        "id": 1001,
        "nickname": "美丽的风景"
    }
    {
        "id": 1002,
        "nickname": "漂亮的小哥哥"
    }
    {
        "id": 1003,
        "nickname": "飞翔的巨鹰"
    }
    {
        "id": 1004,
        "nickname": "完美的天空"
    }
    {
        "id": 1005,
        "nickname": "广阔的海域"
    }
    

      

      排序

    {
        "sort": [
            {
                "nickname.keyword": "desc"
            }
        ]
    }
    

      

                                  整理自慕课网《java架构师体系课》

  • 相关阅读:
    浅谈XXE漏洞攻击与防御——本质上就是注入,盗取数据用
    Linux pwn入门教程——CTF比赛
    IDA 逆向工程 反汇编使用
    使用virustotal VT 查询情报——感觉远远没有微步、思科好用,10万条数据查出来5万条都有postives >0的记录,尼玛!!!
    使用VAE、CNN encoder+孤立森林检测ssl加密异常流的初探——真是一个忧伤的故事!!!
    优步每周结算时间:每周二下午4点!
    成都Uber优步司机奖励政策(3月30日)
    北京Uber优步司机奖励政策(3月30日)
    滴滴快车奖励政策,高峰奖励,翻倍奖励,按成交率,指派单数分级(3月30日)
    成都Uber优步司机奖励政策(3月29日)
  • 原文地址:https://www.cnblogs.com/hmxs/p/14923198.html
Copyright © 2020-2023  润新知