elk各版本下载地址 https://elasticsearch.cn/download/
官方操作文档 https://www.elastic.co/guide/en/elasticsearch/reference/7.x/analyzer-anatomy.html
参考博客 https://www.jianshu.com/p/29e5b1a7ce61
# Check the health of the cluster
GET _cat/health?v

# List every index
GET _cat/indices?v
# Create index_demo with an explicit mapping:
# one primary shard, no replicas, and a single analyzed text field `name`.
PUT /index_demo
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "name": { "type": "text" }
    }
  }
}
# Choosing the ik analyzer.
# Baseline first: see how the default standard analyzer tokenizes a sentence.
GET /ecommerce/_analyze
{
  "analyzer": "standard",
  "text": "a pig is in the house"
}
# Create the ecommerce index with ik_max_word as its default analyzer.
# NOTE(review): `stopwords` is a parameter of the built-in standard/stop analyzers;
# confirm that the ik plugin actually honors it here.
PUT /ecommerce
{
  "settings": {
    "analysis": {
      "analyzer": {
        "default": {
          "type": "ik_max_word",
          "stopwords": "_english_"
        }
      }
    }
  }
}
# Custom analyzer: map "&" to "and", drop the stop words "a" and "the",
# strip HTML tags, and lowercase every token.
# NOTE(review): index_demo is also created earlier in these notes; PUT on an existing
# index fails with resource_already_exists_exception, so delete it first if it exists.
PUT /index_demo
{
  "settings": {
    "analysis": {
      "char_filter": {
        "&_to_and": { "type": "mapping", "mappings": ["&=> and"] }
      },
      "filter": {
        "my_stopwords": { "type": "stop", "stopwords": ["the", "a"] }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "char_filter": ["html_strip", "&_to_and"],
          "tokenizer": "standard",
          "filter": ["lowercase", "my_stopwords"]
        }
      }
    }
  }
}
# Index a document with an explicit id. If the id already exists the document is
# replaced (the old copy is deleted and the new one indexed).
POST /ecommerce/_doc/1
{ "name": "小米手机", "desc": "支持5G、全面屏6.4", "price": 3000, "producer": "小米", "tags": ["mobile", "5G"] }

# Same semantics with PUT: create, or replace when the id exists.
PUT /ecommerce/_doc/2
{ "name": "华为MacBook", "desc": "支持5G、全面屏15.2寸", "price": 8000, "producer": "Huawei", "tags": ["笔记本电脑", "huawei"] }

# Explicit create: fails if the id already exists.
POST /ecommerce/_create/3
{ "name": "华为P40 pro", "desc": "支持5G、超清摄像", "price": 12000, "producer": "Huawei 成都", "tags": ["mobile", "huawei", "5G"] }

# Index without an id: an id is generated, so the request can be repeated.
POST /ecommerce/_doc
{ "name": "Ipad mini 5", "desc": "7.9英寸", "price": 4000, "producer": "apple", "tags": ["笔记本电脑", "apple"] }

# Partial update: fails if the id does not exist.
POST /ecommerce/_update/1
{ "doc": { "price": 2000 } }

# Fetch all documents.
GET /ecommerce/_search

# Get by id: a missing id is reported as not found rather than raising an error.
GET /ecommerce/_doc/399

# Delete by id: a missing id is likewise not an error.
DELETE /ecommerce/_doc/455

# Products whose name contains 华为, sorted by price descending (sort defaults to ascending).
GET /ecommerce/_search?q=name:华为&sort=price:desc

# Return only the listed _source fields.
GET /ecommerce/_search?_source=name,price

# Show the (dynamically generated) mapping of the index.
GET /ecommerce/_mapping

# A string field is indexed twice to solve string sorting:
# the analyzed copy is used for search, the non-analyzed `.keyword` copy for sorting.
GET /ecommerce/_search
{
  "query": { "match_all": {} },
  "sort": [
    { "tags.keyword": { "order": "desc" } }
  ]
}

# Name must contain both keywords.
GET /ecommerce/_search
{
  "query": {
    "match": {
      "name": { "query": "小米 Huawei", "operator": "and" }
    }
  }
}

# Name must contain at least 75% of the keywords.
GET /ecommerce/_search
{
  "query": {
    "match": {
      "name": { "query": "小米 Huawei apple vivo", "minimum_should_match": "75%" }
    }
  }
}

# Full-text search: one matching keyword is enough.
# constant_score filters without scoring and assigns `boost` as the score (1.0 when boost
# is omitted). Per the original notes, filtering without the constant_score wrapper yields score 0.
GET /ecommerce/_search
{
  "query": {
    "constant_score": {
      "filter": { "match": { "name": "华为 小米" } },
      "boost": 1.2
    }
  }
}

# Match everything, paginate (from is zero-based) and return selected fields only.
GET /ecommerce/_search
{
  "query": {
    "constant_score": {
      "filter": { "match_all": {} },
      "boost": 1.2
    }
  },
  "from": 0,
  "size": 3,
  "_source": ["name", "price"]
}

# bool query with filter 5000 <= price <= 8000.
# `should` clauses are optional, but a document that matches them scores higher;
# minimum_should_match is how many `should` clauses must match for a hit to be returned.
# bool queries can be nested (see must_not).
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "华为" } }
      ],
      "should": [
        { "match": { "desc": { "query": "5G", "boost": 2 } } },
        { "match": { "price": "4000" } },
        { "match": { "producer": "伯明翰" } }
      ],
      "must_not": [
        { "match": { "tags": "apple" } },
        { "bool": { "must": { "match": { "desc": "xxx" } } } }
      ],
      "minimum_should_match": 2,
      "filter": [
        { "range": { "price": { "gte": 5000, "lte": 8000 } } }
      ]
    }
  }
}

# Same search with the price range moved out of `filter` and into `must`:
# the range clause now contributes to _score (the original notes observed +1).
# A filter does not compute a score, which is cheaper. Compare the two forms.
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "name": "华为" } },
        { "range": { "price": { "gte": 5000, "lte": 8000 } } }
      ],
      "should": [
        { "match": { "desc": "5G" } }
      ],
      "must_not": [
        { "match": { "tags": "apple" } }
      ]
    }
  }
}

# must and filter take arrays; with a single clause the array can be omitted.
GET /ecommerce/_search
{
  "query": {
    "bool": {
      "must": { "match": { "name": "华为" } },
      "filter": { "range": { "price": { "gte": 5000, "lte": 8000 } } }
    }
  }
}

# Phrase search: the whole phrase has to match.
GET /ecommerce/_search
{
  "query": {
    "match_phrase": { "producer": "华为 小米" }
  }
}

# Highlight matches. The default tags are <em>...</em>; `type` selects the highlighter
# and fragment_size is the length of each highlighted fragment.
# The options are global highlight settings, so they sit next to `fields` rather than
# inside it (inside `fields` every key is treated as a field name).
GET /ecommerce/_search
{
  "query": {
    "match": { "producer": "Huawei" }
  },
  "highlight": {
    "pre_tags": ["<tag1>"],
    "post_tags": ["</tag1>"],
    "type": "plain",
    "fragment_size": 5,
    "number_of_fragments": 2,
    "no_match_size": 150,
    "fields": {
      "producer": {}
    }
  }
}

# Aggregations: a terms aggregation on an analyzed text field fails unless fielddata is
# enabled on that field first (keyword fields need no such step).
# size=0 suppresses the raw hits; the size inside `terms` caps the number of buckets returned.
# Enable fielddata on the analyzed `tags` field so it can be aggregated on.
PUT /ecommerce/_mapping
{
  "properties": {
    "tags": {
      "type": "text",
      "fielddata": true
    }
  }
}

# Group by tag; return no hits and only the top 2 buckets.
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "group_by_tags": {
      "terms": { "field": "tags", "size": 2 }
    }
  }
}
# Validate a query without running it.
# NOTE(review): the field name `nam` is presumably a deliberate typo for the demo; confirm.
GET /ecommerce/_validate/query?explain
{ "query": { "match": { "nam": "TEXT" } } }

# Match one query string against several fields.
GET /ecommerce/_search
{ "query": { "multi_match": { "query": "ttt", "fields": ["name", "desc"] } } }

# Count products per tag. Mind the difference between the analyzed `tags` field and
# `tags.keyword`. (In a terms *query* the value must be an array, use `term` for a single
# value; for aggregations only `terms` exists, there is no `term` aggregation.)
GET /ecommerce/_search
{
  "query": { "match": { "name": "华为" } },
  "aggs": {
    "all_tags": {
      "terms": { "field": "tags.keyword", "size": 10 }
    }
  }
}

# Average price per tag, buckets ordered by that average, descending.
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "all_tags": {
      "terms": { "field": "tags", "order": { "avg_price": "desc" } },
      "aggs": {
        "avg_price": { "avg": { "field": "price" } }
      }
    }
  }
}

# Bucket by price range (0-2000 / 2000-5000 / 5000+), then by tag inside each range,
# and compute the average price of every resulting group.
GET /ecommerce/_search
{
  "size": 0,
  "aggs": {
    "group_by_price": {
      "range": {
        "field": "price",
        "ranges": [
          { "to": 2000 },
          { "from": 2000, "to": 5000 },
          { "from": 5000 }
        ]
      },
      "aggs": {
        "group_by_tags": {
          "terms": { "field": "tags" },
          "aggs": {
            "avg_price": { "avg": { "field": "price" } }
          }
        }
      }
    }
  }
}

# Optimistic concurrency control on update.
# The 7.x update API rejects ?version=... (internal versioning); pass the document's
# current _seq_no and _primary_term instead. The values below are placeholders: read the
# real ones from a GET /ecommerce/_doc/2 response.
POST /ecommerce/_update/2?if_seq_no=3&if_primary_term=1
{ "doc": { "tags": ["laptop", "Huawei"] } }

# version_type=external: the version number is maintained by the caller. The write
# succeeds only when the supplied version is greater than the stored one
# (use external_gte to also accept an equal version).
PUT /ecommerce/_doc/1?version=2&version_type=external
{ "name": "小米10Pro", "desc": "支持5G、全面屏6.4", "price": 3000, "producer": "小米", "tags": ["xiaomi", "mobile", "5G"] }

# Number of retries when the update hits a version conflict.
POST /ecommerce/_update/1?retry_on_conflict=2
{ "doc": { "price": 3000 } }

# mget across different indices, filtering the returned _source per document.
GET /_mget
{
  "docs": [
    { "_index": "ecommerce", "_id": 1, "_source": "price" },
    { "_index": "goods", "_id": 2, "_source": ["name", "price"] }
  ]
}

# mget within one index, one entry per document (each entry may select its own fields).
GET /ecommerce/_mget
{
  "docs": [
    { "_id": 1 },
    { "_id": 2 }
  ]
}

# mget within one index by id list; _source cannot be specified per id in this form.
GET /ecommerce/_mget
{ "ids": [1, 2] }

# Bulk operations. The body is newline-delimited JSON: every action and every source
# document must stay on a single line. `update` needs its payload wrapped in "doc".
POST /_bulk
{"delete": {"_index": "ecommerce", "_id": 3}}
{"create": {"_index": "ecommerce", "_id": 3}}
{"price":5000}
{"update":{"_index":"ecommerce","_id":3}}
{"doc":{"price":6000}}

# Same bulk request with the index given in the URL.
POST /ecommerce/_bulk
{"delete": {"_id": 3}}
{"create": {"_id": 3}}
{"price": 5000}
{"update": {"_id": 3}}
{"doc": {"price": 6000}}

# Scroll search: open a scroll context and fetch the first page of 3 hits.
# The keep-alive is 1m; the original 1ms would expire before the next page is requested.
GET /ecommerce/_search?scroll=1m
{
  "query": { "match_all": {} },
  "size": 3
}

# Fetch the next page. scroll_id is a sample value: use the one returned by the previous call.
GET /_search/scroll
{
  "scroll": "1m",
  "scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFEtFNnVaWGNCTkZUUmY3VFdxZ1VIAAAAAAAACCwWVFhicjJ5dFpSRENxVVJPdDROcnpkQQ=="
}
# Average price per color, plus the average price per brand inside each color.
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": { "field": "color" },
      "aggs": {
        "color_avg_price": { "avg": { "field": "price" } },
        "group_by_brand": {
          "terms": { "field": "brand" },
          "aggs": {
            "brand_avg_price": { "avg": { "field": "price" } }
          }
        }
      }
    }
  }
}

# Histogram: bucket by fixed price interval and sum the price in each bucket.
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "price": {
      "histogram": { "field": "price", "interval": 2000 },
      "aggs": {
        "revenue": { "sum": { "field": "price" } }
      }
    }
  }
}

# Date histogram: number of TVs sold per month (empty months included via extended_bounds).
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "sold_date",
        "calendar_interval": "month",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": { "min": "2019-01-01", "max": "2020-12-31" }
      }
    }
  }
}

# Drill-down: revenue per brand within each quarter, plus the quarter total.
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "group_by_sold_date": {
      "date_histogram": {
        "field": "sold_date",
        "calendar_interval": "quarter",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": { "min": "2016-01-01", "max": "2017-12-31" }
      },
      "aggs": {
        "group_by_brand": {
          "terms": { "field": "brand" },
          "aggs": {
            "sum_price": { "sum": { "field": "price" } }
          }
        },
        "total_sum_price": { "sum": { "field": "price" } }
      }
    }
  }
}

# Global bucket: average price of one brand compared with the average over all brands.
GET /tvs/_search
{
  "size": 0,
  "query": {
    "term": { "brand": { "value": "长虹" } }
  },
  "aggs": {
    "single_brand_avg_price": { "avg": { "field": "price" } },
    "all": {
      "global": {},
      "aggs": {
        "all_brand_avg_price": { "avg": { "field": "price" } }
      }
    }
  }
}

# Filter bucket: average price of one brand over recent windows (last 150 / 140 / 130 days).
GET /tvs/_search
{
  "size": 0,
  "query": {
    "term": { "brand": { "value": "长虹" } }
  },
  "aggs": {
    "recent_150d": {
      "filter": { "range": { "sold_date": { "gte": "now-150d" } } },
      "aggs": {
        "recent_150d_avg_price": { "avg": { "field": "price" } }
      }
    },
    "recent_140d": {
      "filter": { "range": { "sold_date": { "gte": "now-140d" } } },
      "aggs": {
        "recent_140d_avg_price": { "avg": { "field": "price" } }
      }
    },
    "recent_130d": {
      "filter": { "range": { "sold_date": { "gte": "now-130d" } } },
      "aggs": {
        "recent_130d_avg_price": { "avg": { "field": "price" } }
      }
    }
  }
}

# Cardinality (distinct count): number of distinct brands sold per month.
# Per the original notes, precision_threshold keeps the count exact while the field has
# at most that many unique values, at a cost of about 100*8 bytes of extra memory.
GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "months": {
      "date_histogram": { "field": "sold_date", "calendar_interval": "month" },
      "aggs": {
        "distinct_colors": {
          "cardinality": { "field": "brand", "precision_threshold": 100 }
        }
      }
    }
  }
}
# Multi-field search, best_fields: documents where a single field matches as many of the
# keywords as possible are ranked first.
# dis_max returns only the highest-scoring sub-query's score and ignores the others.
# tie_breaker (0 < tie_breaker < 1) multiplies the other sub-query scores and adds them
# to that best score. minimum_should_match trims the long tail: only matches reaching
# the threshold are returned.
GET /forum/_search
{
  "query": {
    "multi_match": {
      "query": "java solution",
      "type": "best_fields",
      "fields": ["title^2", "content"],
      "tie_breaker": 0.3,
      "minimum_should_match": "50%"
    }
  }
}

# The same idea spelled out with dis_max.
GET /forum/_search
{
  "query": {
    "dis_max": {
      "queries": [
        {
          "match": {
            "title": { "query": "java beginner", "minimum_should_match": "50%", "boost": 2 }
          }
        },
        {
          "match": {
            "body": { "query": "java beginner", "minimum_should_match": "30%" }
          }
        }
      ],
      "tie_breaker": 0.3
    }
  }
}
# Multi-field search, most_fields: documents in which more fields match the keywords
# are ranked first.
GET /forum/_search
{
  "query": {
    "multi_match": {
      "query": "learning courses",
      "type": "most_fields",
      "fields": ["sub_title", "sub_title.std"]
    }
  }
}
# most_fields and best_fields each have drawbacks; the native cross_fields type can be
# used instead.
GET /forum/_search
{
  "query": {
    "multi_match": {
      "query": "Peter Smith",
      "type": "cross_fields",
      "operator": "and",
      "fields": ["author_first_name", "author_last_name"]
    }
  }
}
# Proximity matching: match documents containing "java" or "elasticsearch", then rescore
# the top 50 hits with a phrase query that allows the two terms to be up to 50 positions apart.
GET /forum/_search
{
  "query": {
    "match": { "content": "java elasticsearch" }
  },
  "rescore": {
    "window_size": 50,
    "query": {
      "rescore_query": {
        "match_phrase": {
          "content": { "query": "java elasticsearch", "slop": 50 }
        }
      }
    }
  }
}
# Prefix search. Like a filter it computes no score but, per the original notes, it does
# not benefit from filter caching and compares terms one by one: the shorter the prefix,
# the slower the query. Wildcard and regexp searches are similarly expensive.
GET /forum/_search
{
  "query": {
    "prefix": {
      "articleID.keyword": { "value": "X" }
    }
  }
}
# Fuzzy search: tolerates misspelled search text by matching indexed terms within a given
# edit distance. `fuzziness` is the maximum number of edits allowed (at most 2; when
# omitted it defaults to AUTO).
GET /forum/_search
{
  "query": {
    "fuzzy": {
      "title": { "value": "hell", "fuzziness": 2 }
    }
  }
}

# Fuzziness can also be applied through a match query.
GET /forum/_search
{
  "query": {
    "match": {
      "title": { "query": "helio", "fuzziness": "AUTO", "operator": "and" }
    }
  }
}
# Percentiles: the 50th, 95th and 99th percentile of the latency field.
GET /website/_search
{
  "size": 0,
  "aggs": {
    "latency_percentiles": {
      "percentiles": {
        "field": "latency",
        "percents": [50, 95, 99]
      }
    }
  }
}
# Percentile ranks: per province, the share of latency values that fall within 200
# and within 600.
GET /website/_search
{
  "size": 0,
  "aggs": {
    "group_by_province": {
      "terms": { "field": "province" },
      "aggs": {
        "latency_percentile_ranks": {
          "percentile_ranks": {
            "field": "latency",
            "values": [200, 600]
          }
        }
      }
    }
  }
}
# Nested objects: a document whose `comments` field is an array of objects.
PUT /website/_doc/6
{
  "title": "花无缺发表的一篇帖子",
  "content": "我是花无缺,大家要不要考虑一下投资房产和买股票的事情啊。。。",
  "tags": ["投资", "理财"],
  "comments": [
    {
      "name": "小鱼儿",
      "comment": "什么股票啊?推荐一下呗",
      "age": 28,
      "stars": 4,
      "date": "2016-09-01"
    },
    {
      "name": "黄药师",
      "comment": "我喜欢投资房产,风,险大收益也大",
      "age": 31,
      "stars": 5,
      "date": "2016-10-22"
    }
  ]
}

# Searching on several fields of the same nested object: per the original notes this
# query does not give the intended result with the default (non-nested) mapping.
GET /website/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "comments.name": "黄药师" } },
        { "match": { "comments.age": 28 } }
      ]
    }
  }
}

# The mapping has to declare `comments` as nested first.
# NOTE(review): PUT /website fails if the index already exists, and an existing object
# field cannot be switched to nested in place; the index presumably has to be recreated
# and the documents re-indexed. Confirm before running.
PUT /website
{
  "mappings": {
    "properties": {
      "comments": {
        "type": "nested",
        "properties": {
          "name": { "type": "text" },
          "comment": { "type": "text" },
          "age": { "type": "short" },
          "stars": { "type": "short" },
          "date": { "type": "date" }
        }
      }
    }
  }
}

# Then rewrite the search with a nested query so both conditions apply to the same comment.
GET /website/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "花无缺" } },
        {
          "nested": {
            "path": "comments",
            "query": {
              "bool": {
                "must": [
                  { "match": { "comments.name": "黄药师" } },
                  { "match": { "comments.age": 31 } }
                ]
              }
            }
          }
        }
      ]
    }
  }
}
# Aggregating on nested objects: average stars of the comments, bucketed by comment month.
GET /website/_search
{
  "size": 0,
  "aggs": {
    "comments_path": {
      "nested": { "path": "comments" },
      "aggs": {
        "group_by_comments_date": {
          "date_histogram": {
            "field": "comments.date",
            "calendar_interval": "month",
            "format": "yyyy-MM"
          },
          "aggs": {
            "avg_stars": { "avg": { "field": "comments.stars" } }
          }
        }
      }
    }
  }
}
# Grouping by a non-nested field from inside a nested aggregation: bucket the comments by
# age, then use reverse_nested to step back to the parent document and group by its tags.
GET /website/_search
{
  "size": 0,
  "aggs": {
    "comments_path": {
      "nested": { "path": "comments" },
      "aggs": {
        "group_by_comments_age": {
          "histogram": { "field": "comments.age", "interval": 10 },
          "aggs": {
            "reverse_path": {
              "reverse_nested": {},
              "aggs": {
                "group_by_tags": {
                  "terms": { "field": "tags.keyword" }
                }
              }
            }
          }
        }
      }
    }
  }
}