• es7.9.0


    elk各版本下载地址 https://elasticsearch.cn/download/

    官方操作文档 https://www.elastic.co/guide/en/elasticsearch/reference/7.x/analyzer-anatomy.html

    参考博客 https://www.jianshu.com/p/29e5b1a7ce61

    # 查询所有cluster健康度
    GET _cat/health?v
    
    # 查看所有index
    GET _cat/indices?v

    # 创建mapping
    PUT /index_demo
    {
      "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
      },
      "mappings": {
        "properties": {
          "name":{
            "type": "text"
          }
        }
      }
    }
    # Check how the built-in standard analyzer tokenizes a sample sentence.
    GET /ecommerce/_analyze
    {
      "analyzer": "standard",
      "text": "a pig is in the house"
    }

    # Create the index with the IK analyzer (ik_max_word) as its default analyzer.
    # NOTE(review): "stopwords" is kept from the original notes; confirm that the IK analyzer honours it.
    PUT /ecommerce
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "default": {
              "type": "ik_max_word",
              "stopwords": "_english_"
            }
          }
        }
      }
    }

    # 定制分词器 将&转换为and,a 、the不做处理,将html标签过滤掉,将字符转为小写的
    PUT /index_demo
    {
      "settings": {
        "analysis": {
          "char_filter": {
            "&_to_and": {
              "type": "mapping",
              "mappings": ["&=> and"]
            }
          },
          "filter": {
            "my_stopwords":{
              "type": "stop",
              "stopwords": ["the", "a"]
            }
          },
          "analyzer": {
            "my_analyzer":{
              "type":"custom",
              "char_filter": ["html_strip", "&_to_and"],
              "tokenizer":"standard",
              "filter":["lowercase","my_stopwords"]
            }
          }
        }
      }
    }    
    # 新增 有则更新 删除原有再新增  
    POST /ecommerce/_doc/1
    {
      "name":"小米手机",
      "desc":"支持5G、全面屏6.4",
      "price":3000,
      "producer":"小米",
      "tags":["mobile","5G"]
    }
    
    # 新增 有则更新 删除原有再新增  
    PUT /ecommerce/_doc/2
    {
      "name":"华为MacBook",
      "desc":"支持5G、全面屏15.2寸",
      "price":8000,
      "producer":"Huawei",
      "tags":["笔记本电脑","huawei"]
    }
    
    # 显式指定新增 如果id存在则报错
    POST /ecommerce/_create/3
    {
      "name":"华为P40 pro",
      "desc":"支持5G、超清摄像",
      "price":12000,
      "producer":"Huawei 成都",
      "tags":["mobile","huawei","5G"]
    }
    
    # 添加 可重复添加
    POST /ecommerce/_doc
    {
      "name":"Ipad mini 5",
      "desc":"7.9英寸",
      "price":4000,
      "producer":"apple",
      "tags":["笔记本电脑","apple"]
    }
    
    # 更新 如果id不存在则报错
    POST /ecommerce/_update/1
    {
      "doc": {
        "price":2000
      }
    }
    
    # 查询所有document
    GET /ecommerce/_search
    # 查不到不会报错
    GET /ecommerce/_doc/399
    
    # 删除 查不到不会报错
    DELETE /ecommerce/_doc/455
    
    # 查询名称包含华为的商品,并且按照售价降序排序 sort默认升序
    GET /ecommerce/_search?q=name:华为&sort=price:desc
    
    # 只返回source指定字段
    GET /ecommerce/_search?_source=name,price
    
    # 获取index动态映射
    GET /ecommerce/_mapping
    
    # 将一个field索引两次来解决字符串排序问题 一个分词,用来进行搜索;一个不分词,用来进行排序
    GET /ecommerce/_search
    {
      "query": {
        "match_all": {}
      },
      "sort": [
        {
          "tags.keyword": {
            "order": "desc"
          }
        }
      ]
    }
    
    # 同时包含两个keyword
    GET /ecommerce/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米 Huawei",
            "operator": "and"
          }
        }
      }
    }
    
    # 至少包含75%
    GET /ecommerce/_search
    {
      "query": {
        "match": {
          "name": {
            "query": "小米 Huawei apple vivo",
            "minimum_should_match": "75%"
          }
        }
      }
    }
    
    # full-text search 只要有一个keyword匹配即可 constant_score通过boost指定score值 如果缺失boost字段 默认score=1.0 只过滤不计算score constant_score字段可缺失 缺失时score为0
    GET /ecommerce/_search
    {
      "query": {
        "constant_score": {
          "filter": {
            "match": {
              "name": "华为 小米"
            }        
          },
          "boost": 1.2
        }
      }
    }
    
    # 查询所有并分页返回指定字段 从0开始
    GET /ecommerce/_search
    {
      "query": {
        "constant_score": {
          "filter": {
            "match_all": {}
          },
          "boost": 1.2
        }
      },
      "from": 0,
      "size": 3,
      "_source": ["name", "price"]
    }
    
    # bool query with a filter of 5000 <= price <= 8000.
    # `should` clauses are optional: a document may or may not match them, but matching raises its score.
    # `minimum_should_match` is the number of `should` clauses that must match for a document to be returned.
    # bool queries can be nested (see the inner bool inside `must_not`).
    GET /ecommerce/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "华为"
              }
            }
          ],
          "should": [
            {
              "match": {
                "desc": {
                  "query": "5G",
                  "boost": 2
                }
              }
            },
            {
              "match": {
                "price": "4000"
              }
            },
            {
              "match": {
                "producer": "伯明翰"
              }
            }
          ],
          "must_not": [
            {
              "match": {
                "tags": "apple"
              }
            },
            {
              "bool": {
                "must": {
                  "match": {
                    "desc": "xxx"
                  }
                }
              }
            }
          ],
          "minimum_should_match": 2,
          "filter": [
            {
              "range": {
                "price": {
                  "gte": 5000,
                  "lte": 8000
                }
              }
            }
          ]
        }
      }
    }

    # Same kind of search with the range clause placed in a scoring clause (`must` here; the
    # original note says `should`) instead of `filter`. The note reports the score rising by 1;
    # `filter` clauses are not scored, which is cheaper. Compare this range with the filtered one above.
    GET /ecommerce/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "name": "华为"
              }
            },
            {
              "range": {
                "price": {
                  "gte": 5000,
                  "lte": 8000
                }
              }
            }
          ],
          "should": [
            {
              "match": {
                "desc": "5G"
              }
            }
          ],
          "must_not": [
            {
              "match": {
                "tags": "apple"
              }
            }
          ]
        }
      }
    }

    # `must` and `filter` take arrays; with a single clause the array brackets can be omitted.
    GET /ecommerce/_search
    {
      "query": {
        "bool": {
          "must": {
            "match": {
              "name": "华为"
            }
          },
          "filter": {
            "range": {
              "price": {
                "gte": 5000,
                "lte": 8000
              }
            }
          }
        }
      }
    }

    # Phrase search: the whole phrase must match.
    GET /ecommerce/_search
    {
      "query": {
        "match_phrase": {
          "producer": "华为 小米"
        }
      }
    }

    # Highlight search results. The default tag is <em>; `fragment_size` sets the length of each
    # highlighted fragment. The highlighter options are set at the top level of `highlight`,
    # because every key inside `fields` must be a field name mapped to an options object.
    GET /ecommerce/_search
    {
      "query": {
        "match": {
          "producer": "Huawei"
        }
      },
      "highlight": {
        "pre_tags": ["<tag1>"],
        "post_tags": ["</tag1>"],
        "fields": {
          "producer": {}
        },
        "type": "plain",
        "fragment_size": 5,
        "number_of_fragments": 2,
        "no_match_size": 150
      }
    }

    # Aggregations: a terms aggregation on `tags` fails at first because `tags` is a text field;
    # fielddata has to be enabled on it before it can be used for grouping.
    # "size": 0 at the top level suppresses the raw hits; the `size` inside `terms` is the number of buckets returned.
    # Enable fielddata on the text field `tags` so that it can be used in a terms aggregation.
    PUT /ecommerce/_mapping
    {
      "properties": {
        "tags": {
          "type": "text",
          "fielddata": true
        }
      }
    }

    # Group documents by tag. "size": 0 returns no hits, only the aggregation;
    # the `size` inside `terms` limits the number of buckets returned.
    GET /ecommerce/_search
    {
      "size": 0,
      "aggs": {
        "group_by_tags": {
          "terms": {
            "field": "tags",
            "size": 2
          }
        }
      }
    }
    # 验证
    GET /ecommerce/_validate/query?explain
    {
      "query": {
        "match": {
          "nam": "TEXT"
        }
      }
    }
    
    # 多字段匹配
    GET /ecommerce/_search
    {
      "query": {
        "multi_match": {
          "query": "ttt",
          "fields": ["name", "desc"]
        }
      }
    }
    # 计算每个tag下的商品数量 分词后注意tags与tags.keyword的区别 terms中的字段必须是数组类型 否则报错 非数组类型必须使用term 聚合操作只能用terms不能用term
    GET /ecommerce/_search
    {
      "query": {
        "match": {
          "name": "华为"
        }
      },
      "aggs": {
        "all_tags": {
          "terms": {
            "field": "tags.keyword",
            "size": 10
          }
        }
      }
    }
    
    # 计算每个tag下的商品的平均价格,并且按照平均价格降序排序
    GET /ecommerce/_search
    {
      "size": 0,
      "aggs": {
        "all_tags": {
          "terms": {
            "field": "tags",
            "order": {
              "avg_price": "desc"
            }
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
    
    # 按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格 按区间分组 0-2000/2000-5000/5000-
    GET /ecommerce/_search
    {
      "size": 0,
      "aggs": {
        "group_by_price": {
          "range": {
            "field": "price",
            "ranges": [
              {
                "to": 2000
              },
              {
                "from": 2000,
                "to": 5000
              },
              {
                "from": 5000
              }
            ]
          },
          "aggs": {
            "group_by_tags": {
              "terms": {
                "field": "tags"
              },
              "aggs": {
                "avg_price": {
                  "avg": {
                    "field": "price"
                  }
                }
              }
            }
          }
        }
      }
    }
    
    # Optimistic concurrency control on update.
    # Elasticsearch 7.x does not accept the internal `version` parameter on the update API;
    # pass the `_seq_no` and `_primary_term` returned by a previous GET /ecommerce/_doc/2 instead.
    # The values below are examples and must be replaced with the ones actually returned.
    POST /ecommerce/_update/2?if_seq_no=3&if_primary_term=1
    {
      "doc": {
        "tags": ["laptop", "Huawei"]
      }
    }
    
    # external指用户自己维护的版本号 只有请求中的version大于文档当前版本号时才能更新成功
    PUT /ecommerce/_doc/1?version=2&version_type=external
    {
        "name" : "小米10Pro",
        "desc" : "支持5G、全面屏6.4",
        "price" : 3000,
        "producer" : "小米",
        "tags" : [
          "xiaomi",
          "mobile",
          "5G"
        ]
    }
    
    # 在修改冲突时指定重试的次数
    POST /ecommerce/_update/1?retry_on_conflict=2
    {
      "doc": {
        "price": 3000
      }
    }
    
    # mget批量操作 不同的index 并对返回结果过滤
    GET /_mget
    {
      "docs": [
        {
          "_index": "ecommerce",
          "_id": 1,
          "_source": "price"
        },
        {
          "_index": "goods",
          "_id": 2,
          "_source": ["name", "price"]
        }
      ]
    }
    
    # 同一个index不同的field
    GET /ecommerce/_mget
    {
      "docs": [
        {
          "_id": 1
        },
        {
          "_id": 2
        }
      ]
    }
    
    # 同一个index相同的field ids获取时不能指定_source
    GET /ecommerce/_mget
    {
      "ids": [1,2]
    }
    
    # bulk批量操作 括号不能换行 update需要带上doc
    POST /_bulk
    {"delete": {"_index": "ecommerce", "_id": 3}}
    {"create": {"_index": "ecommerce", "_id": 3}}
    {"price":5000}
    {"update":{"_index":"ecommerce","_id":3}}
    {"doc":{"price":6000}}
    
    POST /ecommerce/_bulk
    {"delete": {"_id": 3}}
    {"create": {"_id": 3}}
    {"price": 5000}
    {"update": {"_id": 3}}
    {"doc": {"price": 6000}}
    
    # Scroll search: fetch the first page of 3 hits and keep the search context alive for 1 minute.
    # (A keep-alive of 1ms expires before the follow-up request can be sent.)
    GET /ecommerce/_search?scroll=1m
    {
      "query": {
        "match_all": {}
      },
      "size": 3
    }

    # Fetch the next page using the `_scroll_id` returned by the previous response,
    # renewing the keep-alive for another minute. The id below is an example value.
    GET /_search/scroll
    {
      "scroll": "1m",
      "scroll_id": "FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFEtFNnVaWGNCTkZUUmY3VFdxZ1VIAAAAAAAACCwWVFhicjJ5dFpSRENxVVJPdDROcnpkQQ=="
    }

    # Average price per color, plus the average price of each brand within each color.
    GET /tvs/_search
    {
      "size": 0,
      "aggs": {
        "group_by_color": {
          "terms": {
            "field": "color"
          },
          "aggs": {
            "color_avg_price": {
              "avg": {
                "field": "price"
              }
            },
            "group_by_brand": {
              "terms": {
                "field": "brand"
              },
              "aggs": {
                "brand_avg_price": {
                  "avg": {
                    "field": "price"
                  }
                }
              }
            }
          }
        }
      }
    }

    # Histogram: bucket by fixed `interval` of a numeric field and sum each bucket.
    GET /tvs/_search
    {
      "size": 0,
      "aggs": {
        "price": {
          "histogram": {
            "field": "price",
            "interval": 2000
          },
          "aggs": {
            "revenue": {
              "sum": {
                "field": "price"
              }
            }
          }
        }
      }
    }

    # Date histogram: number of TVs sold per month.
    GET /tvs/_search
    {
      "size": 0,
      "aggs": {
        "sales": {
          "date_histogram": {
            "field": "sold_date",
            "calendar_interval": "month",
            "format": "yyyy-MM-dd",
            "min_doc_count": 0,
            "extended_bounds": {
              "min": "2019-01-01",
              "max": "2020-12-31"
            }
          }
        }
      }
    }

    # Drill-down: sales amount per brand for each quarter, plus the quarter total.
    GET /tvs/_search
    {
      "size": 0,
      "aggs": {
        "group_by_sold_date": {
          "date_histogram": {
            "field": "sold_date",
            "calendar_interval": "quarter",
            "format": "yyyy-MM-dd",
            "min_doc_count": 0,
            "extended_bounds": {
              "min": "2016-01-01",
              "max": "2017-12-31"
            }
          },
          "aggs": {
            "group_by_brand": {
              "terms": {
                "field": "brand"
              },
              "aggs": {
                "sum_price": {
                  "sum": {
                    "field": "price"
                  }
                }
              }
            },
            "total_sum_price": {
              "sum": {
                "field": "price"
              }
            }
          }
        }
      }
    }

    # Global bucket: compare one brand's average price with the average over all brands.
    # The `global` aggregation ignores the query and runs over every document.
    GET /tvs/_search
    {
      "size": 0,
      "query": {
        "term": {
          "brand": {
            "value": "长虹"
          }
        }
      },
      "aggs": {
        "single_brand_avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "all": {
          "global": {},
          "aggs": {
            "all_brand_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }

    # Filter bucket: average price of one brand over recent time windows.
    # (The original note says "the last month"; the request uses the last 150, 140 and 130 days.)
    GET /tvs/_search
    {
      "size": 0,
      "query": {
        "term": {
          "brand": {
            "value": "长虹"
          }
        }
      },
      "aggs": {
        "recent_150d": {
          "filter": {
            "range": {
              "sold_date": {
                "gte": "now-150d"
              }
            }
          },
          "aggs": {
            "recent_150d_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        },
        "recent_140d": {
          "filter": {
            "range": {
              "sold_date": {
                "gte": "now-140d"
              }
            }
          },
          "aggs": {
            "recent_140d_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        },
        "recent_130d": {
          "filter": {
            "range": {
              "sold_date": {
                "gte": "now-130d"
              }
            }
          },
          "aggs": {
            "recent_130d_avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }

    # Cardinality (approximate distinct count): number of distinct brands sold per month.
    # `precision_threshold`: while the number of unique values stays below it, the count is
    # expected to be close to exact, at a memory cost of roughly precision_threshold * 8 bytes (100 * 8 here).
    GET /tvs/_search
    {
      "size": 0,
      "aggs": {
        "months": {
          "date_histogram": {
            "field": "sold_date",
            "calendar_interval": "month"
          },
          "aggs": {
            "distinct_colors": {
              "cardinality": {
                "field": "brand",
                "precision_threshold": 100
              }
            }
          }
        }
      }
    }

    # 多字段搜索之best fields:主要是将某一个field匹配尽可能多的关键词的doc优先返回回来

    # dis_max只取某一个query最大的分数返回,完全不考虑其他query的分数 tie_breaker将其他query的分数,乘以tie_breaker,然后综合那个最高分的query,一起计算并返回 0<tie_breaker<1 minimum_should_match 去长尾 只有匹配至少这个值的字段才返回

    GET /forum/_search
    {
      "query": {
        "multi_match": {
            "query":                "java solution",
            "type":                 "best_fields", 
            "fields":               [ "title^2", "content" ],
            "tie_breaker":          0.3,
            "minimum_should_match": "50%" 
        }
      } 
    }
    
    GET /forum/_search
    {
      "query": {
        "dis_max": {
          "queries":  [
            {
              "match": {
                "title": {
                  "query": "java beginner",
                  "minimum_should_match": "50%",
                  "boost": 2
                }
              }
            },
            {
              "match": {
                "body": {
                  "query": "java beginner",
                  "minimum_should_match": "30%"
                }
              }
            }
          ],
          "tie_breaker": 0.3
        }
      } 
    }
    # 多字段搜索之most fields:主要是尽可能返回更多field匹配到某个关键词的doc,优先返回回来
    GET /forum/_search
    {
       "query": {
            "multi_match": {
                "query":  "learning courses",
                "type":   "most_fields", 
                "fields": [ "sub_title", "sub_title.std" ]
            }
        }
    }

    # most fields 与 best fields 各有优缺点 可使用原生cross fields
    GET /forum/_search
    {
      "query": {
        "multi_match": {
          "query": "Peter Smith",
          "type": "cross_fields", 
          "operator": "and",
          "fields": ["author_first_name", "author_last_name"]
        }
      }
    } 

    # 近似匹配 包含java或elasticsearch或两者之间距离不超过50的前50条数据
    GET /forum/_search 
    {
      "query": {
        "match": {
          "content": "java elasticsearch"
        }
      },
      "rescore": {
        "window_size": 50,
        "query": {
          "rescore_query": {
            "match_phrase": {
              "content": {
                "query": "java elasticsearch",
                "slop": 50
              }
            }
          }
        }
      }
    }

    # 前缀搜索 和filter一样不计算score 但不会像filter一样利用缓存 而是在document中逐一比对 prefix越短 性能越差 此外还有通配符搜索、正则搜索 性能都差
    GET /forum/_search
    {
      "query": {
        "prefix": {
          "articleID.keyword": {
            "value": "X"
          }
        }
      }
    }

    # fuzzy 模糊搜索 自动将拼写错误的搜索文本,进行纠正,纠正以后去尝试匹配索引中的数据 fuzziness 指定的修订最大次数,默认为2
    GET /forum/_search
    {
      "query": {
        "fuzzy": {
          "title": {
            "value": "hell",
            "fuzziness": 2
          }
        }
      }
    }
    GET /forum/_search
    {
      "query": {
        "match": {
          "title": {
            "query": "helio",
            "fuzziness": "AUTO",
            "operator": "and"
          }
        }
      }
    }

    # percentiles 分别统计latency字段50%、95%、99%的百分位数
    GET /website/_search 
    {
      "size": 0,
      "aggs": {
        "latency_percentiles": {
          "percentiles": {
            "field": "latency",
            "percents": [
              50,
              95,
              99
            ]
          }
        }
      }
    }

    # percentile ranks 计算指定field值在200内占比、600内占比
    GET /website/_search 
    {
      "size": 0,
      "aggs": {
        "group_by_province": {
          "terms": {
            "field": "province"
          },
          "aggs": {
            "latency_percentile_ranks": {
              "percentile_ranks": {
                "field": "latency",
                "values": [
                  200,
                  600
                ]
              }
            }
          }
        }
      }
    }

    # nested object 类似这样的嵌套对象结构
    PUT /website/_doc/6
    {
      "title": "花无缺发表的一篇帖子",
      "content":  "我是花无缺,大家要不要考虑一下投资房产和买股票的事情啊。。。",
      "tags":  [ "投资", "理财" ],
      "comments": [ 
        {
          "name":    "小鱼儿",
          "comment": "什么股票啊?推荐一下呗",
          "age":     28,
          "stars":   4,
          "date":    "2016-09-01"
        },
        {
          "name":    "黄药师",
          "comment": "我喜欢投资房产,风,险大收益也大",
          "age":     31,
          "stars":   5,
          "date":    "2016-10-22"
        }
      ]
    }
    # 想搜索嵌套对象的多个字段 以下搜索不能成功
    GET /website/_search
    {
      "query": {
        "bool": {
          "must": [
            { "match": { "comments.name": "黄药师" }},
            { "match": { "comments.age":  28      }} 
          ]
        }
      }
    }
    # 需要先修改原有mapping结构(已存在字段的类型无法直接修改 需先删除原index 再用新的mapping重建并重新写入数据)
    PUT /website
    {
      "mappings": {
          "properties": {
            "comments": {
              "type": "nested", 
              "properties": {
                "name":    { "type": "text"  },
                "comment": { "type": "text"  },
                "age":     { "type": "short"   },
                "stars":   { "type": "short"   },
                "date":    { "type": "date"    }
              }
            }
          }
        }
    }
    # 并重新组织DSL
    GET /website/_search 
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "title": "花无缺"
              }
            },
            {
              "nested": {
                "path": "comments",
                "query": {
                  "bool": {
                    "must": [
                      {
                        "match": {
                          "comments.name": "黄药师"
                        }
                      },
                      {
                        "match": {
                          "comments.age": 31
                        }
                      }
                    ]
                  }
                }
              }
            }
          ]
        }
      }
    }

    # nested object 聚合操作
    GET /website/_search 
    {
      "size": 0, 
      "aggs": {
        "comments_path": {
          "nested": {
            "path": "comments"
          }, 
          "aggs": {
            "group_by_comments_date": {
              "date_histogram": {
                "field": "comments.date",
                "calendar_interval": "month",
                "format": "yyyy-MM"
              },
              "aggs": {
                "avg_stars": {
                  "avg": {
                    "field": "comments.stars"
                  }
                }
              }
            }
          }
        }
      }
    }

    # 在nested object字段分组中进行非nested object字段分组
    GET /website/_search 
    {
      "size": 0,
      "aggs": {
        "comments_path": {
          "nested": {
            "path": "comments"
          },
          "aggs": {
            "group_by_comments_age": {
              "histogram": {
                "field": "comments.age",
                "interval": 10
              },
              "aggs": {
                "reverse_path": {
                  "reverse_nested": {}, 
                  "aggs": {
                    "group_by_tags": {
                      "terms": {
                        "field": "tags.keyword"
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  • 相关阅读:
    7.13dfs例题:部分和
    7.12dfs例题:数独游戏
    1.2题解:如何找数组中唯一成对的那个数(位运算)
    左程云Java算法(1)
    SQL基本语句增删改查
    Python spyder Ipython console 连接失败问题
    VBA——Msgbox
    python 字符串
    Scrapy-selectors总结
    文字单行居中,多行居左/居右
  • 原文地址:https://www.cnblogs.com/agasha/p/14365639.html
Copyright © 2020-2023  润新知