• Elasticsearch 6.5.4 安装IK Analysis插件


    1.找到合适的版本

    IK version    ES version
    6.1.1 6.1.1
    5.6.4 5.6.4
    5.5.3 5.5.3
    5.4.3 5.4.3
    5.3.3 5.3.3
    5.2.2 5.2.2
    5.1.2 5.1.2
    1.10.6 2.4.6
    1.9.5 2.3.5
    1.8.1 2.2.1
    1.7.0 2.1.1
    1.5.0 2.0.0
    1.2.6 1.0.0
    1.2.5 0.90.x
    1.1.3 0.20.x
    1.0.0 0.16.2 -> 0.19.0

    2.下载对应的zip包

    下载与ES版本对应的IK zip包(5.x及之后的IK版本号与ES版本号保持一致,例如ES 6.5.4对应IK 6.5.4)。

    在本地elasticsearch根目录下的plugins下新建ik文件夹,将zip包中内容解压到该文件夹下。

    现在重启es就可以了

    3. 安装kibana进行测试

    创建index,type

    PUT /yf-springboot-es-ik1
    
    POST /yf-springboot-es-ik1/springboot-test/_mapping
    {
      "springboot-test": {
        "properties": {
          "keyword": {
            "type": "text",
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_max_word"
            
          }
        }
      }
    }

    使用浏览器进行测试:

    http://localhost:9200/yf-springboot-es-ik1/springboot-test/14/_termvectors?fields=keyword

    ip + 端口 + index + type + id + _termvectors + ?fields = 搜索字段

    POST /yf-springboot-es-ik1/springboot-test/14
    {"keyword":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
    {
        "_index": "yf-springboot-es-ik1",
        "_type": "springboot-test",
        "_id": "14",
        "_version": 4,
        "found": true,
        "took": 0,
        "term_vectors": {
            "keyword": {
                "field_statistics": {
                    "sum_doc_freq": 14,
                    "doc_count": 1,
                    "sum_ttf": 14
                },
                "terms": {
                    "中国": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 0,
                                "start_offset": 0,
                                "end_offset": 2
                            }
                        ]
                    },
                    "亚裔": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 7,
                                "start_offset": 10,
                                "end_offset": 12
                            }
                        ]
                    },
                    "嫌犯": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 11,
                                "start_offset": 17,
                                "end_offset": 19
                            }
                        ]
                    },
                    "子枪": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 9,
                                "start_offset": 13,
                                "end_offset": 15
                            }
                        ]
                    },
                    "已": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 12,
                                "start_offset": 19,
                                "end_offset": 20
                            }
                        ]
                    },
                    "枪击": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 10,
                                "start_offset": 14,
                                "end_offset": 16
                            }
                        ]
                    },
                    "洛杉矶": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 2,
                                "start_offset": 3,
                                "end_offset": 6
                            }
                        ]
                    },
                    "男子": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 8,
                                "start_offset": 12,
                                "end_offset": 14
                            }
                        ]
                    },
                    "自首": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 13,
                                "start_offset": 20,
                                "end_offset": 22
                            }
                        ]
                    },
                    "遭": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 6,
                                "start_offset": 9,
                                "end_offset": 10
                            }
                        ]
                    },
                    "领事": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 4,
                                "start_offset": 6,
                                "end_offset": 8
                            }
                        ]
                    },
                    "领事馆": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 3,
                                "start_offset": 6,
                                "end_offset": 9
                            }
                        ]
                    },
                    "馆": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 5,
                                "start_offset": 8,
                                "end_offset": 9
                            }
                        ]
                    },
                    "驻": {
                        "term_freq": 1,
                        "tokens": [
                            {
                                "position": 1,
                                "start_offset": 2,
                                "end_offset": 3
                            }
                        ]
                    }
                }
            }
        }
    }

     使用kibana添加数据并且查询

    POST /yf-springboot-es-ik1/springboot-test/11
    {"keyword":"美国留给伊拉克的是个烂摊子吗"}
    
    POST /yf-springboot-es-ik1/springboot-test/12
    {"keyword":"公安部:各地校车将享最高路权"}
    
    POST /yf-springboot-es-ik1/springboot-test/13
    {"keyword":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
    
    POST /yf-springboot-es-ik1/springboot-test/14
    {"keyword":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
    
    POST /yf-springboot-es-ik1/springboot-test/_search
    {
        "query" : { "match" : { "keyword" : "中国" }},
        "highlight" : {
            "pre_tags" : ["<tag1>", "<tag2>"],
            "post_tags" : ["</tag1>", "</tag2>"],
            "fields" : {
                "keyword" : {}
            }
        }
    }

    查询结果:

    {
      "took" : 0,
      "timed_out" : false,
      "_shards" : {
        "total" : 5,
        "successful" : 5,
        "skipped" : 0,
        "failed" : 0
      },
      "hits" : {
        "total" : 2,
        "max_score" : 0.5480699,
        "hits" : [
          {
            "_index" : "yf-springboot-es-ik1",
            "_type" : "springboot-test",
            "_id" : "13",
            "_score" : 0.5480699,
            "_source" : {
              "keyword" : "中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"
            },
            "highlight" : {
              "keyword" : [
                "中韩渔警冲突调查:韩警平均每天扣1艘<tag1>中国</tag1>渔船"
              ]
            }
          },
          {
            "_index" : "yf-springboot-es-ik1",
            "_type" : "springboot-test",
            "_id" : "14",
            "_score" : 0.2876821,
            "_source" : {
              "keyword" : "中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
            },
            "highlight" : {
              "keyword" : [
                "<tag1>中国</tag1>驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"
              ]
            }
          }
        ]
      }
    }

    4.ik_max_word 和 ik_smart 什么区别?

    ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合;

    ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”

  • 相关阅读:
    Ant.OutputIsUnreadableCode
    Android.HowToDesignPluginArchitectureInAndroidApp
    Java.FamousBlogs
    Java.WeakReference-SoftReference-PhantomReference
    DataStructure.BloomFilter
    Android.HowToDefineCustomView
    Android.Study.Question
    Android.PublishApplication
    Android.Libraries
    Site.AboutHardware
  • 原文地址:https://www.cnblogs.com/yangfei-beijing/p/10220983.html
Copyright © 2020-2023  润新知