• springboot使用rest-high-level-client集成elasticsearch 7.5.1


    添加pom

     <!-- Elasticsearch high-level REST client, version 7.5.1 -->
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-high-level-client</artifactId>
                <version>7.5.1</version>
                <!-- The transitive elasticsearch core and low-level REST client are excluded here
                     and declared explicitly below with the same 7.5.1 version.
                     NOTE(review): presumably so that inherited dependency management cannot
                     substitute a different version for them; confirm against the parent POM. -->
                <exclusions>
                    <exclusion>
                        <groupId>org.elasticsearch</groupId>
                        <artifactId>elasticsearch</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.elasticsearch.client</groupId>
                        <artifactId>elasticsearch-rest-client</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <!-- Low-level REST client, declared explicitly (excluded from the artifact above) -->
            <dependency>
                <groupId>org.elasticsearch.client</groupId>
                <artifactId>elasticsearch-rest-client</artifactId>
                <version>7.5.1</version>
            </dependency>
            <!-- Elasticsearch core library, declared explicitly (excluded from the artifact above) -->
            <dependency>
                <groupId>org.elasticsearch</groupId>
                <artifactId>elasticsearch</artifactId>
                <version>7.5.1</version>
            </dependency>

    yml添加配置

    # Elasticsearch connection settings; ElasticConfig injects them through
    # @Value as es.host, es.port and es.scheme.
    es:
      host: 192.168.1.107
      port: 9200
      scheme: http

    初始化client

    package com.zh.search.config;
    
    
    import org.apache.http.HttpHost;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestClientBuilder;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Value;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;
    
    /**
     * Spring configuration that exposes the Elasticsearch REST clients as beans.
     * The target node is described by the {@code es.host}, {@code es.port} and
     * {@code es.scheme} properties.
     */
    @Configuration
    public class ElasticConfig {

        /** Value of the {@code es.host} property. */
        @Value("${es.host}")
        public String host;

        /** Value of the {@code es.port} property. */
        @Value("${es.port}")
        public int port;

        /** Value of the {@code es.scheme} property. */
        @Value("${es.scheme}")
        public String scheme;

        /**
         * Builder for REST clients that target the configured node.
         *
         * @return a new builder pointing at {@code scheme://host:port}
         */
        @Bean
        public RestClientBuilder restClientBuilder() {
            HttpHost node = targetNode();
            return RestClient.builder(node);
        }

        /**
         * Low-level REST client built from its own builder for the configured node.
         *
         * @return a newly built low-level client
         */
        @Bean
        public RestClient restClient(){
            RestClientBuilder builder = RestClient.builder(targetNode());
            return builder.build();
        }

        /**
         * High-level REST client created from the {@link RestClientBuilder} bean.
         *
         * @param restClientBuilder builder supplied by the container
         * @return the high-level client
         */
        @Bean
        public RestHighLevelClient restHighLevelClient(@Autowired RestClientBuilder restClientBuilder){
            return new RestHighLevelClient(restClientBuilder);
        }

        /** Assembles the node address from the injected host, port and scheme. */
        private HttpHost targetNode() {
            return new HttpHost(host, port, scheme);
        }
    }

    在resources/mapper目录下创建索引配置json文件(文件名需与下文代码中加载的路径 mapper/setting.json 保持一致):

    setting.json

    {
    // Index settings: shard/replica counts, refresh interval and the custom analysis chain.
    // NOTE(review): "//" comments are not valid strict JSON - confirm that the parser
    // loading this file accepts them, and that line breaks are preserved when it is read.
      "number_of_shards": 5,
      "number_of_replicas": 1,
      "refresh_interval": "5s",
      "analysis": {
        "analyzer": {
    //      IK fine-grained tokenization (ik_max_word)
          "ikSearchAnalyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "char_filter": [
              "tsconvert"
            ]
          },
    //      IK coarse-grained tokenization (ik_smart)
          "ikSmartSearchAnalyzer": {
            "type": "custom",
            "tokenizer": "ik_smart",
            "char_filter": [
              "tsconvert"
            ]
          },
    //      Pinyin tokenization
          "pinyinSimpleAnalyzer": {
            "tokenizer": "my_pinyin"
          },
    //      Pinyin + lowercase + phrase (edge n-gram) analysis
          "pinyinComplexAnalyzer": {
            "tokenizer": "ik_smart",
            "filter": [
              "lowercase",
              "pinyin_simple_filter",
              "edge_ngram_filter"
            ]
          },
    //      Lowercasing analyzer: keyword tokenizer followed by the lowercase filter
          "lowercaseAnalyzer": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": "lowercase"
          }
        },
        "tokenizer" : {
          "my_pinyin" : {
            "type" : "pinyin",
            "keep_separate_first_letter" : false,
            "keep_full_pinyin" : true,
            "keep_original" : true,
            "limit_first_letter_length" : 16,
            "lowercase" : true,
            "remove_duplicated_term" : true
          }
        },
        "filter": {
    //      Phrase (edge n-gram) filter
          "edge_ngram_filter": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 50
          },
    //      Pinyin filter
          "pinyin_simple_filter": {
            "type": "pinyin",
            "first_letter": "prefix",
            "padding_char": " ",
            "limit_first_letter_length": 50, // maximum length of the first_letter result; default: 16
    //        "keep_separate_first_letter" : false, // when enabled, first letters are kept as separate terms, e.g. 刘德华 > l,d,h; default: false. Note: query results may become too fuzzy because such terms are very frequent
    //        "keep_full_pinyin" : true,  // when enabled, e.g. 刘德华 > [liu,de,hua]; default: true
    //        "keep_original" : true, // when enabled, the original input is kept as well; default: false
    //        "remove_duplicated_term" : true,  // when enabled, duplicated terms are removed to save index space, e.g. de的 > de; default: false. Note: position-related queries may be affected
            "lowercase": true // lowercase non-Chinese letters; default: true
          }
        },
        "char_filter": {
    //      Traditional-to-Simplified Chinese conversion (convert_type t2s)
          "tsconvert": {
            "type": "stconvert",
            "convert_type": "t2s"
          }
        }
      }
    }

    在resources/mapper目录下创建索引映射文件(文件名格式为"索引名-mapping.json",与下文代码加载的路径一致)

    commodity-mapping.json

    {
      "properties": {
        "id": {
          "type": "integer"
        },
        "keyword": {
          // text vs keyword: a text field is tokenized by its analyzer when stored and the tokens are indexed; a keyword field is indexed as-is without tokenization
          "type": "text",
          "analyzer": "ikSearchAnalyzer",
          "search_analyzer": "ikSmartSearchAnalyzer",
          "fields": {
            "pinyin": {
              "type": "text",
              "analyzer": "pinyinComplexAnalyzer",
              "search_analyzer": "pinyinComplexAnalyzer",
              "store": false,
              "term_vector": "with_offsets"
            }
          }
        },
        "ownerNature": {
          "type": "keyword"
        },
        "model": {
          "type": "keyword",
          // not indexed, so this field cannot be used as a search criterion
          "index": false
        },
        "weight": {
          "type": "integer"
        },
        "createTime": {
          "type": "date",
          "format": "yyyy-MM-dd HH:mm:ss"
        }
      }
    }

    采用json的方式,我觉得直观一点

    创建索引(需要注意的是,7.x起es已弃用type并计划在8.x彻底移除,每个索引只有一个默认的type,即_doc,创建索引和读写文档时都不需要再指定type)

    /**
     * Makes sure the {@code commodity} index exists by delegating to
     * {@code createIndex}.
     *
     * @throws Exception if the index check or creation fails
     */
    public void init() throws Exception {
        final String commodityIndex = "commodity";
        createIndex(commodityIndex);
    }
    
    /**
         * 创建索引
         * @param index
         * @throws IOException
         */
        public void createIndex(String index) throws IOException {
            //如果存在就不创建了
            if(this.existsIndex(index)) {
                System.out.println(index+"索引库已经存在!");
                return;
            }
            // 开始创建库
            CreateIndexRequest request = new CreateIndexRequest(index);
            //配置文件
            ClassPathResource seResource = new ClassPathResource("mapper/setting.json");
            InputStream seInputStream = seResource.getInputStream();
            String seJson = String.join("
    ",IOUtils.readLines(seInputStream,"UTF-8"));
            seInputStream.close();
            //映射文件
            ClassPathResource mpResource = new ClassPathResource("mapper/"+index+"-mapping.json");
            InputStream mpInputStream = mpResource.getInputStream();
            String mpJson = String.join("
    ",IOUtils.readLines(mpInputStream,"UTF-8"));
            mpInputStream.close();
    
            request.settings(seJson, XContentType.JSON);
            request.mapping(mpJson, XContentType.JSON);
    
            //设置别名
            request.alias(new Alias(index+"_alias"));
            CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT);
            boolean falg = createIndexResponse.isAcknowledged();
            if(falg){
                System.out.println("创建索引库:"+index+"成功!" );
            }
        }

    判断索引是否存在

        /**
         * Checks whether an index exists.
         *
         * @param index name of the index to look up
         * @return the result of the indices {@code exists} call for {@code index}
         * @throws IOException if the request fails
         */
        public boolean existsIndex(String index) throws IOException {
            final GetIndexRequest lookup = new GetIndexRequest(index);
            lookup.local(false);
            lookup.humanReadable(true);

            final boolean exists = restHighLevelClient.indices().exists(lookup, RequestOptions.DEFAULT);
            return exists;
        }

    删除索引

        /**
         * Deletes an index.
         *
         * @param index name of the index to delete
         * @return whether the delete request was acknowledged
         * @throws IOException if the request fails
         */
        public boolean delIndex(String index) throws IOException {
            final AcknowledgedResponse ack = restHighLevelClient
                    .indices()
                    .delete(new DeleteIndexRequest(index), RequestOptions.DEFAULT);
            return ack.isAcknowledged();
        }

     添加索引数据

        /**
         * Indexes (creates or overwrites) one document.
         *
         * <p>The target index is taken from {@code kv.getStr("index")}, the document id
         * from {@code kv.getStr("id")}, and {@code kv} itself is sent as the document
         * source.
         * NOTE(review): because the whole {@code kv} is the source, its {@code index}
         * entry is sent as a document field too - confirm this is intended.
         *
         * @param kv key/value pairs matching the index mapping, plus the {@code index} name
         * @return {@code true} when the document was created or updated
         * @throws IOException if the request fails
         */
        public boolean save(Kv kv) throws IOException {
            IndexRequest request = new IndexRequest(kv.getStr("index"))
                    .id(kv.getStr("id")).source(kv);
            IndexResponse response = restHighLevelClient.index(request,RequestOptions.DEFAULT);
            // isFragment() only describes how the response renders as XContent and says
            // nothing about success; the write result is the actual outcome.
            org.elasticsearch.action.DocWriteResponse.Result result = response.getResult();
            return result == org.elasticsearch.action.DocWriteResponse.Result.CREATED
                    || result == org.elasticsearch.action.DocWriteResponse.Result.UPDATED;
        }

    删除索引数据

        /**
         * Deletes one document by id from the index named
         * {@code ModuleConstants.COMMODITY.toLowerCase()}.
         *
         * @param id id of the document to delete
         * @return {@code true} when a document was actually deleted, {@code false}
         *         otherwise (for example when no document has that id)
         * @throws IOException if the request fails
         */
        public boolean delById(String id) throws IOException {
            DeleteRequest request = new DeleteRequest(ModuleConstants.COMMODITY.toLowerCase(),id);
            DeleteResponse response = restHighLevelClient.delete(request,RequestOptions.DEFAULT);
            // isFragment() only describes how the response renders as XContent and says
            // nothing about success; the write result is the actual outcome.
            return response.getResult() == org.elasticsearch.action.DocWriteResponse.Result.DELETED;
        }

    IK,拼音,短语分词分页搜索

     // Elasticsearch high-level client used to execute the search requests.
     @Resource
        private RestHighLevelClient restHighLevelClient;
        // Messaging channel; page() sends each SearchVo through searchSaveOutput()
        // to record the searched keyword.
        @Resource
        private OutputChannel outputChannel;
    
        /**
         * Paged keyword search combining the analyzed {@code keyword} field and its
         * {@code keyword.pinyin} sub-field.
         *
         * <p>At least one of the two {@code should} clauses has to match; when
         * {@code searchVo.getKeyword1()} is not blank, hits are additionally restricted
         * to that {@code ownerNature}. Any exception raised while searching is only
         * printed and the page created up front (total 0) is returned unchanged. After
         * the search, {@code searchVo} is re-targeted to the keyword index and sent to
         * the output channel so the searched keyword is recorded.
         *
         * <p>Query builder notes carried over from the original author:
         * <pre>
         * termQuery("key", obj)                          exact match
         * termsQuery("key", obj1, obj2..)                match any of several values
         * matchQuery("key", obj)                         single-field match; the field name does not support wildcards
         * multiMatchQuery("text", "field1", "field2"..)  match several fields; field names may contain wildcards
         * matchAllQuery()                                match all documents
         *
         * Compound queries:
         * must(QueryBuilders)     AND
         * mustNot(QueryBuilders)  NOT
         * should                  OR
         *
         * percent_terms_to_match  percentage of terms that must match, default 0.3
         * min_term_freq           minimum occurrences of a term in a document; rarer terms are ignored, default 2
         * max_query_terms         maximum number of terms allowed in one query, default 25
         * stop_words              stop words, ignored when matching
         * min_doc_freq            minimum number of documents a term must appear in, default unlimited
         * max_doc_freq            maximum number of documents a term may appear in, default unlimited
         * min_word_len            minimum term length, default 0
         * max_word_len            maximum term length, default unlimited
         * boost_terms             term weight, default 1
         * boost                   query weight, default 1
         * analyzer                analyzer to use, defaults to the analyzer configured on the field
         * </pre>
         *
         * @param searchVo search criteria: keyword, optional owner nature (keyword1),
         *                 index name and paging values
         * @return the page holding the total hit count and the hits of the requested page
         */
        @Override
        public Page<SearchVo> page(SearchVo searchVo){
            // Diamond instead of the raw type, so the page is type-checked.
            Page<SearchVo> page = new Page<>(searchVo.getCurrent(),searchVo.getSize(),0);
            try {
                SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
                BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
                // boost() sets the weight of each clause.
                // Analyzed (tokenized) match on the keyword field
                boolQueryBuilder.should(QueryBuilders.matchQuery("keyword", searchVo.getKeyword()).boost(2f));
                // Phrase match on the pinyin sub-field
                boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("keyword.pinyin", searchVo.getKeyword()).boost(2f));
                // Wildcard (fuzzy) query, case-insensitive
    //            boolQueryBuilder.should(QueryBuilders.wildcardQuery("keyword", "*"+searchVo.getKeyword().toLowerCase()+"*").boost(2f));
                // Restrict to the requested merchant nature, when given
                if(StrKit.notBlank(searchVo.getKeyword1())){
                    boolQueryBuilder.must(QueryBuilders.termQuery("ownerNature",searchVo.getKeyword1()));
                }
                // At least one of the should clauses has to match
                boolQueryBuilder.minimumShouldMatch(1);
                // Time range query
    //            boolQueryBuilder.must(QueryBuilders.rangeQuery("createTime")
    //                    .from(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss"))
    //                    .to(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss")));
                sourceBuilder.query(boolQueryBuilder);
                // Limit the returned fields
    //            String[] includeFields = new String[] {"keyword"};
    //            sourceBuilder.fetchSource(includeFields,null);
                // Paging
                sourceBuilder.from(searchVo.getFrom());
                sourceBuilder.size(searchVo.getSize());
                //        sourceBuilder.sort("id", SortOrder.ASC); // sort order
                sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));

                SearchRequest searchRequest = new SearchRequest(searchVo.getIndex());
                searchRequest.source(sourceBuilder);
                SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
                SearchHits searchHits = response.getHits();
                page.setTotal(searchHits.getTotalHits().value);

                // Copy the stored fields of every hit into a SearchVo.
                List<SearchVo> list = new ArrayList<>();
                for (SearchHit hit : searchHits.getHits()) {
                    SearchVo vo = new SearchVo();
                    Kv kv = Kv.create().set(hit.getSourceAsMap());
                    vo.setId(kv.getStr("id"));
                    vo.setKeyword(kv.getStr("keyword"));
                    vo.setKeyword1(kv.getStr("ownerNature"));
                    vo.setModel(kv.getStr("model"));
                    list.add(vo);
                }
                page.setRecords(list);
            } catch (Exception e) {
                // Best effort: the failure is only printed and the page built so far is returned.
                e.printStackTrace();
            }
            // Record the searched keyword
            searchVo.setIndex(ModuleConstants.KEYWORD.toLowerCase());
            outputChannel.searchSaveOutput().send(MessageBuilder.withPayload(searchVo).build());

            return page;
        }

     

    IK,拼音,短语分词分页并高亮关键词搜索

        // Elasticsearch high-level client used by pageHigh to execute the search request.
        @Resource
        private RestHighLevelClient restHighLevelClient;
    
        /**
         * Paged keyword search that returns only the {@code keyword} field, with the
         * matched parts wrapped in {@code <high>...</high>}.
         *
         * <p>The query matches the analyzed {@code keyword} field or its
         * {@code keyword.pinyin} sub-field (at least one must match). Highlighting comes
         * from Elasticsearch for the {@code keyword} field; when a hit carries no
         * highlight (for example a pinyin or n-gram match), every literal,
         * case-insensitive occurrence of the search keyword in the text is wrapped
         * instead. Any exception raised while searching is only printed and the page
         * created up front (total 0) is returned unchanged.
         *
         * @param searchVo search criteria: keyword, index name and paging values
         * @return the page holding the total hit count and the highlighted hits
         */
        @Override
        public Page<SearchVo> pageHigh(SearchVo searchVo){
            // Diamond instead of the raw type, so the page is type-checked.
            Page<SearchVo> page = new Page<>(searchVo.getCurrent(),searchVo.getSize(),0);
            try {
                SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
                BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
                // boost() sets the weight of each clause.
                // Analyzed (tokenized) match on the keyword field
                boolQueryBuilder.should(QueryBuilders.matchQuery("keyword", searchVo.getKeyword()).boost(2f));
                // Phrase match on the pinyin sub-field
                boolQueryBuilder.should(QueryBuilders.matchPhraseQuery("keyword.pinyin", searchVo.getKeyword()).boost(2f));
                // Wildcard (fuzzy) query, case-insensitive
    //            boolQueryBuilder.should(QueryBuilders.wildcardQuery("keyword", "*"+searchVo.getKeyword().toLowerCase()+"*").boost(2f));
                // At least one of the should clauses has to match
                boolQueryBuilder.minimumShouldMatch(1);
                // Time range query
    //            boolQueryBuilder.must(QueryBuilders.rangeQuery("createTime")
    //                    .from(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss"))
    //                    .to(DateKit.format(DateKit.getDayBegin(),"yyyy-MM-dd HH:mm:ss")));
                sourceBuilder.query(boolQueryBuilder);
                // Return only the keyword field
                String[] includeFields = new String[] {"keyword"};
                sourceBuilder.fetchSource(includeFields,null);
                // Highlighting: one entry per field to highlight
                List<String> highlightFieldList = new ArrayList<>();
                highlightFieldList.add("keyword");
                HighlightBuilder highlightBuilder = new HighlightBuilder();
                for (String fieldName : highlightFieldList) {
                    HighlightBuilder.Field field = new HighlightBuilder.Field(fieldName).preTags("<high>").postTags("</high>");
                    highlightBuilder.field(field);
                }
                sourceBuilder.highlighter(highlightBuilder);
                // Paging
                sourceBuilder.from(searchVo.getFrom());
                sourceBuilder.size(searchVo.getSize());
                //        sourceBuilder.sort("id", SortOrder.ASC); // sort order
                sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS));
                // Without an index name the search runs against all indices
                SearchRequest searchRequest = new SearchRequest(searchVo.getIndex());
                searchRequest.source(sourceBuilder);
                SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
                SearchHits searchHits = response.getHits();
                page.setTotal(searchHits.getTotalHits().value);
                List<SearchVo> list = new ArrayList<>();

                // The keyword is user input: quote it so regex metacharacters such as
                // "+" or "(" are matched literally instead of breaking the pattern.
                // An empty keyword gets no fallback pattern, because an empty match
                // would insert tags between every character.
                String searchKeyword = searchVo.getKeyword();
                Pattern pattern = (searchKeyword == null || searchKeyword.isEmpty())
                        ? null
                        : Pattern.compile("(?i)" + Pattern.quote(searchKeyword));

                for (SearchHit hit : searchHits.getHits()) {
                    SearchVo vo = new SearchVo();
                    Kv kv = Kv.create().set(hit.getSourceAsMap());
                    vo.setKeyword(kv.getStr("keyword"));
                    // Prefer the highlight produced by Elasticsearch. The pinyin field is
                    // not highlighted because its highlight would cover the whole string.
                    if (hit.getHighlightFields().get("keyword") != null) {
                        Text[] fragments = hit.getHighlightFields().get("keyword").getFragments();
                        if (fragments != null && fragments.length > 0) {
                            vo.setKeyword(fragments[0].toString());
                        }
                    }
                    // Fallback for n-gram / fuzzy hits without a highlight: wrap every
                    // case-insensitive occurrence of the keyword, keeping its original case.
                    String keyword = vo.getKeyword();
                    if (pattern != null && keyword != null && !keyword.contains("<high>")) {
                        vo.setKeyword(pattern.matcher(keyword).replaceAll("<high>$0</high>"));
                    }
                    list.add(vo);
                }
                page.setRecords(list);
            } catch (Exception e) {
                // Best effort: the failure is only printed and the page built so far is returned.
                e.printStackTrace();
            }
            return page;
        }
  • 相关阅读:
    简单两行,实现无线WiFi共享上网,手机抓包再也不用愁了
    Windows下Python 3.6 安装BeautifulSoup库
    RSA加密算法破解及原理
    干货,Wireshark使用技巧-过滤规则
    干货:Wireshark使用技巧-显示规则
    干货!链家二手房数据抓取及内容解析要点
    Wireshark分析实战:某达速递登录帐号密码提取
    协议分析中的TCP/IP网络协议
    Wireshark使用教程:不同报文颜色的含义
    VMware kali虚拟机环境配置
  • 原文地址:https://www.cnblogs.com/suruozhong/p/12190898.html
Copyright © 2020-2023  润新知