一、安装
1、安装ElasticSearch
1.1 Windows下安装
1.1.1解压缩即可使用
1.1.2目录结构:
1.2安装ElasticSearch-head界面显示
1.2.1下载地址:
https://github.com/mobz/elasticsearch-head
1.2.2安装步骤(需要node.js环境):
cd elasticsearch-head
npm install
npm run start
1.2.3解决跨域问题
修改elasticsearch.yml,加上
http.cors.enabled: true
http.cors.allow-origin: "*"
1.3安装kibana
1.3.1什么是kibana?
Kibana是Elastic官方提供的Elasticsearch数据可视化工具,可以利用Elasticsearch的聚合功能生成各种图表。它还提供了操作Elasticsearch索引数据的控制台,并且提供了一定的API提示,非常有利于我们学习Elasticsearch的语法。
1.3.2安装步骤
kibana版本需与elasticsearch保持一致
1)解压即可使用
2)汉化界面:修改kibana.yml
加上
i18n.locale: "zh-CN"
1.4安装ik分词器
1.4.1解压到/path/to/elasticsearch-7.6.1/plugins
1.4.2使用
GET /_analyze
{
  "analyzer": "ik_smart",
  "text": "白云城主叶孤城"
}

GET /_analyze
{
  "analyzer": "ik_max_word",
  "text": "白云城主叶孤城"
}
效果:
不足:叶孤城没有成为一个词
1.4.3添加自定义分词字典
1)新建my.dic
2)配置IKAnalyzer.cfg.xml:
3)效果:
二、操作Elasticsearch
2.1基本概念
Elasticsearch也是基于Lucene的全文检索库,本质也是存储数据,很多概念与MySQL类似。
对比关系:
索引(indices)--------------------------------Databases 数据库
类型(type)-----------------------------------Table 数据表
文档(Document)-------------------------------Row 行
字段(Field)----------------------------------Columns 列
详细说明:
另外,在SolrCloud中,有一些集群相关的概念,在Elasticsearch也有类似的:
- 索引集(Indices,index的复数):逻辑上的完整索引 collection1
- 分片(shard):数据拆分后的各个部分
- 副本(replica):每个分片的复制
要注意的是:Elasticsearch本身就是分布式的,因此即便你只有一个节点,Elasticsearch默认也会对你的数据进行分片和副本操作;当你向集群添加新节点时,数据也会在新加入的节点中进行平衡。
2.2springboot集成elasticsearch
官方文档地址:https://www.elastic.co/guide/en/elasticsearch/client/java-rest/7.6/java-rest-high.html
2.2.1创建springboot工程
添加配置类:
package com.example.esapi.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchConfig {

    /**
     * Builds a {@link RestHighLevelClient} pointed at {@code http://127.0.0.1:9200}.
     *
     * @return the client bean to inject wherever Elasticsearch access is needed
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("127.0.0.1", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
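注意:客户端依赖的版本必须与Elasticsearch服务端版本一致(本文使用7.6.1)。如果Spring Boot默认管理的版本不同,可在pom.xml的<properties>中添加<elasticsearch.version>7.6.1</elasticsearch.version>进行覆盖。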
2.2.2索引操作
package com.example.esapi; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.CreateIndexResponse; import org.elasticsearch.client.indices.GetIndexRequest; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import java.io.IOException; @SpringBootTest class EsApiApplicationTests { @Autowired private RestHighLevelClient restHighLevelClient; //测试索引的创建 @Test void testCreateIndex() throws IOException { //1.创建索引请求 CreateIndexRequest request = new CreateIndexRequest("my_index"); //2.客户端执行请求IndiceClient,请求后获得响应 CreateIndexResponse createIndexResponse = restHighLevelClient.indices().create(request, RequestOptions.DEFAULT); System.out.println(createIndexResponse); } //测试获取索引 @Test void testExistIndex() throws IOException { GetIndexRequest request = new GetIndexRequest("my_index"); boolean exists = restHighLevelClient.indices().exists(request, RequestOptions.DEFAULT); System.out.println(exists); } //测试删除索引 @Test void testDeleteIndex() throws IOException { DeleteIndexRequest request = new DeleteIndexRequest("my_index"); //删除 AcknowledgedResponse delete = restHighLevelClient.indices().delete(request, RequestOptions.DEFAULT); System.out.println(delete.isAcknowledged()); } }
2.2.3文档操作
//测试添加文档 @Test void testAddDocument() throws IOException { //创建对象 User user = new User("jack", 22); //创建请求 IndexRequest request = new IndexRequest("my_index"); //规则 put/my_index/_doc/1 request.id("1"); request.timeout("1s"); //将我们的数据放入请求 json request.source(JSON.toJSONString(user), XContentType.JSON); //客户端发送请求,获取响应结果 IndexResponse indexResponse = restHighLevelClient.index(request, RequestOptions.DEFAULT); System.out.println(indexResponse.toString()); System.out.println(indexResponse.status()); } //获取文档,判断是否存在 get/index/doc/1 @Test void testIsExists() throws IOException { GetRequest request = new GetRequest("my_index", "1"); //不获取返回的_source的上下文 request.fetchSourceContext(new FetchSourceContext(false)); request.storedFields("_none_"); boolean exists = restHighLevelClient.exists(request, RequestOptions.DEFAULT); System.out.println(exists); } //获得文档的信息 @Test void testGetDocument() throws IOException { GetRequest request = new GetRequest("my_index", "1"); GetResponse getResponse = restHighLevelClient.get(request, RequestOptions.DEFAULT); System.out.println(getResponse.getSourceAsString()); //打印文档的内容 System.out.println(getResponse); } //更新文档的信息 @Test void testUpdateDocument() throws IOException { //创建对象 User user = new User("mike", 12); //创建请求 UpdateRequest request = new UpdateRequest("my_index", "1"); request.timeout("1s"); //将我们的数据放入请求 json request.doc(JSON.toJSONString(user), XContentType.JSON); //客户端发送请求,获取响应结果 UpdateResponse updateResponse = restHighLevelClient.update(request, RequestOptions.DEFAULT); System.out.println(updateResponse.status()); } //删除文档记录 @Test void testDeleteDocument() throws IOException { DeleteRequest deleteRequest = new DeleteRequest("my_index", "1"); deleteRequest.timeout("1s"); DeleteResponse deleteResponse = restHighLevelClient.delete(deleteRequest, RequestOptions.DEFAULT); System.out.println(deleteResponse.status()); } //批量操作 插入数据 @Test void testBulkDocument() throws IOException { BulkRequest bulkRequest = new BulkRequest(); 
bulkRequest.timeout("10s"); ArrayList<User> users = new ArrayList<>(); users.add(new User("jack", 11)); users.add(new User("july", 22)); users.add(new User("john", 15)); users.add(new User("dick", 32)); users.add(new User("lily", 19)); users.add(new User("lucy", 20)); //批量处理请求 for (int i = 0; i < users.size(); i++) { bulkRequest.add( new IndexRequest("my_index") .id(""+(i+1)) .source(JSON.toJSONString(users.get(i)), XContentType.JSON) ); } BulkResponse bulkItemResponses = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); System.out.println(bulkItemResponses.hasFailures()); //返回false即为成功 } //查询 @Test void testSearch() throws IOException { SearchRequest searchRequest = new SearchRequest("my_index"); //构建搜索条件 SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); //查询条件,我们可以使用QueryBuilders工具来实现 TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("name", "lucy"); sourceBuilder.query(termQueryBuilder); sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); System.out.println(JSON.toJSONString(searchResponse.getHits())); System.out.println("----------------------------"); for (SearchHit documentFields : searchResponse.getHits().getHits()) { System.out.println(documentFields.getSourceAsMap()); } }
三、实战
3.1创建项目
1)添加依赖
<!-- 解析网页-->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
<!-- 阿里fastjson包JSON转换-->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.47</version>
</dependency>
2)项目结构
3.2源码
1)Content.java
package com.example.jdsearch.pojo;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * One product entry: image URL, price text and title.
 *
 * <p>Field order is significant: the Lombok all-args constructor takes its
 * parameters in declaration order ({@code img, price, title}).
 */
@AllArgsConstructor
@NoArgsConstructor
@Data
public class Content {

    /** Product image location. */
    private String img;

    /** Price, kept as text. */
    private String price;

    /** Product title. */
    private String title;
}
2)ContentService
package com.example.jdsearch.service; import com.alibaba.fastjson.JSON; import com.example.jdsearch.pojo.Content; import com.example.jdsearch.utils.HtmlParseUtil; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @Service public class ContentService { @Autowired private RestHighLevelClient restHighLevelClient; //解析数据并将数据放入es public Boolean parse(String keywords) throws IOException { List<Content> contents = new HtmlParseUtil().parse(keywords); //把查询到的数据放入es BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("3m"); //批量处理请求 for (int i = 0; i < contents.size(); i++) { bulkRequest.add( new IndexRequest("jd_index") .source(JSON.toJSONString(contents.get(i)), XContentType.JSON) ); } BulkResponse bulkItemResponses = restHighLevelClient.bulk(bulkRequest, RequestOptions.DEFAULT); return !bulkItemResponses.hasFailures(); //返回false即为成功 } //搜索 public List<Map<String, Object>> search(String keywords, Integer pageNo, 
Integer pageSize) throws IOException { if(pageNo <= 1) pageNo = 1; //条件搜索 SearchRequest searchRequest = new SearchRequest("jd_index"); //构建搜索条件 SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); //分页 sourceBuilder.from(pageNo); sourceBuilder.size(pageSize); //查询条件,我们可以使用QueryBuilders工具来实现 TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("title", keywords); sourceBuilder.query(termQueryBuilder); sourceBuilder.timeout(new TimeValue(60, TimeUnit.SECONDS)); //高亮 HighlightBuilder highlightBuilder = new HighlightBuilder(); highlightBuilder.field("title"); highlightBuilder.requireFieldMatch(false);//是否需要多个高亮显示 highlightBuilder.preTags("<span style='color:red'>"); highlightBuilder.postTags("</span>"); sourceBuilder.highlighter(highlightBuilder); //执行结果 searchRequest.source(sourceBuilder); SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT); //解析结果 ArrayList<Map<String, Object>> list = new ArrayList<>(); for (SearchHit documentFields : searchResponse.getHits().getHits()) { Map<String, HighlightField> highlightFields = documentFields.getHighlightFields(); HighlightField title = highlightFields.get("title"); Map<String, Object> sourceAsMap = documentFields.getSourceAsMap(); //解析高亮的字段,将原来的字段换成我们高亮的字段即可 if(title != null){ Text[] fragments = title.fragments(); String n_title = ""; for (Text text : fragments) { n_title += text; } sourceAsMap.put("title", n_title); } list.add(sourceAsMap); } return list; } }
3)HtmlParseUtil
package com.example.jdsearch.utils; import com.example.jdsearch.pojo.Content; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; public class HtmlParseUtil { public static void main(String[] args) throws IOException { HtmlParseUtil htmlParseUtil = new HtmlParseUtil(); List<Content> contents = htmlParseUtil.parse("java"); contents.forEach(System.out::println); } public List<Content> parse(String keywords) throws IOException { //准备url String url = "https://search.jd.com/Search?keyword="+keywords; //解析网页,jsoup返回的document对象就是js中的document对象 Document document = Jsoup.parse(new URL(url), 30000); Element j_goodsList = document.getElementById("J_goodsList"); List<Content> contents = new ArrayList<>(); //获取所有的li元素 Elements elements = j_goodsList.getElementsByTag("li"); for (Element element : elements) { String img = element.getElementsByTag("img").eq(0).attr("data-lazy-img"); //图片延迟加载,不在src String price = element.getElementsByClass("p-price").eq(0).text(); String title = element.getElementsByClass("p-name").eq(0).text(); contents.add(new Content(img, price, title)); } return contents; } }
4)ContentController
package com.example.jdsearch.web; import com.example.jdsearch.service.ContentService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import java.io.IOException; import java.util.List; import java.util.Map; @RestController public class ContentController { @Autowired private ContentService contentService; //爬取数据 @RequestMapping("/parse/{keywords}") public Boolean parse(@PathVariable("keywords") String keywords) throws IOException { return contentService.parse(keywords); } //查询数据 @RequestMapping("/search/{keywords}/{pageNo}/{pageSize}") public List<Map<String, Object>> search( @PathVariable("keywords") String keywords, @PathVariable("pageNo") Integer pageNo, @PathVariable("pageSize") Integer pageSize) throws IOException { return contentService.search(keywords, pageNo, pageSize); } }
5)IndexController
package com.example.jdsearch.web;

import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;

/**
 * Serves the search page.
 */
@Controller
public class IndexController {

    /**
     * Handles GET requests for {@code /} and {@code /index}.
     *
     * @return the view name {@code index}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        final String viewName = "index";
        return viewName;
    }
}
6)index.html
<!DOCTYPE html>
<html xmlns:th="http://www.thymeleaf.org">
<head>
    <meta charset="utf-8"/>
    <title>狂神说Java-ES仿京东实战</title>
    <link rel="stylesheet" th:href="@{/css/style.css}"/>
</head>
<body class="pg">
<div class="page" id="app">
    <div id="mallPage" class=" mallist tmall- page-not-market ">
        <!-- Header with the search box -->
        <div id="header" class=" header-list-app">
            <div class="headerLayout">
                <div class="headerCon ">
                    <!-- Logo -->
                    <h1 id="mallLogo">
                        <img th:src="@{/images/jdlogo.png}" alt="">
                    </h1>
                    <div class="header-extra">
                        <!-- Search form -->
                        <div id="mallSearch" class="mall-search">
                            <form name="searchTop" class="mallSearch-form clearfix">
                                <fieldset>
                                    <legend>天猫搜索</legend>
                                    <div class="mallSearch-input clearfix">
                                        <div class="s-combobox" id="s-combobox-685">
                                            <div class="s-combobox-input-wrap">
                                                <input type="text" autocomplete="off" value="dd" id="mq" class="s-combobox-input" aria-haspopup="true" v-model="keywords">
                                            </div>
                                        </div>
                                        <button type="submit" id="searchbtn" @click.prevent="search">搜索</button>
                                    </div>
                                </fieldset>
                            </form>
                            <ul class="relKeyTop">
                                <li><a>狂神说Java</a></li>
                                <li><a>狂神说前端</a></li>
                                <li><a>狂神说Linux</a></li>
                                <li><a>狂神说大数据</a></li>
                                <li><a>狂神聊理财</a></li>
                            </ul>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        <!-- Product listing area -->
        <div id="content">
            <div class="main">
                <!-- Brand filter -->
                <form class="navAttrsForm">
                    <div class="attrs j_NavAttrs" style="display:block">
                        <div class="brandAttr j_nav_brand">
                            <div class="j_Brand attr">
                                <div class="attrKey"> 品牌 </div>
                                <div class="attrValues">
                                    <ul class="av-collapse row-2">
                                        <li><a href="#"> 狂神说 </a></li>
                                        <li><a href="#"> Java </a></li>
                                    </ul>
                                </div>
                            </div>
                        </div>
                    </div>
                </form>
                <!-- Sort options -->
                <div class="filter clearfix">
                    <a class="fSort fSort-cur">综合<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">人气<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">新品<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">销量<i class="f-ico-arrow-d"></i></a>
                    <a class="fSort">价格<i class="f-ico-triangle-mt"></i><i class="f-ico-triangle-mb"></i></a>
                </div>
                <!-- Product cards, one per search hit -->
                <div class="view grid-nosku">
                    <div class="product" v-for="item in items">
                        <div class="product-iWrap">
                            <!-- Cover image -->
                            <div class="productImg-wrap">
                                <a class="productImg">
                                    <img :src="item.img">
                                </a>
                            </div>
                            <!-- Price -->
                            <p class="productPrice">
                                <em>{{item.price}}</em>
                            </p>
                            <!-- Title. NOTE(review): v-html is needed to render the highlight
                                 span, but the title text is scraped from a third-party page and
                                 is injected unescaped - consider sanitizing it before rendering. -->
                            <p class="productTitle">
                                <a v-html="item.title"> </a>
                            </p>
                            <!-- Shop name -->
                            <div class="productShop">
                                <span>店铺: 狂神说Java </span>
                            </div>
                            <!-- Sales information -->
                            <p class="productStatus">
                                <span>月成交<em>999笔</em></span>
                                <span>评价 <a>3</a></span>
                            </p>
                        </div>
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>
<script th:src="@{/js/vue.min.js}"></script>
<script th:src="@{/js/axios.min.js}"></script>
<script>
    const app = new Vue({
        el: "#app",
        data: {
            keywords: "",
            items: []
        },
        methods: {
            // Requests the first page (10 hits) for the entered keyword.
            search: function () {
                const keyword = this.keywords.trim();
                // An empty keyword would produce the path "/search//1/10"; skip the request.
                if (!keyword) {
                    return;
                }
                // The keyword is a path segment, so it must be encoded: otherwise
                // "/", "?", "#" or non-ASCII text would change or break the URL.
                axios.get("/search/" + encodeURIComponent(keyword) + "/1/10")
                    .then(result => {
                        this.items = result.data;
                    })
                    .catch(error => {
                        // Keep the current items and surface the failure for debugging.
                        console.error("search request failed", error);
                    });
            }
        }
    })
</script>
</body>
</html>
7)application.properties
spring.thymeleaf.cache=false
server.port=9090