全文检索lucene6.1的检索方式

全文检索lucene6.1的检索方式
背景：

工作任务完成后，闲暇之际给自己充充电！

Lucene是一个纯java全文检索工具包，采用倒排索引原理。

全文检索：指的是计算机索引程序通过扫描文章的每一个词，对每一个词建立一个索引，并指明该词在文章中出现的次数和位置。

索引的类型分为：1：唯一索引、2：主键索引、3：聚集索引。索引就是加快检索表中数据的方法。

搜索：
一：按被搜索的资源类型
1、可以转为文本的
2、多媒体类型的
二：按照搜索方式：
1、不处理语义，只是找出现了指定词语的所有文本。（指对词语进行匹配）
基本概念：
1、使用流程：先建立索引（索引库），再进行搜索。
2、使用Lucene的数据结构，document、field。
建立索引的过程：
1、定义一个语法分词器
2、确定索引存储的位置
3、创建IndexWriter，进行索引的写入
4、内容提取，进行索引文件的写入
5、关闭indexWriter
从索引库中搜索的过程：
1、打开存储位置
2、创建搜索器
3、类似SQL进行查询
4、处理结果
5、关闭DirectoryReader

-----------------------------------------------------------------------------------------------------------------
1. /**
2. * @项目名称：lucene
3. * @类名称：Article
4. * @类描述：这是一个文章实体类
5. * @创建人：YangChao
6. * @创建时间：2016年8月30日下午3:11:38
7. * @version 1.0.0
8. */
9. public class Article {
10. private Integer id;
11. private String title;
12. private String content;
13. }
1. /**
2. * @项目名称：lucene
3. * @类名称：DocumentUtils
4. * @类描述：文章实体类和Document的转换工具
5. * @创建人：YangChao
6. * @创建时间：2016年8月31日上午10:15:22
7. * @version 1.0.0
8. */
9. public class DocumentUtils {
10. public static Document article2Document(Article article) {
11. Document doc = new Document();
12. doc.add(new Field("id", article.getId().toString(), TextField.TYPE_STORED));
13. doc.add(new Field("title", article.getTitle(), TextField.TYPE_STORED));
14. doc.add(new Field("content", article.getContent(), TextField.TYPE_STORED));
15. return doc;
16. }
18. public static Article document2Ariticle(Document doc) {
19. Article article = new Article();
20. article.setId(Integer.parseInt(doc.get("id")));
21. article.setTitle(doc.get("title"));
22. article.setContent(doc.get("content"));
23. return article;
24. }
25. }
1. /**
2. * @项目名称：lucene
3. * @类名称：LuceneUtils
4. * @类描述：获取分词器和索引位置
5. * @创建人：YangChao
6. * @创建时间：2016年8月31日上午9:48:06
7. * @version 1.0.0
8. */
9. public class LuceneUtils {
10. private static Logger logger = Logger.getLogger(LuceneUtils.class);
11. private static Directory directory;
12. private static Analyzer analyzer;
13. static {
14. try {
15. directory = FSDirectory.open(Paths.get("./tmp/testindex"));
16. // analyzer = new StandardAnalyzer();
17. analyzer = new SmartChineseAnalyzer();
18. } catch (Exception e) {
19. logger.error("LuceneUtils error!", e);
20. }
21. }
23. public static Directory getDirectory() {
24. return directory;
25. }
27. public static Analyzer getAnalyzer() {
28. return analyzer;
29. }
31. public static void closeIndexWriter(IndexWriter indexWriter) {
32. if (indexWriter != null) {
33. try {
34. indexWriter.close();
35. } catch (Exception e2) {
36. logger.error("indexWriter.close error", e2);
37. }
38. }
39. }
41. }
/**
 * Result set of a search: a total record count plus the list of records
 * actually returned.
 *
 * <p>{@code IndexDao.search} fills {@code count} with the total number of
 * hits, which can be larger than the size of {@code list} when only one page
 * of results is returned.
 *
 * <p>Project: lucene<br>
 * Created: 2016-08-31 16:56:24
 *
 * @author YangChao
 * @version 1.0.0
 */
public class QueryResult {
    /** Total number of matching records. */
    private int count;
    /** Records contained in this result; element type is left open. */
    private List<?> list;

    /** Creates an empty result: count {@code 0} and a {@code null} list. */
    public QueryResult() {
    }

    /**
     * Creates a result with the given total count and records.
     *
     * @param count total number of matching records
     * @param list  records contained in this result; stored as given, not copied
     */
    public QueryResult(int count, List<?> list) {
        this.count = count;
        this.list = list;
    }

    /** @return total number of matching records */
    public int getCount() {
        return count;
    }

    /** @param count total number of matching records */
    public void setCount(int count) {
        this.count = count;
    }

    /** @return the records of this result, or {@code null} if none were set */
    public List<?> getList() {
        return list;
    }

    /** @param list the records of this result; stored as given, not copied */
    public void setList(List<?> list) {
        this.list = list;
    }
}
1. /**
2. * @项目名称：lucene
3. * @类名称：IndexDao
4. * @类描述：
5. * @创建人：YangChao
6. * @创建时间：2016年8月31日上午10:12:05
7. * @version 1.0.0
8. */
9. public class IndexDao {
10. private static Logger logger = Logger.getLogger(IndexDao.class);
12. public void save(Article article) {
13. Document doc = DocumentUtils.article2Document(article);
14. IndexWriter indexWriter = null;
15. try {
16. IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
17. indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
18. indexWriter.addDocument(doc);
19. } catch (Exception e) {
20. logger.error("IndexDao.save error", e);
21. } finally {
22. LuceneUtils.closeIndexWriter(indexWriter);
23. }
24. }
26. public void delete(String id) {
27. IndexWriter indexWriter = null;
28. try {
29. Term term = new Term("id", id);
30. IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
31. indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
32. indexWriter.deleteDocuments(term);// 删除含有指定term的所有文档
33. } catch (Exception e) {
34. logger.error("IndexDao.save error", e);
35. } finally {
36. LuceneUtils.closeIndexWriter(indexWriter);
37. }
38. }
40. public void update(Article article) {
41. Document doc = DocumentUtils.article2Document(article);
42. IndexWriter indexWriter = null;
43. try {
44. Term term = new Term("id", article.getId().toString());
45. IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.getAnalyzer());
46. indexWriter = new IndexWriter(LuceneUtils.getDirectory(), config);
47. indexWriter.updateDocument(term, doc);// 先删除，后创建。
48. } catch (Exception e) {
49. logger.error("IndexDao.save error", e);
50. } finally {
51. LuceneUtils.closeIndexWriter(indexWriter);
52. }
53. }
55. public QueryResult search(String queryString, int firstResult, int maxResult) {
56. List<Article> list = new ArrayList<Article>();
57. try {
58. DirectoryReader ireader = DirectoryReader.open(LuceneUtils.getDirectory());
59. // 2、第二步，创建搜索器
60. IndexSearcher isearcher = new IndexSearcher(ireader);
62. // 3、第三步，类似SQL，进行关键字查询
63. String[] fields = { "title", "content" };
64. QueryParser parser = new MultiFieldQueryParser(fields, LuceneUtils.getAnalyzer());
65. Query query = parser.parse("检索");
67. TopDocs topDocs = isearcher.search(query, firstResult + maxResult);
68. int count = topDocs.totalHits;// 总记录数
69. System.out.println("总记录数为：" + topDocs.totalHits);// 总记录数
70. ScoreDoc[] hits = topDocs.scoreDocs;// 第二个参数，指定最多返回前n条结果
72. // 高亮
73. Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
74. Scorer source = new QueryScorer(query);
75. Highlighter highlighter = new Highlighter(formatter, source);
77. // 摘要
78. // Fragmenter fragmenter = new SimpleFragmenter(5);
79. // highlighter.setTextFragmenter(fragmenter);
81. // 处理结果
82. int endIndex = Math.min(firstResult + maxResult, hits.length);
83. for (int i = firstResult; i < endIndex; i++) {
84. Document hitDoc = isearcher.doc(hits[i].doc);
85. Article article = DocumentUtils.document2Ariticle(hitDoc);
86. //
87. String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "content", hitDoc.get("content"));
88. if (text != null) {
89. article.setContent(text);
90. }
91. list.add(article);
92. }
93. ireader.close();
94. return new QueryResult(count, list);
95. } catch (Exception e) {
96. logger.error("IndexDao.search error", e);
97. }
98. return null;
99. }
100. }
Lucene详细学习地址：http://www.cnblogs.com/zhuxiaojie/p/5277219.html
相关阅读:
python wxpython
python tkinter的Label
python tkinter开始
 cmd退出python
数据库基本操作
 mysql中的key primary key 和unique key
python 关于每个项目的解释器
 python3修改文件指定行和4种文件打开方式
 python3 变量格式化转换成字符串
 安装Gitlab到Ubuntu（APT）
原文地址：https://www.cnblogs.com/poilk/p/6600186.html

全文检索lucene6.1的检索方式

背景：