• Lucene 索引搜索 BlogIndex.java


    package com.blog.lucene;
    import com.blog.entity.Blog;
    import com.blog.utils.DateUtils;
    import com.blog.utils.StringUtils;
    import org.apache.commons.lang.StringEscapeUtils;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.*;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.*;
    import org.apache.lucene.search.highlight.*;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    
    import java.io.IOException;
    import java.io.StringReader;
    import java.nio.file.Paths;
    import java.util.Date;
    import java.util.LinkedList;
    import java.util.List;
    
    
    /**
     * Maintains and queries the Lucene full-text index of blog posts
     * (add, update, delete and search).
     *
     * <p>Every operation opens the index directory, performs its work and closes
     * all Lucene resources again, so no reader, writer or file handle outlives a
     * call. Lucene permits only one open {@link IndexWriter} per directory, which
     * is why each writer is closed even when the operation fails.
     */
    @SuppressWarnings("all")
    public class BlogIndex {

        /** Index location used by the no-argument constructor. */
        private static final String DEFAULT_LUCENE_PATH = "e://lucene";

        /** Upper bound on the number of hits a search returns. */
        private static final int MAX_HITS = 100;

        /** Length of the content preview used when no fragment could be highlighted. */
        private static final int PREVIEW_LENGTH = 200;

        /** Fields that a search query is matched against. */
        private static final String[] SEARCH_FIELDS = {"title", "content", "keyWord"};

        /** File-system path of the index directory. */
        private final String lucenePath;

        /**
         * Creates an index helper working on the default index location.
         */
        public BlogIndex() {
            this(DEFAULT_LUCENE_PATH);
        }

        /**
         * Creates an index helper working on the given index location.
         *
         * @param lucenePath file-system path of the Lucene index directory
         * @throws IllegalArgumentException if {@code lucenePath} is {@code null}
         */
        public BlogIndex(String lucenePath) {
            if (lucenePath == null) {
                throw new IllegalArgumentException("lucenePath must not be null");
            }
            this.lucenePath = lucenePath;
        }

        /**
         * Opens the index directory. The caller is responsible for closing it.
         *
         * @return the opened directory
         * @throws IOException if the directory cannot be opened
         */
        private Directory openDirectory() throws IOException {
            return FSDirectory.open(Paths.get(lucenePath));
        }

        /**
         * Creates a writer on the given directory. The caller is responsible for closing it.
         *
         * @param directory the index directory to write to
         * @param analyzer  the analyzer used to tokenize text fields
         * @return a new index writer
         * @throws IOException if the writer cannot be created (for example, the index is locked)
         */
        private IndexWriter getWriter(Directory directory, SmartChineseAnalyzer analyzer) throws IOException {
            return new IndexWriter(directory, new IndexWriterConfig(analyzer));
        }

        /**
         * Builds the Lucene document for a blog post. Shared by {@link #addIndex} and
         * {@link #updateIndex} so both always index the same fields in the same way.
         * Fields whose value is {@code null} are left out, because Lucene field
         * constructors reject {@code null} values.
         *
         * @param blog the blog post to convert
         * @return the document to index
         */
        private Document buildDocument(Blog blog) {
            Document document = new Document();
            document.add(new StringField("id", String.valueOf(blog.getId()), Field.Store.YES));
            String title = blog.getTitle();
            if (title != null) {
                document.add(new TextField("title", title, Field.Store.YES));
            }
            // The stored date is the moment of indexing, not a date taken from the blog.
            document.add(new StringField("releaseDate", DateUtils.formatDate(new Date(), "yyyy-MM-dd"), Field.Store.YES));
            String content = blog.getContentNoTag();
            if (content != null) {
                // TextField: content must be tokenized to be searchable by individual words.
                document.add(new TextField("content", content, Field.Store.YES));
            }
            String keyWord = blog.getKeyWord();
            if (keyWord != null) {
                document.add(new StringField("keyWord", keyWord, Field.Store.YES));
            }
            return document;
        }

        /**
         * Adds a blog post to the index.
         *
         * @param blog the blog post to index
         * @throws IOException if the index cannot be opened or written
         */
        public void addIndex(Blog blog) throws IOException {
            try (Directory directory = openDirectory();
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                 IndexWriter writer = getWriter(directory, analyzer)) {
                writer.addDocument(buildDocument(blog));
            }
        }

        /**
         * Replaces the indexed document whose {@code id} matches the blog's id
         * (or adds it when no such document exists).
         *
         * @param blog the blog post to re-index
         * @throws IOException if the index cannot be opened or written
         */
        public void updateIndex(Blog blog) throws IOException {
            try (Directory directory = openDirectory();
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                 IndexWriter writer = getWriter(directory, analyzer)) {
                writer.updateDocument(new Term("id", String.valueOf(blog.getId())), buildDocument(blog));
            }
        }

        /**
         * Removes the document with the given id from the index.
         *
         * @param blogId id of the blog post to remove
         * @throws IOException if the index cannot be opened or written
         */
        public void deleteIndex(String blogId) throws IOException {
            try (Directory directory = openDirectory();
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                 IndexWriter writer = getWriter(directory, analyzer)) {
                writer.deleteDocuments(new Term("id", blogId));
                writer.forceMergeDeletes();
                writer.commit();
            }
        }

        /**
         * Searches title, content and keyword for the given query and returns the
         * matching posts with the matched terms wrapped in highlight markup.
         *
         * @param q the query in Lucene classic query-parser syntax
         * @return at most {@value #MAX_HITS} matching posts, best match first; an empty
         *         list when {@code q} is {@code null} or blank or when no index exists yet
         * @throws Exception if the query cannot be parsed or the index cannot be read
         */
        public List<Blog> searchBlog(String q) throws Exception {
            List<Blog> blogList = new LinkedList<Blog>();
            if (q == null || q.trim().isEmpty()) {
                return blogList;
            }
            try (Directory directory = openDirectory();
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
                // Nothing has been indexed yet: report "no results" instead of failing.
                if (!DirectoryReader.indexExists(directory)) {
                    return blogList;
                }
                try (IndexReader reader = DirectoryReader.open(directory)) {
                    IndexSearcher searcher = new IndexSearcher(reader);

                    // A hit in any one of the searched fields is sufficient.
                    BooleanQuery.Builder builder = new BooleanQuery.Builder();
                    for (String field : SEARCH_FIELDS) {
                        builder.add(new QueryParser(field, analyzer).parse(q), BooleanClause.Occur.SHOULD);
                    }
                    Query query = builder.build();
                    TopDocs hits = searcher.search(query, MAX_HITS);

                    Highlighter highlighter = createHighlighter(query);
                    for (ScoreDoc scoreDoc : hits.scoreDocs) {
                        blogList.add(toBlog(searcher.doc(scoreDoc.doc), analyzer, highlighter));
                    }
                }
            }
            return blogList;
        }

        /**
         * Creates the highlighter that wraps terms of the query in bold red markup.
         *
         * @param query the query whose terms are highlighted
         * @return the configured highlighter
         */
        private Highlighter createHighlighter(Query query) {
            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
            return highlighter;
        }

        /**
         * Returns the best highlighted fragment of {@code text}.
         *
         * @param highlighter the highlighter to apply
         * @param analyzer    the analyzer used to tokenize {@code text}
         * @param field       name of the field the text belongs to
         * @param text        the text to highlight; must not be {@code null}
         * @return the highlighted fragment as returned by the highlighter
         *         (callers treat an empty or {@code null} result as "nothing to highlight")
         */
        private String highlight(Highlighter highlighter, SmartChineseAnalyzer analyzer, String field, String text)
                throws IOException, InvalidTokenOffsetsException {
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            return highlighter.getBestFragment(tokenStream, text);
        }

        /**
         * Converts a stored index document into a {@code Blog}, highlighting the matched
         * terms in title, content and keyword. Each value is written to its own property.
         *
         * @param doc         the stored document of a search hit
         * @param analyzer    the analyzer used for highlighting
         * @param highlighter the highlighter to apply
         * @return the populated blog
         */
        private Blog toBlog(Document doc, SmartChineseAnalyzer analyzer, Highlighter highlighter)
                throws IOException, InvalidTokenOffsetsException {
            Blog blog = new Blog();
            blog.setId(Integer.valueOf(doc.get("id")));
            blog.setReleaseDateStr(doc.get("releaseDate"));

            String title = doc.get("title");
            if (title != null) {
                String highlighted = highlight(highlighter, analyzer, "title", title);
                blog.setTitle(StringUtils.isEmpty(highlighted) ? title : highlighted);
            }

            // Content is HTML-escaped before highlighting so only the highlight tags are markup.
            String content = StringEscapeUtils.escapeHtml(doc.get("content"));
            if (content != null) {
                String highlighted = highlight(highlighter, analyzer, "content", content);
                if (StringUtils.isEmpty(highlighted)) {
                    // No match inside the content: fall back to a short preview.
                    blog.setContent(content.length() <= PREVIEW_LENGTH
                            ? content
                            : content.substring(0, PREVIEW_LENGTH));
                } else {
                    blog.setContent(highlighted);
                }
            }

            String keyWord = doc.get("keyWord");
            if (keyWord != null) {
                String highlighted = highlight(highlighter, analyzer, "keyWord", keyWord);
                // NOTE(review): relies on Blog exposing setKeyWord as the counterpart of getKeyWord.
                blog.setKeyWord(StringUtils.isEmpty(highlighted) ? keyWord : highlighted);
            }
            return blog;
        }
    }
  • 相关阅读:
    IBM MQ 学习
    spring中配置监听队列的MQ
    数据库优化(二)
    设计模式
    VBA学习笔记(2)--新建word文档并插入文字
    VBA代码分行
    excel保存时出现“请注意,您的文档的部分内容可能包含了文档检查器无法删除的个人信息”
    Excel VBA 操作 Word(入门篇)
    win10无法使用内置管理员账户打开应用
    五笔字根拆分规则_字根拆分方法
  • 原文地址:https://www.cnblogs.com/gaoyangliu/p/12630083.html
Copyright © 2020-2023  润新知