• 使用Lucene全文检索并使用中文版和高亮显示


    使用Lucene全文检索并使用中文版和高亮显示

    中文分词需要引入中文分词器的 jar 包,咱们从 maven 中获取

    	<!-- lucene中文分词器 -->
    	<dependency>
    	    <groupId>org.apache.lucene</groupId>
    	    <artifactId>lucene-analyzers-smartcn</artifactId>
    	    <version>5.3.1</version>
    	</dependency>
    

    下面是分词和索引的示例

    	package LuceneTest.LuceneTest;
    
    	import java.nio.file.Paths;
    	
    	import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    	import org.apache.lucene.document.Document;
    	import org.apache.lucene.document.Field.Store;
    	import org.apache.lucene.document.IntField;
    	import org.apache.lucene.document.StringField;
    	import org.apache.lucene.document.TextField;
    	import org.apache.lucene.index.IndexWriter;
    	import org.apache.lucene.index.IndexWriterConfig;
    	import org.apache.lucene.store.Directory;
    	import org.apache.lucene.store.FSDirectory;
    	import org.junit.Test;
    	
    	public class IndexChina {
    
    private Directory dir; //存放索引的位置
    
    //准备一下用来测试的数据
    private Integer ids[] = {1, 2, 3}; //用来标识文档
    private String citys[] = {"上海", "南京", "青岛"};
    private String descs[] = {
        "上海是个繁华的城市。",
        "南京是一个有文化的城市。",
        "青岛是一个美丽的城市。"
    };
    
    //生成索引
    @Test
    public void index(String indexDir) throws Exception {   
        dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter writer = getWriter();
        for(int i = 0; i < ids.length; i++) {
            Document doc = new Document();
            doc.add(new IntField("id", ids[i], Store.YES));
            doc.add(new StringField("city", citys[i], Store.YES));
            doc.add(new TextField("desc", descs[i], Store.YES));
            writer.addDocument(doc); //添加文档
        }
        writer.close(); //close了才真正写到文档中
    }
    
    //获取IndexWriter实例
    private IndexWriter getWriter() throws Exception {
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();//使用中文分词器
        IndexWriterConfig config = new IndexWriterConfig(analyzer); //将标准分词器配到写索引的配置中
        IndexWriter writer = new IndexWriter(dir, config); //实例化写索引对象
        return writer;
    }
    
    public static void main(String[] args) throws Exception {
        new IndexChina().index("D:\lucene2");     
    }
    }
    

    新建的查询

    	package LuceneTest.LuceneTest;
    
    	import java.nio.file.Paths;
    	
    	import org.apache.lucene.analysis.Analyzer;
    	import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    	import org.apache.lucene.analysis.standard.StandardAnalyzer;
    	import org.apache.lucene.document.Document;
    	import org.apache.lucene.index.DirectoryReader;
    	import org.apache.lucene.index.IndexReader;
    	import org.apache.lucene.queryparser.classic.QueryParser;
    	import org.apache.lucene.search.IndexSearcher;
    	import org.apache.lucene.search.Query;
    	import org.apache.lucene.search.ScoreDoc;
    	import org.apache.lucene.search.TopDocs;
    	import org.apache.lucene.store.Directory;
    	import org.apache.lucene.store.FSDirectory;
    	
    	public class SearcherChina {
    
    public static void search(String indexDir, String q) throws Exception {
    
        Directory dir = FSDirectory.open(Paths.get(indexDir)); //获取要查询的路径,也就是索引所在的位置
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); //使用中文分词器
        QueryParser parser = new QueryParser("desc", analyzer); //查询解析器
        Query query = parser.parse(q); //通过解析要查询的String,获取查询对象
    
        long startTime = System.currentTimeMillis(); //记录索引开始时间
        TopDocs docs = searcher.search(query, 10);//开始查询,查询前10条数据,将记录保存在docs中
        long endTime = System.currentTimeMillis(); //记录索引结束时间
        System.out.println("匹配" + q + "共耗时" + (endTime-startTime) + "毫秒");
        System.out.println("查询到" + docs.totalHits + "条记录");
    
        for(ScoreDoc scoreDoc : docs.scoreDocs) { //取出每条查询结果
            Document doc = searcher.doc(scoreDoc.doc); //scoreDoc.doc相当于docID,根据这个docID来获取文档
            System.out.println(doc.get("city")); 
            System.out.println(doc.get("desc")); 
            String desc = doc.get("desc");
        }
        reader.close();
    }
    
    public static void main(String[] args) {
        String indexDir = "D:\lucene2";
        String q = "上海繁华"; //查询这个字符
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    }
    

    搜索结果的高亮显示

    引入jar文件

    	 <!-- lucene高亮显示 -->
    		<dependency>
    		    <groupId>org.apache.lucene</groupId>
    		    <artifactId>lucene-highlighter</artifactId>
    		    <version>5.3.1</version>
    		</dependency>
    

    新建查询并将查询的结果高亮

    	package LuceneTest.LuceneTest;
    
    	import java.io.StringReader;
    	import java.nio.file.Paths;
    	
    	import org.apache.lucene.analysis.Analyzer;
    	import org.apache.lucene.analysis.TokenStream;
    	import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    	import org.apache.lucene.analysis.standard.StandardAnalyzer;
    	import org.apache.lucene.document.Document;
    	import org.apache.lucene.index.DirectoryReader;
    	import org.apache.lucene.index.IndexReader;
    	import org.apache.lucene.queryparser.classic.QueryParser;
    	import org.apache.lucene.search.IndexSearcher;
    	import org.apache.lucene.search.Query;
    	import org.apache.lucene.search.ScoreDoc;
    	import org.apache.lucene.search.TopDocs;
    	import org.apache.lucene.search.highlight.Fragmenter;
    	import org.apache.lucene.search.highlight.Highlighter;
    	import org.apache.lucene.search.highlight.QueryScorer;
    	import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    	import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
    	import org.apache.lucene.store.Directory;
    	import org.apache.lucene.store.FSDirectory;
    	
    	public class SearcherChina {
    	
    	    public static void search(String indexDir, String q) throws Exception {
    	
    	        Directory dir = FSDirectory.open(Paths.get(indexDir)); //获取要查询的路径,也就是索引所在的位置
    	        IndexReader reader = DirectoryReader.open(dir);
    	        IndexSearcher searcher = new IndexSearcher(reader);
    	        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(); //使用中文分词器
    	        QueryParser parser = new QueryParser("desc", analyzer); //查询解析器
    	        Query query = parser.parse(q); //通过解析要查询的String,获取查询对象
    	
    	        long startTime = System.currentTimeMillis(); //记录索引开始时间
    	        TopDocs docs = searcher.search(query, 10);//开始查询,查询前10条数据,将记录保存在docs中
    	        long endTime = System.currentTimeMillis(); //记录索引结束时间
    	        System.out.println("匹配" + q + "共耗时" + (endTime-startTime) + "毫秒");
    	        System.out.println("查询到" + docs.totalHits + "条记录");
    	
    	        
    	        //此处加入的是搜索结果的高亮部分
    	        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>"); //如果不指定参数的话,默认是加粗,即<b><b/>
    	        QueryScorer scorer = new QueryScorer(query);//计算得分,会初始化一个查询结果最高的得分
    	        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); //根据这个得分计算出一个片段
    	        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
    	        highlighter.setTextFragmenter(fragmenter); //设置一下要显示的片段
    	
    	        
    	        
    	        for(ScoreDoc scoreDoc : docs.scoreDocs) { //取出每条查询结果
    	            Document doc = searcher.doc(scoreDoc.doc); //scoreDoc.doc相当于docID,根据这个docID来获取文档
    	            System.out.println(doc.get("city")); 
    	            System.out.println(doc.get("desc")); 
    	            String desc = doc.get("desc");
    	            
    	            
    	          //显示高亮部分
    	            if(desc != null) {
    	                TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
    	                String summary = highlighter.getBestFragment(tokenStream, desc);
    	                System.out.println(summary);
    	            }
    	            
    	        }
    	        
    	        
    	        
    	        reader.close();
    	    }
    	
    	    public static void main(String[] args) {
    	        String indexDir = "D:\lucene2";
    	        String q = "南京文化"; //查询这个字符
    	        try {
    	            search(indexDir, q);
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }
    	    }
    	}
  • 相关阅读:
    网络基础知识
    mysql安装
    docker打包镜像
    python的基础
    python静态属性的理解
    python中的静态方法和类方法
    python类的两种创建方式
    python的继承
    python中time和datetime模块
    python之模块
  • 原文地址:https://www.cnblogs.com/wangshouchang/p/6869630.html
Copyright © 2020-2023  润新知