1 import java.io.File; 2 import java.io.IOException; 3 import java.io.StringReader; 4 5 import org.apache.lucene.analysis.Analyzer; 6 import org.apache.lucene.analysis.TokenStream; 7 import org.apache.lucene.document.Document; 8 import org.apache.lucene.document.TextField; 9 import org.apache.lucene.document.Field.Store; 10 import org.apache.lucene.index.IndexReader; 11 import org.apache.lucene.index.IndexWriter; 12 import org.apache.lucene.index.IndexWriterConfig; 13 import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 14 import org.apache.lucene.queryparser.classic.ParseException; 15 import org.apache.lucene.queryparser.classic.QueryParser; 16 import org.apache.lucene.search.IndexSearcher; 17 import org.apache.lucene.search.Query; 18 import org.apache.lucene.search.ScoreDoc; 19 import org.apache.lucene.search.TopDocs; 20 import org.apache.lucene.search.TopScoreDocCollector; 21 import org.apache.lucene.search.highlight.Highlighter; 22 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 23 import org.apache.lucene.search.highlight.QueryScorer; 24 import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 25 import org.apache.lucene.store.Directory; 26 import org.apache.lucene.store.FSDirectory; 27 import org.apache.lucene.util.Version; 28 import org.wltea.analyzer.lucene.IKAnalyzer; 29 30 public class IndexTools { 31 /** 32 * 获得indexwriter对象 33 * 34 * @param dir 35 * @return 36 * @throws IOException 37 * @throws Exception 38 */ 39 private IndexWriter getIndexWriter(Directory dir, Analyzer analyzer) throws IOException { 40 IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); 41 return new IndexWriter(dir, iwc); 42 } 43 44 /** 45 * 关闭indexwriter对象 46 * 47 * @throws IOException 48 * 49 * @throws Exception 50 */ 51 private void closeWriter(IndexWriter indexWriter) throws IOException { 52 if (indexWriter != null) { 53 indexWriter.close(); 54 } 55 } 56 57 /** 58 * 创建索引 59 * 60 * @throws InvalidTokenOffsetsException 61 */ 62 public void createIndex() throws InvalidTokenOffsetsException { 63 String indexPath = "D://luceneindex"; // 建立索引文件的目录 64 // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分 65 Analyzer analyzer = new IKAnalyzer(true); 66 IndexWriter indexWriter = null; 67 Directory directory = null; 68 try { 69 directory = FSDirectory.open(new File(indexPath)); 70 indexWriter = getIndexWriter(directory, analyzer); 71 } catch (Exception e) { 72 System.out.println("索引打开异常!"); 73 } 74 // 添加索引 75 try { 76 Document document = new Document(); 77 document.add(new TextField("filename", "标题:起点", Store.YES)); 78 document.add(new TextField("content", "内容:我是一名程序员", Store.YES)); 79 indexWriter.addDocument(document); 80 Document document1 = new Document(); 81 document1.add(new TextField("filename", "标题:终点", Store.YES)); 82 document1.add(new TextField("content", "内容:我不再只是程序员", Store.YES)); 83 indexWriter.addDocument(document1); 84 indexWriter.commit(); 85 } catch (IOException e1) { 86 System.out.println("索引创建异常!"); 87 } 88 try { 89 closeWriter(indexWriter); 90 } catch (Exception e) { 91 System.out.println("索引关闭异常!"); 92 } 93 } 94 95 /** 96 * 搜索 97 * 98 * @throws ParseException 99 * @throws IOException 100 * @throws InvalidTokenOffsetsException 101 */ 102 @SuppressWarnings("deprecation") 103 public void searchIndex() throws ParseException, IOException, InvalidTokenOffsetsException { 104 String indexPath = "D://luceneindex"; // 建立索引文件的目录 105 // 默认IKAnalyzer()-false:实现最细粒度切分算法,true:分词器采用智能切分 106 Analyzer analyzer = new IKAnalyzer(true); 107 Directory directory = null; 108 try { 109 directory = FSDirectory.open(new File(indexPath)); 110 } catch (Exception e) { 111 System.out.println("索引打开异常!"); 112 } 113 IndexReader ireader = null; 114 IndexSearcher isearcher = null; 115 try { 116 ireader = IndexReader.open(directory); 117 } catch (IOException e) { 118 System.out.println("打开索引文件!"); 119 } 120 isearcher = new IndexSearcher(ireader); 121 String keyword = "程序员"; 122 // 使用QueryParser查询分析器构造Query对象 123 // eg:单个字段查询 124 // String fieldName = "content"; 125 // QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer); 126 String[] fields = { "filename", "content" }; 127 QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_40, fields, analyzer); 128 qp.setDefaultOperator(QueryParser.AND_OPERATOR); 129 Query query = qp.parse(keyword); 130 // 搜索相似度最高的5条记录 131 TopDocs topDocs = isearcher.search(query, 25); 132 System.out.println("命中:" + topDocs.totalHits); 133 // 输出结果 134 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 135 for (int i = 0; i < topDocs.totalHits; i++) { 136 Document targetDoc = isearcher.doc(scoreDocs[i].doc); 137 System.out.println("内容:" + targetDoc.toString()); 138 } 139 // 分页,高亮显示 140 higherIndex(analyzer, isearcher, query, topDocs); 141 } 142 143 public static void main(String[] args) { 144 IndexTools tool = new IndexTools(); 145 try { 146 tool.searchIndex(); 147 } catch (ParseException e) { 148 System.out.println("解析错误"); 149 } catch (IOException e) { 150 System.out.println("读取文件流错误"); 151 } catch (InvalidTokenOffsetsException e) { 152 System.out.println("查询失败"); 153 } 154 } 155 156 /** 157 * 分页,高亮显示 158 * 159 * @param analyzer 160 * @param isearcher 161 * @param query 162 * @param topDocs 163 * @throws IOException 164 * @throws InvalidTokenOffsetsException 165 */ 166 public void higherIndex(Analyzer analyzer, IndexSearcher isearcher, Query query, TopDocs topDocs) 167 throws IOException, InvalidTokenOffsetsException { 168 TopScoreDocCollector results = TopScoreDocCollector.create(topDocs.totalHits, false); 169 isearcher.search(query, results); 170 // 分页取出指定的doc(开始条数, 取几条) 171 ScoreDoc[] docs = results.topDocs(1, 2).scoreDocs; 172 for (int i = 0; i < docs.length; i++) { 173 Document targetDoc = isearcher.doc(docs[i].doc); 174 System.out.println("内容:" + targetDoc.toString()); 175 } 176 // 关键字高亮显示的html标签,需要导入lucene-highlighter-3.5.0.jar 177 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>"); 178 Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); 179 for (int i = 0; i < docs.length; i++) { 180 Document doc = isearcher.doc(docs[i].doc); 181 // 标题增加高亮显示 182 TokenStream tokenStream1 = analyzer.tokenStream("filename", new StringReader(doc.get("filename"))); 183 String title = highlighter.getBestFragment(tokenStream1, doc.get("filename")); 184 // 内容增加高亮显示 185 TokenStream tokenStream2 = analyzer.tokenStream("content", new StringReader(doc.get("content"))); 186 String content = highlighter.getBestFragment(tokenStream2, doc.get("content")); 187 System.out.println(doc.get("filename") + " : " + title + " : " + content); 188 } 189 } 190 }