索引工具的三部分
1.索引部分
2.分词部分
3.搜索部分
查看索引的工具:Luke(运行方式:java -jar fileName.jar)
目标:为指定文件夹中的所有文档生成索引,并对该索引进行搜索
package com.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene example: builds an on-disk index for every regular file in a
 * folder and then runs a fixed query against that index.
 *
 * <p>Written against Lucene 3.5.
 */
public class HelloLucene {

    /** Location of the on-disk index, shared by {@link #index()} and {@link #search()}. */
    private static final String INDEX_DIR = "d:/index_01";

    /** Folder whose files are indexed by {@link #index()}. */
    private static final String DOCS_DIR = "G:/lucene/";

    /**
     * Builds the index and then searches it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HelloLucene hl = new HelloLucene();
        hl.index();
        hl.search();
    }

    /**
     * Builds the document index.
     *
     * <p>One document is added per regular file directly inside the source
     * folder; sub-directories are skipped. The writer and the directory are
     * always closed, including on success, so the index lock is released.
     * I/O failures are reported via {@code printStackTrace()} and not rethrown.
     */
    public void index() {
        try {
            // 1. Create the Directory (decides where the index is stored).
            //    A RAMDirectory could be used instead to keep the index in memory.
            Directory directory = FSDirectory.open(new File(INDEX_DIR));
            try {
                // 2. Create the IndexWriter used to write the index.
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
                        new StandardAnalyzer(Version.LUCENE_35));
                IndexWriter writer = new IndexWriter(directory, iwc);
                try {
                    File[] files = new File(DOCS_DIR).listFiles();
                    if (files == null) {
                        // listFiles() returns null when the path is not a readable directory.
                        System.err.println("Cannot list files in " + DOCS_DIR);
                    } else {
                        for (File file : files) {
                            // Only regular files can be opened with a FileReader.
                            if (file.isFile()) {
                                indexFile(writer, file);
                            }
                        }
                    }
                    // A single commit after the loop persists all added documents
                    // (and creates the index even when nothing was added).
                    writer.commit();
                } finally {
                    writer.close();
                }
            } finally {
                directory.close();
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds one file to the index as a document with the fields
     * {@code content}, {@code filename} and {@code path}.
     *
     * @param writer open writer the document is added to
     * @param file   regular file to index
     * @throws IOException if the file cannot be read or the document cannot be added
     */
    private void indexFile(IndexWriter writer, File file) throws IOException {
        FileReader content = new FileReader(file);
        try {
            // 3./4. Create the Document and add its Fields (each attribute of a
            //       document, such as its name, is one Field).
            Document doc = new Document();
            doc.add(new Field("content", content));
            doc.add(new Field("filename", file.getName(),
                    Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
            doc.add(new Field("path", file.getAbsolutePath(),
                    Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
            // 5. Add the document to the index through the IndexWriter.
            writer.addDocument(doc);
        } finally {
            // Close explicitly so the file handle is released even if
            // addDocument fails; closing an already-closed reader is harmless.
            content.close();
        }
    }

    /**
     * Searches the {@code content} field of the index for the term
     * {@code document} and prints {@code filename|path} for each of the top
     * 100 hits, followed by the number of hits printed.
     *
     * <p>The searcher, reader and directory are always closed. I/O and query
     * parse failures are reported via {@code printStackTrace()} and not rethrown.
     */
    public void search() {
        try {
            // 1. Create the Directory.
            Directory directory = FSDirectory.open(new File(INDEX_DIR));
            try {
                // 2. Create the IndexReader.
                IndexReader reader = IndexReader.open(directory);
                try {
                    // 3. Create the IndexSearcher from the IndexReader.
                    IndexSearcher searcher = new IndexSearcher(reader);
                    try {
                        // 4. Create the Query; "content" is the default field.
                        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                                new StandardAnalyzer(Version.LUCENE_35));
                        // The text to search for.
                        Query query = parser.parse("document");
                        // 5. Run the search and get the TopDocs.
                        TopDocs tds = searcher.search(query, 100);
                        // 6. Get the ScoreDoc array from the TopDocs.
                        ScoreDoc[] sds = tds.scoreDocs;
                        for (ScoreDoc sd : sds) {
                            // 7. Load the concrete Document for this hit.
                            Document d = searcher.doc(sd.doc);
                            // 8. Read the stored values from the Document.
                            System.out.println(d.get("filename") + "|" + d.get("path"));
                        }
                        System.out.println(sds.length);
                    } finally {
                        searcher.close();
                    }
                } finally {
                    reader.close();
                }
            } finally {
                directory.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}