对于每天更新的索引,可以采用增量更新,例子如下:
例如:第一天索引中的文档为 pid=123,mondayCv=23000;第二天增量更新后,同一文档变为 pid=123,mondayCv=23000,tuesdayCv=45000。
package com.sachie.lucene.test; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; import com.sachie.lucene.model.TestObject; public class CreateTeest { /** * @param args */ static IndexWriterConfig conf = null; static { Analyzer analysis = new StandardAnalyzer(Version.LUCENE_36); conf = new IndexWriterConfig(Version.LUCENE_36, analysis); conf.setOpenMode(OpenMode.CREATE_OR_APPEND); conf.setRAMBufferSizeMB(512.00); } public void addDoc(Document doc, String name, String value) { doc.add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED)); } public List<TestObject> getIndexObject(File file) throws IOException { List<TestObject> objectList = new ArrayList<TestObject>(); if (!file.exists()) throw new FileNotFoundException(); else { BufferedReader br = new BufferedReader(new FileReader(file)); String tempStr = null; while ((tempStr = br.readLine()) != null) { String[] tempStrs = tempStr.split("\t"); objectList.add(new TestObject(tempStrs[0], tempStrs[1], tempStrs[2])); } br.close(); } return objectList; } public void createIndex() throws IOException { String sourcePath = "d:\\data"; String target 
= "d:\\testIndex"; File files = new File(sourcePath); IndexWriter indexWriter = null; boolean create = false; Directory directory = new SimpleFSDirectory(new File(target)); IndexSearcher searcher = null; indexWriter = new IndexWriter(directory, conf); for (int i = 0; i < files.listFiles().length; i++) { if (i != 0) { searcher = new IndexSearcher(IndexReader.open(directory)); } File file = files.listFiles()[i]; String date = file.getName(); List<TestObject> list = this.getIndexObject(file); try { for (TestObject tmp : list) { Document doc = new Document(); addDoc(doc, "pid", tmp.getPid()); addDoc(doc, date + "cvOne", tmp.getCvOne()); addDoc(doc, date + "cvAll", tmp.getCvAll()); if (i == 0) indexWriter.addDocument(doc); else this.searchAndUpdateDocument(indexWriter, searcher, doc, new Term("pid", tmp.getPid())); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } // indexWriter.forceMerge(1); indexWriter.commit(); indexWriter.forceMerge(1); } indexWriter.close(); } public void searchAndUpdateDocument(IndexWriter writer, IndexSearcher searcher, Document updateDoc, Term term) throws IOException { TermQuery query = new TermQuery(term); TopDocs hits = searcher.search(query, 10); if (hits.scoreDocs.length == 0) { writer.addDocument(updateDoc); } else if (hits.scoreDocs.length > 1) { throw new IllegalArgumentException( "Given Term matches more than 1 document in the index."); } else { int docId = hits.scoreDocs[0].doc; Document doc = searcher.doc(docId); List<Fieldable> replacementFields = updateDoc.getFields(); for (Fieldable field : replacementFields) { String name = field.name(); String currentValue = doc.get(name); if (currentValue != null) { doc.removeFields(name); doc.add(field); } else { doc.add(field); } } writer.updateDocument(term, doc); } } public static void main(String args[]) throws IOException { CreateTeest ic = new CreateTeest(); ic.createIndex(); } }