• lucene中的IndexWriter.setMaxFieldLength()


    lucene中的IndexWriter.setMaxFieldLength()

    老版本的Lucene中,IndexWriter的maxFieldLength是指单个文档的每个Field中最多被索引的词条(term)个数,而不是一个索引中Field的个数。

    这个属性在Lucene 2.9.0中是不可见的(私有),只能通过相应的setMaxFieldLength(int maxFieldLength)和getMaxFieldLength()方法来修改和读取;

    当某个Field中已被索引的词条数达到这个值时,该Field中其后的词条都会被忽略,即使在同一文档中对已经存在的同名Field再追加内容,超出部分也不会被索引。附上一个测试类(出自《Lucene in Action》)

    package test;

    import java.io.File;

    import java.io.IOException;

    import junit.framework.TestCase;

    import org.apache.lucene.analysis.SimpleAnalyzer;

    import org.apache.lucene.document.Document;

    import org.apache.lucene.document.Field;

    import org.apache.lucene.index.IndexWriter;

    import org.apache.lucene.index.Term;

    import org.apache.lucene.search.IndexSearcher;

    import org.apache.lucene.search.Query;

    import org.apache.lucene.search.ScoreDoc;

    import org.apache.lucene.search.TermQuery;

    import org.apache.lucene.search.TopScoreDocCollector;

    import org.apache.lucene.store.Directory;

    import org.apache.lucene.store.FSDirectory;

    public class FieldLengthTest extends TestCase {

     private Directory dir;

     private String[] keywords = {"1", "2"};

     private String[] unindexed = {"Netherlands", "Italy"};

     private String[] unstored = {"Amsterdam has lots of bridges",

                                  "Venice has lots of canals"};

     private String[] text = {"Amsterdam", "Venice"};

     protected void setUp() throws IOException {

       String indexDir =

         System.getProperty("java.io.tmpdir", "tmp") +

         System.getProperty("file.separator") + "index-dir";

       dir = FSDirectory.open(new File(indexDir));

     }

     public void testFieldSize() throws IOException {

       addDocuments(dir, 10);

       assertEquals(1, getHitCount("contents", "bridges")); 

       addDocuments(dir, 1); 

       assertEquals(0, getHitCount("contents", "bridges"));

     }

     private int getHitCount(String fieldName, String searchString)

       throws IOException {

       IndexSearcher searcher = new IndexSearcher(dir, true);

       Term t = new Term(fieldName, searchString);

       Query query = new TermQuery(t);

       TopScoreDocCollector tsdc = TopScoreDocCollector.create(10, false);

       searcher.search(query, tsdc);

       ScoreDoc[] hits = tsdc.topDocs().scoreDocs;

       int hitCount = hits.length;

       searcher.close();

       return hitCount;

     }

     private void addDocuments(Directory dir, int maxFieldLength)

       throws IOException {

       IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(),

         true, IndexWriter.MaxFieldLength.LIMITED);

       writer.setMaxFieldLength(maxFieldLength); 

       for (int i = 0; i < keywords.length; i++) {

         Document doc = new Document();

         doc.add(new Field("contents", unstored[i], Field.Store.YES, Field.Index.ANALYZED));

         //doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));

         doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));

         doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));

         doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));

         writer.addDocument(doc);

       }

       writer.optimize();

       writer.close();

     }

    }

    (转自:http://blog.sina.com.cn/s/blog_49b531af0100it66.html)

  • 相关阅读:
    棋盘型动态规划 之 CODE[VS] 1220 数字三角形
    棋盘型动态规划 之 CODE[VS] 1219 骑士游历 1997年
    棋盘型动态规划 之 CODE[VS] 1169 传纸条 2008年NOIP全国联赛提高组
    棋盘型动态规划 之 CODE[VS] 1010 过河卒 2002年NOIP全国联赛普及组
    [实验]自举?只不过是电容和二极管捣的乱
    [嵌入式]I2C协议指东
    [语法]C语言中二维数组做输入参数
    深入类方法。
    今天周六祝大家休息开心,我还在奋斗....
    我们一起来一把.....现在学习真的好累,一天下来脑袋要炸
  • 原文地址:https://www.cnblogs.com/fengweixin/p/3598107.html
Copyright © 2020-2023  润新知