• Lucene测试类


    package test.lucene;

    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.util.Date;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class TextFileIndexer {
    public static void main(String[] args) throws Exception {
    /* 指明要索引文件夹的位置,这里是C盘的source文件夹下 */
    File fileDir = new File("C:\source");
    /* 这里放索引文件的位置 */
    File indexDir = new File("C:\index");
    Directory dir=FSDirectory.open(indexDir);
    Analyzer analyzer=new StandardAnalyzer(Version.LUCENE_36);
    IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_36, analyzer);
    indexWriterConfig.setOpenMode(OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(dir,indexWriterConfig);
    File[] textFiles = fileDir.listFiles();
    long startTime = new Date().getTime();
    //增加document到索引去
    for (int i = 0; i < textFiles.length; i++) {
    if (textFiles[i].isFile()
    && textFiles[i].getName().endsWith(".txt")) {
    System.out.println("File " + textFiles[i].getCanonicalPath()
    + "正在被索引....");
    String temp = FileReaderAll(textFiles[i].getCanonicalPath(),
    "GBK");
    System.out.println(temp);
    Document document = new Document();
    //建立field
    Field FieldPath = new Field("path", textFiles[i].getPath(),
    Field.Store.YES, Field.Index.NO);
    Field FieldBody = new Field("body", temp, Field.Store.YES,
    Field.Index.ANALYZED,
    Field.TermVector.WITH_POSITIONS_OFFSETS);
    //添加到document.
    document.add(FieldPath);
    document.add(FieldBody);
    //添加到indexWriter中.
    indexWriter.addDocument(document);
    }
    }
    indexWriter.close();

    //测试一下索引的时间
    long endTime = new Date().getTime();
    System.out
    .println("这花费了"
    + (endTime - startTime)
    + " 毫秒来把文档增加到索引里面去!"
    + fileDir.getPath());
    }
    public static String FileReaderAll(String FileName, String charset) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(
    new FileInputStream(FileName), charset));
    String line = new String();
    String temp = new String();

    while ((line = reader.readLine()) != null) {
    temp += line;
    }
    reader.close();
    return temp;
    }
    }

  • 相关阅读:
    hdu 5001 从任意点出发任意走d步不经过某点概率
    hdu 5007
    hdu 5009 离散化
    hdu 5011 Nim+拿完分堆
    thinkphp 删除多条记录
    thinkphp 实现无限极分类
    图片生成唯一的名字
    html 标签学习
    PHP比较运算!=和!==
    php使用 set_include_path
  • 原文地址:https://www.cnblogs.com/likeju/p/5090727.html
Copyright © 2020-2023  润新知