• lucene 学习一


    索引工具的三部分
      1.索引部分
      2.分词部分
      3.搜索部分

    查看索引的工具:luke(启动方式:java -jar fileName.jar,其中 fileName.jar 为下载的 luke jar 包)

    目标:为文件夹的所有的文档生成索引并搜索它

    package com.lucene;
    
    import java.io.File;
    import java.io.FileReader;
    import java.io.IOException;
    
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.util.Version;
    
    // lucene 使用的版本为 3.5
    /**
     * Minimal Lucene 3.5 example: builds an on-disk index for every regular file
     * in a folder and then runs a fixed query against that index.
     */
    public class HelloLucene {

        /** Folder in which the index is stored; shared by {@link #index()} and {@link #search()}. */
        private static final String INDEX_DIR = "d:/index_01";

        /** Folder whose files are indexed. */
        private static final String DOCS_DIR = "G:/lucene/";

        public static void main(String[] args) {
            HelloLucene hl = new HelloLucene();
            hl.index();
            hl.search();
        }

        /**
         * Builds the index for all regular files directly inside {@link #DOCS_DIR}.
         *
         * <p>The writer and the directory are always closed, on success as well as
         * on failure, so the index write lock is released when this method returns.
         * I/O problems are reported on standard error and do not propagate.
         */
        public void index() {
            try {
                // 1. Create the Directory (decides where the index lives).
                //    A RAMDirectory could be used instead to keep the index in memory.
                Directory directory = FSDirectory.open(new File(INDEX_DIR));
                try {
                    // 2. Create the IndexWriter that writes the index.
                    // NOTE(review): the default open mode appears to append to an existing
                    // index, so running this twice would index every file twice — confirm
                    // whether IndexWriterConfig.OpenMode.CREATE is wanted here.
                    IndexWriterConfig iwc = new IndexWriterConfig(
                            Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
                    IndexWriter writer = new IndexWriter(directory, iwc);
                    try {
                        // listFiles() returns null when the folder is missing or unreadable.
                        File[] files = new File(DOCS_DIR).listFiles();
                        if (files == null) {
                            System.err.println("Cannot list files in " + DOCS_DIR);
                            files = new File[0];
                        }

                        for (File file : files) {
                            // Sub-directories cannot be opened with a FileReader; skip them.
                            if (!file.isFile()) {
                                continue;
                            }
                            addFile(writer, file);
                        }

                        // Commit once after all documents were added. This also creates the
                        // index version information when the index did not exist before,
                        // so that search() can open it.
                        writer.commit();
                    } finally {
                        writer.close();
                    }
                } finally {
                    directory.close();
                }
            } catch (IOException e) {
                // Also covers CorruptIndexException and LockObtainFailedException,
                // which are both IOException subclasses.
                e.printStackTrace();
            }
        }

        /**
         * Adds one file to the index as a document with the fields
         * {@code content}, {@code filename} and {@code path}.
         *
         * @param writer open writer the document is added to
         * @param file   regular file to index
         * @throws IOException if the file cannot be read or the index cannot be written
         */
        private void addFile(IndexWriter writer, File file) throws IOException {
            FileReader content = new FileReader(file);
            try {
                // 3. Create the Document and 4. add its Fields (each attribute of a
                //    document, such as its name, is one Field).
                Document doc = new Document();
                doc.add(new Field("content", content));
                doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));

                // 5. Add the document to the index through the IndexWriter.
                writer.addDocument(doc);
            } finally {
                // Release the file handle even when addDocument fails.
                content.close();
            }
        }

        /**
         * Searches the {@code content} field of the index in {@link #INDEX_DIR} for
         * the term {@code document} and prints {@code filename|path} for each of the
         * (at most 100) hits, followed by the number of hits printed.
         *
         * <p>The searcher, reader and directory are always closed. I/O and query
         * parse problems are reported on standard error and do not propagate.
         */
        public void search() {
            try {
                // 1. Create the Directory.
                Directory directory = FSDirectory.open(new File(INDEX_DIR));
                try {
                    // 2. Create the IndexReader.
                    IndexReader reader = IndexReader.open(directory);
                    try {
                        // 3. Create the IndexSearcher on top of the IndexReader.
                        IndexSearcher searcher = new IndexSearcher(reader);
                        try {
                            // 4. Create the Query; "content" is the default field.
                            QueryParser parser = new QueryParser(
                                    Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
                            Query query = parser.parse("document");

                            // 5. Run the search and 6. fetch the scored hits.
                            TopDocs tds = searcher.search(query, 100);
                            ScoreDoc[] sds = tds.scoreDocs;

                            for (ScoreDoc sd : sds) {
                                // 7. Load the stored Document for the hit.
                                Document d = searcher.doc(sd.doc);

                                // 8. Read the stored values from the Document.
                                System.out.println(d.get("filename") + "|" + d.get("path"));
                            }

                            System.out.println(sds.length);
                        } finally {
                            searcher.close();
                        }
                    } finally {
                        // Closed explicitly: the searcher was given this reader and the
                        // caller remains responsible for it.
                        reader.close();
                    }
                } finally {
                    directory.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }

    }
  • 相关阅读:
    内存溢出和内存泄露的概念,句柄泄露呢?句柄泄露造成的原因,待更新
    翻页查询的sql语句优化
    微服务下ELK统一日志系统搭建
    vscode添加自己的python虚拟环境
    【经验】如何成为培训师
    go并行编程1goroutine 孙龙
    简单说说物联网 孙龙
    golang恐慌和恢复panic/recover 孙龙
    golang监听rabbitmq消息队列任务断线自动重连接 孙龙
    rsync+inotifytools与rsync+sersync架构的区别 孙龙
  • 原文地址:https://www.cnblogs.com/siqi/p/3485263.html
Copyright © 2020-2023  润新知