• Lucene入门示例


      注意:本示例中的lucene版本需在jdk7以上使用。

    一、pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <!-- Maven build descriptor for the Lucene demo project. -->
        <modelVersion>4.0.0</modelVersion>
        <groupId>cd.jeryzhi</groupId>
        <artifactId>luceneDemo</artifactId>
        <version>1.0</version>
        <!-- Display name reuses the artifactId above. -->
        <name>${project.artifactId}</name>
        
        <!-- All three Lucene modules are pinned to the same version (5.0.0). -->
        <dependencies>
            <!-- Core index/search/store classes (IndexWriter, IndexSearcher, RAMDirectory, ...). -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-core</artifactId>
                <version>5.0.0</version>
            </dependency>
            <!-- Supplies org.apache.lucene.queryparser.classic.QueryParser used by the demo. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-queryparser</artifactId>
                <version>5.0.0</version>
            </dependency>
            <!-- Supplies org.apache.lucene.analysis.standard.StandardAnalyzer used by the demo. -->
            <dependency>
                <groupId>org.apache.lucene</groupId>
                <artifactId>lucene-analyzers-common</artifactId>
                <version>5.0.0</version>
            </dependency>
        </dependencies>
        
    </project>

    二、代码:

      

    package luceneDemo;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.util.ArrayList;
    import java.util.List;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    
    public class FindTxtManager {
    
        public static void main(String[] args) {
            
            find("C:\Users\Administrator\Desktop\新建文件夹", "Application Strategy and Integration","pdf");
    
        }
    
        public static void find(String dirPath, String findStr,String fileType) {
            try {
                Directory directory = new RAMDirectory();
                Analyzer analyzer = new StandardAnalyzer();
                IndexWriterConfig config = new IndexWriterConfig(analyzer);
                IndexWriter iwriter = new IndexWriter(directory, config);
    
                File[] files = new File(dirPath).listFiles();
                List<File> fileList = new ArrayList<File>();
                StringBuffer sb = new StringBuffer();
                for (File file : files) {
                    if (file.getName().lastIndexOf("."+fileType) > 0) {
                        fileList.add(file);
                        String fileStr = txt2String(file);
                        sb.append(fileStr);
                        Document document = new Document();
                        document.add(new TextField("filename", file.getName(), Store.YES));
                        document.add(new TextField("content", fileStr, Store.YES));
                        document.add(new TextField("path", file.getPath(), Store.YES));
                        iwriter.addDocument(document);
                        // iwriter.commit();
                    }
                }
                iwriter.close();
    
                DirectoryReader ireader = DirectoryReader.open(directory);
                IndexSearcher isearcher = new IndexSearcher(ireader);
    
                QueryParser parser = new QueryParser("content", analyzer);
                Query query = parser.parse(findStr);
                ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    
                System.out.println(hits.length);
                for (int i = 0; i < hits.length; i++) {
                    Document hitDoc = isearcher.doc(hits[i].doc);
                    System.out.println("____________________________");
    //                System.out.println(hitDoc.get("filename"));
    //                System.out.println(hitDoc.get("content"));
                    System.out.println(hitDoc.get("path"));
                    System.out.println("____________________________");
                }
                ireader.close();
                directory.close();
    
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    
        public static String txt2String(File file) {
            StringBuffer sb = new StringBuffer();
            try {
                BufferedReader br = new BufferedReader(new FileReader(file));// 构造一个BufferedReader类来读取文件
                String s = null;
                while ((s = br.readLine()) != null) {// 使用readLine方法,一次读一行
                    sb.append("
    ").append(s);
                }
                br.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            return sb.toString();
        }
    }

    输出:

    1
    ____________________________
    C:\Users\Administrator\Desktop\新建文件夹\java.pdf
    ____________________________
  • 相关阅读:
    webkit webApp 开发技术要点总结
    EJB 教程推荐
    MySQL 教程分享
    php 教程列表
    html 学习资料列表
    JAVA 教程推荐
    php+mysql预查询prepare 与普通查询的性能对比
    Spring 5 新特性:函数式Web框架
    Java多线程之并发协作生产者消费者设计模式
    php使用file函数、fseek函数读取大文件效率分析
  • 原文地址:https://www.cnblogs.com/shoubianxingchen/p/6479544.html
Copyright © 2020-2023  润新知