• lucene学习-3


    本节内容如标题所示,要重构上一节的代码,大体上按如下思路进行:

    • 功能拆分;
    • 创建必要的工具类;

    两个工具类StringUtils和TxtUtils。

    StringUtils,主要是获取当前系统的换行符:

    package com.zhyea.util;
    
    /**
     * String-related constants shared by the other utility classes.
     */
    public final class StringUtils {

        /**
         * Line separator of the current platform, read once from the
         * {@code line.separator} system property when this class is loaded.
         */
        public static final String NEWLINE = System.getProperty("line.separator");

        /** Not instantiable: this class only exposes static constants. */
        private StringUtils() {
        }

    }

    TxtUtils,主要是读取txt文件。这里使用了一个自定义类FileCharsetDetector(用于检测文件的编码格式),可以点击这个超链接查看:

    package com.zhyea.util;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    
    /**
     * Utilities for reading plain-text (txt) files.
     *
     * @author robin
     */
    public final class TxtUtils {

        /** Not instantiable: every member is static. */
        private TxtUtils() {
        }

        /**
         * Detects the character encoding of a txt file.
         * <p>
         * The result of {@code FileCharsetDetector.checkEncoding(file)} is returned
         * unchanged, except that {@code "windows-1252"} is replaced by
         * {@code "Unicode"}.
         *
         * @param file
         *            the txt file to inspect
         * @return the charset name to use when decoding the file
         * @throws IOException
         *             if the detector fails to read the file
         */
        public static String checkEncode(File file) throws IOException {
            String encode = FileCharsetDetector.checkEncoding(file);
            // Constant-first comparison: a null detection result must not throw here.
            // NOTE(review): presumably the detector reports UTF-16 files as
            // windows-1252, hence the remapping -- confirm against FileCharsetDetector.
            return "windows-1252".equals(encode) ? "Unicode" : encode;
        }

        /**
         * Reads the whole content of a txt file into a string.
         * <p>
         * The file is decoded with the charset returned by
         * {@link #checkEncode(File)}. Every line read is followed by
         * {@link StringUtils#NEWLINE}, including the last one.
         *
         * @param file
         *            the txt file to read
         * @return the decoded file content
         * @throws IOException
         *             if the file cannot be opened, decoded or read
         */
        public static String readTxt(File file) throws IOException {
            // Resolve the charset before opening the stream, so a detection failure
            // propagates as-is and leaves nothing to clean up.
            String encode = checkEncode(file);
            FileInputStream in = new FileInputStream(file);
            try {
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, encode));
                StringBuilder builder = new StringBuilder();
                String line;
                while (null != (line = reader.readLine())) {
                    builder.append(line).append(StringUtils.NEWLINE);
                }
                return builder.toString();
            } finally {
                // Close the underlying stream directly: it is always non-null here,
                // and it is released even when the InputStreamReader constructor
                // rejects the charset name.
                in.close();
            }
        }

    }

    然后是拆分后的Lucene操作类:

    package com.zhyea.doggie;
    
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    
    import com.zhyea.util.TxtUtils;
    
    /**
     * Static helpers wrapping the individual Lucene steps used by the index and
     * search demos: analyzer creation, index writing, index reading, searching
     * and printing results.
     */
    public final class DoggieLucene {

        /**
         * Maximum number of hits requested by
         * {@link #executeSearch(IndexSearcher, Query)}.
         */
        private static final int MAX_HITS = 10000;

        /**
         * Most recently created analyzer, reused while the same class is requested.
         */
        private static Analyzer analyzer;

        /** Not instantiable: every member is static. */
        private DoggieLucene() {
        }

        /**
         * Returns an analyzer of the given class.
         * <p>
         * The last created instance is cached in a static field and returned again
         * when its class equals {@code clazz}; otherwise a new instance is created
         * through the no-argument constructor and replaces the cached one. The
         * cache is not synchronized.
         *
         * @param clazz
         *            the analyzer class to instantiate
         * @return an analyzer whose class is {@code clazz}
         * @throws InstantiationException
         *             if the class cannot be instantiated
         * @throws IllegalAccessException
         *             if the no-argument constructor is not accessible
         */
        public static Analyzer createAnalyzer(Class<?> clazz)
                throws InstantiationException, IllegalAccessException {
            if (null == analyzer || !analyzer.getClass().equals(clazz)) {
                analyzer = (Analyzer) clazz.newInstance();
            }
            return analyzer;
        }

        /**
         * Creates an index writer that stores its index on the file system.
         *
         * @param analyzer
         *            the analyzer used by the writer
         * @param indexPath
         *            directory in which the index is stored
         * @return a new index writer; the caller is responsible for closing it
         * @throws IOException
         *             if the directory or the writer cannot be opened
         */
        public static IndexWriter createIndexWriter(Analyzer analyzer,
                String indexPath) throws IOException {
            // Directory that holds the index files.
            Directory dir = FSDirectory.open(new File(indexPath));
            // Writer configuration bound to the given analyzer.
            IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,
                    analyzer);
            return new IndexWriter(dir, config);
        }

        /**
         * Indexes every regular file directly inside a local directory.
         * <p>
         * Each file becomes one document with two stored fields: {@code path}
         * (canonical path, not tokenized) and {@code content} (the text read by
         * {@code TxtUtils.readTxt}). Sub-directories are skipped. The writer is
         * committed once, after all documents have been added.
         *
         * @param writer
         *            the index writer to add documents to
         * @param localDocPath
         *            path of the directory containing the text files
         * @throws IOException
         *             if {@code localDocPath} cannot be listed as a directory, or
         *             if reading a file or writing the index fails
         */
        public static void addLocalDocument(IndexWriter writer, String localDocPath)
                throws IOException {
            File directory = new File(localDocPath);
            // listFiles() returns null when the path is not a directory or an I/O
            // error occurs; report that instead of failing with a NullPointerException.
            File[] files = directory.listFiles();
            if (null == files) {
                throw new IOException("Cannot list directory: " + localDocPath);
            }
            for (File tmp : files) {
                if (!tmp.isFile()) {
                    // Only regular files can be read as text.
                    continue;
                }
                Document doc = new Document();
                doc.add(new StringField("path", tmp.getCanonicalPath(),
                        Field.Store.YES));
                doc.add(new TextField("content", TxtUtils.readTxt(tmp),
                        Field.Store.YES));
                writer.addDocument(doc);
            }
            // One commit for the whole batch rather than one per document.
            writer.commit();
        }

        /**
         * Opens an index reader on a file-system index.
         *
         * @param indexPath
         *            directory in which the index is stored
         * @return a new index reader; the caller is responsible for closing it
         * @throws IOException
         *             if the index cannot be opened
         */
        public static IndexReader createIndexReader(String indexPath)
                throws IOException {
            return DirectoryReader.open(FSDirectory.open(new File(indexPath)));
        }

        /**
         * Creates an index searcher on top of an index reader.
         *
         * @param reader
         *            the index reader to search
         * @return a new index searcher
         */
        public static IndexSearcher createIndexSearcher(IndexReader reader) {
            return new IndexSearcher(reader);
        }

        /**
         * Runs a query and returns at most 10000 hits.
         *
         * @param searcher
         *            the searcher to run the query with
         * @param query
         *            the query to execute
         * @return the matching documents
         * @throws IOException
         *             if reading the index fails
         */
        public static TopDocs executeSearch(IndexSearcher searcher, Query query)
                throws IOException {
            return searcher.search(query, MAX_HITS);
        }

        /**
         * Prints the score and the stored {@code path} field of every hit to
         * standard output, one hit per line.
         *
         * @param docs
         *            the search result to print
         * @param reader
         *            the index reader used to load the stored documents
         * @throws IOException
         *             if a document cannot be loaded
         */
        public static void showResult(TopDocs docs, IndexReader reader)
                throws IOException {
            for (ScoreDoc hit : docs.scoreDocs) {
                Document doc = reader.document(hit.doc);
                System.out.println(hit.score + "  " + doc.get("path"));
            }
        }
    }

    拆的比较琐碎了,凑合看吧。

    创建索引的类:

    package com.zhyea.doggie;
    
    import java.io.IOException;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    
    import com.zhyea.util.FileUtil;
    
    public class IndexTest{
        
        String indexPath = "D:\aqsiqDevelop\workspace3\doggie\WebContent\index";
        String docPath = "D:\aqsiqDevelop\workspace3\doggie\WebContent\docs";
        
        public static void main(String[] args){
            try{
                new IndexTest().createIndex();
            }catch(Exception e){
                e.printStackTrace();
            }
        }
        
        /**
         * 创建索引
         * @throws IOException
         * @throws InstantiationException
         * @throws IllegalAccessException
         */
        private void createIndex() throws IOException, 
                                          InstantiationException, 
                                          IllegalAccessException{
            IndexWriter writer = null;
            try{
                Analyzer analyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
                writer = DoggieLucene.createIndexWriter(analyzer, indexPath);
                DoggieLucene.addLocalDocument(writer, docPath);
            }finally{
                if(null!=writer)writer.close();
            }
        }
    }

    执行搜索的类:

    package com.zhyea.doggie;
    
    import java.io.IOException;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;
    
    public class SearchTest {
    
        String indexPath = "D:\aqsiqDevelop\workspace3\doggie\WebContent\index";
        
        public static void main(String[] args){
            try{
                new SearchTest().executeSearch();
            }catch(Exception e){
                e.printStackTrace();
            }
        }
        
        public void executeSearch() throws ParseException, 
                                           IOException, 
                                           InstantiationException, 
                                           IllegalAccessException{
            IndexReader reader = null;
            try{
                reader = DoggieLucene.createIndexReader(indexPath);
                IndexSearcher searcher = DoggieLucene.createIndexSearcher(reader);
                Analyzer analyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
                Query query = new QueryParser("content", analyzer).parse("杨过");
                TopDocs docs = DoggieLucene.executeSearch(searcher, query);
                DoggieLucene.showResult(docs, reader);
            }finally{
                if(null!=reader)reader.close();
            }
        }
    }

    OK。

  • 相关阅读:
    Python(2.7.6) 特殊方法
    Python(2.7.6) 列表推导式
    代码神注释鉴赏,喜欢拿去用
    为什么说重启能解决90%的问题
    为什么说重启能解决90%的问题
    编程语言简史
    编程语言简史
    Shell脚本中循环select命令用法笔记
    程序员职业规划
    程序员职业规划
  • 原文地址:https://www.cnblogs.com/amunote/p/4178542.html
Copyright © 2020-2023  润新知