• Lucene 学习小结


    pom.xml设置

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
        
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.3.1</version>
        </dependency>
        
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>

    生成索引:IndexingTest.java

    package com.chabansheng.lucene;
    
    import java.nio.file.Paths;
    
    import ...;public class IndexingTest {
    
        // Sample data set: element i of every array below describes the same document i.
        // Values of the "id" field.
        private String ids[]={"1","2","3","4"};
        // Values of the "author" field; search() prints this field for each hit.
        private String authors[]={"Jack","Marry","John","Json"};
        // Values of the "position" field; index() boosts the title of documents whose position is "boss".
        private String positions[]={"accounting","technician","salesperson","boss"};
        // Values of the "title" field, the field queried by search().
        private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};
        // Values of the "content" field; index() adds them with Field.Store.NO.
        private String contents[]={
                "If possible, use the same JRE major version at both index and search time.",
                "When upgrading to a different JRE major version, consider re-indexing. ",
                "Different JRE major versions may implement different versions of Unicode,",
                "For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"
        };
        
        // Index directory; (re)assigned at the start of index() and search().
        private Directory dir;
        
        /**
         * 生成索引
         * @throws Exception
         */
        @Test
        public void index()throws Exception{
            dir=FSDirectory.open(Paths.get("D:\lucene3"));
            //获取IndexWriter实例
            Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
            IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
            IndexWriter writer=new IndexWriter(dir, iwc);
            
            for(int i=0;i<ids.length;i++){
                Document doc=new Document();
                doc.add(new StringField("id", ids[i], Field.Store.YES));
                doc.add(new StringField("author",authors[i],Field.Store.YES));
                doc.add(new StringField("position",positions[i],Field.Store.YES));
                // 加权操作
                TextField field=new TextField("title", titles[i], Field.Store.YES);
                if("boss".equals(positions[i])){
                    field.setBoost(1.5f);
                }
                doc.add(field);
                doc.add(new TextField("content", contents[i], Field.Store.NO));
                writer.addDocument(doc); // 添加文档
            }
            writer.close();
        }
    
        /**
         * 查询索引方式一
         * @throws Exception
         */
        @Test
        public void search()throws Exception{
            dir=FSDirectory.open(Paths.get("D:\lucene"));
            IndexReader reader=DirectoryReader.open(dir);
            IndexSearcher is=new IndexSearcher(reader);
            String searchField="title";
            String q="java";
            //Term方式查询
            Term t=new Term(searchField,q);
            Query query=new TermQuery(t);
            
            TopDocs hits=is.search(query, 10);
            System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");
            for(ScoreDoc scoreDoc:hits.scoreDocs){
                Document doc=is.doc(scoreDoc.doc);
                System.out.println(doc.get("author"));
            }
            reader.close();
        }
        
    }

    查询索引方式二:Searcher.java

    package com.chabansheng.lucene;
    
    import java.io.StringReader;
    import java.nio.file.Paths;
    
    import ...;public class Searcher {
    
        /**
         * Index query, approach two: parses {@code q} with a {@code QueryParser} on the
         * {@code desc} field (using {@code SmartChineseAnalyzer}) and prints the top 10 hits.
         *
         * <p>For every hit the stored {@code city} and {@code desc} values are printed, followed,
         * when {@code desc} is present, by the best matching fragment of {@code desc} with the
         * query terms wrapped in {@code <b><font color='red'>…</font></b>}.
         *
         * @param indexDir path of the index directory to search
         * @param q        query string in QueryParser syntax
         * @throws Exception if the index cannot be opened, the query cannot be parsed,
         *                   or searching/highlighting fails
         */
        public static void search(String indexDir, String q) throws Exception {
            // try-with-resources releases analyzer, reader and directory even when parsing,
            // searching or highlighting throws; the original leaked all three in that case.
            try (Directory dir = FSDirectory.open(Paths.get(indexDir));
                 IndexReader reader = DirectoryReader.open(dir);
                 // Analyzer analyzer = new StandardAnalyzer() would be the standard tokenizer.
                 SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
                IndexSearcher is = new IndexSearcher(reader);

                // QueryParser-based query.
                QueryParser parser = new QueryParser("desc", analyzer);
                Query query = parser.parse(q);

                TopDocs hits = is.search(query, 10);

                // Highlighter setup: score fragments against the query and mark matches in red bold.
                QueryScorer scorer = new QueryScorer(query);
                Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
                SimpleHTMLFormatter formatter =
                        new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
                Highlighter highlighter = new Highlighter(formatter, scorer);
                highlighter.setTextFragmenter(fragmenter);

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    String desc = doc.get("desc");
                    System.out.println(doc.get("city"));
                    System.out.println(desc);
                    if (desc != null) {
                        TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
                        System.out.println(highlighter.getBestFragment(tokenStream, desc));
                    }
                }
            }
        }
        
        public static void main(String[] args) {
            String indexDir="D:\lucene2";
            String q="南京文明";
            try {
                search(indexDir,q);
            } catch (Exception e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    package com.chabansheng.lucene;
    import java.nio.file.Paths;
    import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.junit.Test;
    public class IndexingTest {
    private String ids[]={"1","2","3","4"};private String authors[]={"Jack","Marry","John","Json"};private String positions[]={"accounting","technician","salesperson","boss"};private String titles[]={"Java is a good language.","Java is a cross platform language","Java powerful","You should learn java"};private String contents[]={"If possible, use the same JRE major version at both index and search time.","When upgrading to a different JRE major version, consider re-indexing. ","Different JRE major versions may implement different versions of Unicode,","For example: with Java 1.4, `LetterTokenizer` will split around the character U+02C6,"};private Directory dir;/** * 生成索引 * @throws Exception */@Testpublic void index()throws Exception{dir=FSDirectory.open(Paths.get("D:\lucene3"));//获取IndexWriter实例Analyzer analyzer=new StandardAnalyzer(); // 标准分词器IndexWriterConfig iwc=new IndexWriterConfig(analyzer);IndexWriter writer=new IndexWriter(dir, iwc);for(int i=0;i<ids.length;i++){Document doc=new Document();doc.add(new StringField("id", ids[i], Field.Store.YES));doc.add(new StringField("author",authors[i],Field.Store.YES));doc.add(new StringField("position",positions[i],Field.Store.YES));// 加权操作TextField field=new TextField("title", titles[i], Field.Store.YES);if("boss".equals(positions[i])){field.setBoost(1.5f);}doc.add(field);doc.add(new TextField("content", contents[i], Field.Store.NO));writer.addDocument(doc); // 添加文档}writer.close();}
    /** * 查询 * @throws Exception */@Testpublic void search()throws Exception{dir=FSDirectory.open(Paths.get("D:\lucene3"));IndexReader reader=DirectoryReader.open(dir);IndexSearcher is=new IndexSearcher(reader);String searchField="title";String q="java";//Term方式查询Term t=new Term(searchField,q);Query query=new TermQuery(t);TopDocs hits=is.search(query, 10);System.out.println("匹配 '"+q+"',总共查询到"+hits.totalHits+"个文档");for(ScoreDoc scoreDoc:hits.scoreDocs){Document doc=is.doc(scoreDoc.doc);System.out.println(doc.get("author"));}reader.close();}}

  • 相关阅读:
    CSAPP阅读笔记-struct, union, 数据对齐-来自第三章3.9的笔记-P183-P191
    CSAPP阅读笔记-数组分配与访问-来自第三章3.8的笔记-P176-P183
    深入理解静态方法和实例化方法的区别
    通俗讲解静态方法和实例方法的区别
    ArcGis中的类模型图目录
    C++ Primer(第四版) 课后习题6.8 统计空格制表符换行的数目
    C++ Primer(第四版) 课后习题4.30
    string类sizeof大小
    C++ Primer(第四版) 课后习题4.18
    C++ Primer(第四版) 课后习题3.14 vector单词转大写
  • 原文地址:https://www.cnblogs.com/375163374lsb/p/10542985.html
Copyright © 2020-2023  润新知