• Lucene查询简述


    本示例演示Lucene查询,并对查询结果进行了一些处理(基于Lucene 3.5):

    1、支持通配符位于词首的搜索(前导通配符),如*国,可以搜索出中国、美国等以国字结尾的词的内容:
            即支持后缀匹配,如*国 则可以搜索中国、美国等以国字结尾的词,*:*可以查询所有索引。
      parser.setAllowLeadingWildcard(true);

      2、搜索词中含有通配符时,不自动将其转换为小写(保持原有大小写):

      // 有通配符时不转换大小写
      parser.setLowercaseExpandedTerms(false);

      3、结果进行多字段排序,详细见代码排序部分;

      4、结果高亮显示,详细见代码高亮部分。

     package cn.test.gxg.engine.query; 
      
     import java.io.File; 
     import java.io.IOException; 
     import java.io.StringReader; 
      
     import org.apache.lucene.analysis.Analyzer; 
     import org.apache.lucene.analysis.TokenStream; 
     import org.apache.lucene.analysis.standard.StandardAnalyzer; 
     import org.apache.lucene.document.Document; 
     import org.apache.lucene.document.Field; 
     import org.apache.lucene.document.Fieldable; 
     import org.apache.lucene.document.NumericField; 
     import org.apache.lucene.document.Field.Store; 
     import org.apache.lucene.index.CorruptIndexException; 
     import org.apache.lucene.index.IndexReader; 
     import org.apache.lucene.index.IndexWriter; 
     import org.apache.lucene.queryParser.ParseException; 
     import org.apache.lucene.queryParser.QueryParser; 
     import org.apache.lucene.search.IndexSearcher; 
     import org.apache.lucene.search.Query; 
     import org.apache.lucene.search.ScoreDoc; 
     import org.apache.lucene.search.Searcher; 
     import org.apache.lucene.search.Sort; 
     import org.apache.lucene.search.SortField; 
     import org.apache.lucene.search.TopDocs; 
     import org.apache.lucene.search.highlight.Highlighter; 
     import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 
     import org.apache.lucene.search.highlight.QueryScorer; 
     import org.apache.lucene.search.highlight.SimpleFragmenter; 
     import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 
     import org.apache.lucene.store.FSDirectory; 
     import org.apache.lucene.store.LockObtainFailedException; 
     import org.apache.lucene.util.Version; 
      
     /**
      * Example that builds a small Lucene 3.5 index and then queries it,
      * demonstrating multi-field sorting and hit highlighting.
      *
      * @createTime: Feb 22, 2010 3:02:28 PM
      * @author:  <a href="mailto:leader1212@sina.com.cn">天涯 </a>
      * @version: 0.1
      * @lastVersion: 0.1
      * @updateTime:
      * @updateAuthor:  <a href="mailto:leader1212@sina.com.cn">天涯 </a>
      * @changesSum:
      *
      */
     public class QueryTest {

         /** Names of the stored fields whose values are run through the highlighter. */
         private static final String[] HIGHLIGHT_FIELDS = {"Content", "Name"};

         /**
          * Creates the sample index and then runs the sample query against it.
          *
          * @param args unused
          */
         public static void main(String[] args) {
             // Index directory: D:\workspace\code\java\TestLucene3\index\txt\test
             String indexPath = "D:\\workspace\\code\\java\\TestLucene3\\index\\txt\\test";
             createIndex(indexPath);
             search(indexPath);
         }

         /**
          * Creates (overwriting any existing index) an index of ten sample
          * documents under {@code indexPath}. I/O failures are reported on
          * standard error and are not propagated.
          *
          * @param indexPath file-system directory that holds the index
          */
         public static void createIndex(String indexPath) {
             // The same analyzer must be used at query time, otherwise the
             // query terms will not line up with the indexed terms.
             Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
             try {
                 IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexPath)),
                         analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                 try {
                     for (int i = 0; i < 10; i++) {
                         writer.addDocument(buildDocument(i));
                     }
                 } finally {
                     // Always release the writer (and its write lock), even if
                     // adding a document failed.
                     writer.close();
                 }
                 System.out.println("Indexed success!");
             } catch (IOException e) {
                 e.printStackTrace();
             }
         }

         /**
          * Builds the i-th sample document with the fields Code, Id, Name,
          * Content, Type, Price and Sex (added in that order).
          *
          * @param i sequence number used to derive the field values
          * @return the populated document
          */
         private static Document buildDocument(int i) {
             Document doc = new Document();
             doc.add(new Field("Code", "feinnocdb_App_info" + i, Field.Store.YES,
                     Field.Index.ANALYZED));
             doc.add(new NumericField("Id", Store.YES, true).setIntValue(i % 3));
             // The original code added the numeric "Id" field a second time here
             // instead of this text field, so "Name" was never indexed.
             doc.add(new Field("Name", "国家名字-" + i, Field.Store.YES,
                     Field.Index.ANALYZED));
             doc.add(new Field("Content", "中国中华人民共和国—" + i, Field.Store.YES,
                     Field.Index.ANALYZED));
             doc.add(new NumericField("Type", Store.YES, true).setIntValue(i % 10));
             doc.add(new NumericField("Price", Store.YES, true).setFloatValue(i % 3));
             doc.add(new NumericField("Sex", Store.YES, true).setIntValue(i % 2));
             return doc;
         }

         /**
          * Runs the sample query {@code Content:国} against the index under
          * {@code indexPath}, sorted by Id (descending) then Sex (ascending),
          * and prints every stored field of each hit. Values of the fields
          * listed in {@link #HIGHLIGHT_FIELDS} are highlighted. Parse and I/O
          * failures are reported on standard error and are not propagated.
          *
          * @param indexPath file-system directory that holds the index
          */
         public static void search(String indexPath) {
             // Standard analyzer; another analyzer may be used as long as the
             // index was built with the same one.
             Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
             QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "Content", analyzer);
             // Parser options must be set BEFORE parse() to have any effect.
             // Allow a leading wildcard (suffix matching such as "*国").
             parser.setAllowLeadingWildcard(true);
             // Do not lowercase terms that contain wildcards.
             parser.setLowercaseExpandedTerms(false);

             Query query;
             try {
                 query = parser.parse("Content:国");
             } catch (ParseException e) {
                 e.printStackTrace();
                 // Without a query there is nothing to search for.
                 return;
             }

             // Multi-field sort: earlier entries take precedence.
             // For a single-field sort pass just one SortField to Sort.
             SortField[] sortFields = {
                 new SortField("Id", SortField.INT, true),
                 new SortField("Sex", SortField.INT, false)
             };
             Sort sort = new Sort(sortFields);

             try {
                 IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
                 try {
                     IndexSearcher searcher = new IndexSearcher(reader);
                     // If sorting is not needed: searcher.search(query, 100)
                     TopDocs topDocs = searcher.search(query, null, 1000, sort);

                     System.out.println("查询语句为:" + query.toString());
                     System.out.println("查询到数据条数为:" + topDocs.totalHits);
                     if (topDocs.totalHits != 0) {
                         printHits(searcher, topDocs, analyzer, buildHighlighter(parser, query));
                     }
                 } finally {
                     // Close the reader even when searching or highlighting fails.
                     reader.close();
                 }
             } catch (IOException e) {
                 e.printStackTrace();
             } catch (InvalidTokenOffsetsException e) {
                 e.printStackTrace();
             }
         }

         /**
          * Creates the highlighter used for the result listing. Normally the
          * search query itself would be used for highlighting; for
          * demonstration a separate query ({@code Content:中}) is parsed, and
          * the search query is used only if that parse fails.
          *
          * @param parser      parser used to build the highlight query
          * @param searchQuery fallback query for highlighting
          * @return a highlighter wrapping matches in red FONT tags
          */
         private static Highlighter buildHighlighter(QueryParser parser, Query searchQuery) {
             Query highlightQuery;
             try {
                 highlightQuery = parser.parse("Content:中");
             } catch (ParseException e) {
                 e.printStackTrace();
                 highlightQuery = searchQuery;
             }
             // Prefix/suffix wrapped around every highlighted term.
             SimpleHTMLFormatter formatter =
                     new SimpleHTMLFormatter(" <FONT COLOR='RED'>", " </FONT>");
             Highlighter highlighter = new Highlighter(formatter, new QueryScorer(highlightQuery));
             // Fragment size in characters.
             highlighter.setTextFragmenter(new SimpleFragmenter(1000));
             return highlighter;
         }

         /**
          * Prints one line per hit in the form {@code name:value  name:value ...}.
          *
          * @param searcher    searcher used to load the stored documents
          * @param topDocs     hits to print
          * @param analyzer    analyzer used to re-tokenize values for highlighting
          * @param highlighter highlighter applied to the {@link #HIGHLIGHT_FIELDS}
          * @throws IOException                  if a document cannot be loaded or tokenized
          * @throws InvalidTokenOffsetsException if the highlighter rejects the token offsets
          */
         private static void printHits(IndexSearcher searcher, TopDocs topDocs, Analyzer analyzer,
                 Highlighter highlighter) throws IOException, InvalidTokenOffsetsException {
             for (ScoreDoc hit : topDocs.scoreDocs) {
                 Document document = searcher.doc(hit.doc);
                 for (Fieldable stored : document.getFields()) {
                     String name = stored.name();
                     String value = document.get(name);
                     if (value != null && isHighlightField(name)) {
                         // Tokenize under the field's own name, not a fixed one.
                         TokenStream tokens = analyzer.tokenStream(name, new StringReader(value));
                         String fragment = highlighter.getBestFragment(tokens, value);
                         // getBestFragment yields null when nothing matched;
                         // keep the plain stored value in that case.
                         if (fragment != null) {
                             value = fragment;
                         }
                     }
                     System.out.print(name + ":" + value + "  ");
                 }
                 System.out.println();
             }
         }

         /**
          * Tells whether values of the given field should be highlighted.
          *
          * @param fieldName stored field name
          * @return {@code true} if the name is listed in {@link #HIGHLIGHT_FIELDS}
          */
         private static boolean isHighlightField(String fieldName) {
             for (String candidate : HIGHLIGHT_FIELDS) {
                 if (candidate.equals(fieldName)) {
                     return true;
                 }
             }
             return false;
         }
     }
  • 相关阅读:
    iOS开发 日常错误积累
    Linux编程---I/O部分
    Binder机制1---Binder原理介绍
    hdu4405概率dp入门
    SSL连接建立过程分析(1)
    用GDB调试程序(一)
    RBAC权限管理
    HDU3930(离散对数与原根)
    Java实现 蓝桥杯VIP 算法提高 特殊的质数肋骨
    Java实现 蓝桥杯VIP 算法提高 产生数
  • 原文地址:https://www.cnblogs.com/lcuzhanglei/p/2618456.html
Copyright © 2020-2023  润新知