• 自定义过滤器


    【需求】

    双十一或限时购的商品需要被放在指定区域内进行搜索。如果为此重新建立索引,消耗过大。方案:自定义一个过滤器(Filter),在检索时通过该过滤器指定检索条件。

    【示例】

     1     /**
     2      * 为指定路径下的文件创建索引
     3      * @param filePath
     4      */
     5     public  void createIndex(String path, String filePath){
     6         IndexWriter writer = null;
     7         
     8         try{
     9             //创建Writer
    10             writer = util.getWriter(this.getDirectory(path), this.getConfig());
    11             
    12             //创建Document
    13             Document doc = null;
    14             
    15             //获取文件列表
    16             File list = new File(filePath);
    17             
    18             //创建索引
    19             int i = 0;
    20             for(File file : list.listFiles()){
    21                 doc = new Document();
    22                 //建立Id列
    23                 doc.add(new Field("id",String.valueOf(i++),Field.Store.YES,Field.Index.NOT_ANALYZED));
    24                 doc.add(new Field("filename",file.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
    25                 doc.add(new Field("modifydate",Long.toString(file.lastModified()),Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS ));
    26                 //doc.add(new Field("size",getSize(file.length()),Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
    27                 
    28                 doc.add(new NumericField("size",Field.Store.YES,true).setLongValue(getSize(file.length())));
    29                 doc.add(new NumericField("score2",Field.Store.YES,true).setDoubleValue(Math.random()));
    30                 doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
    31                 doc.add(new Field("content",new FileReader(file)));
    32                 
    33                 writer.addDocument(doc);
    34             }
    35         }
    36         catch(Exception ex){
    37             ex.printStackTrace();
    38         }
    39         finally{
    40             try {
    41                 writer.close();
    42             } catch (CorruptIndexException e) {
    43                 e.printStackTrace();
    44             } catch (IOException e) {
    45                 e.printStackTrace();
    46             }
    47         }
    48     }
     1 /**
     2  * 限时购商品过滤器
     3  * @author Terry
     4  *
     5  */
     6 public class LimitFilter extends Filter{
     7     //参与限时购商品的Id
     8     String[] ids = new String[]{"129"};
     9 
    10     /**
    11      * 获取文件的集合
    12      */
    13     @Override
    14     public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    15         //创建DocIdSet
    16         //reader.maxDoc()    :获取最大文件的数量
    17         //初始化集合的大小
    18         OpenBitSet obs = new OpenBitSet(reader.maxDoc());
    19         
    20         //初始化OpenbitSet对象
    21         //从指定索引开始,到指定索引结束
    22         //这里的索引不会出现下标越界的问题,所以不需要使用maxDoc()-1
    23         obs.set(0, reader.maxDoc());
    24         
    25         //遍历集合中的每一个Id并获取响应的索引位置
    26         //存储文档的Id
    27         int[] docs = new int[1];
    28         //存储关键字出现的频率
    29         int[] freqs = new int[1];
    30         
    31         //遍历预定义的Id的集合
    32         for(String id : ids){
    33             //通过id获取到TermDocs
    34             TermDocs td = reader.termDocs(new Term("id",id));
    35             //获取文档的ID(id)、freqs(频率)及count(条目数量)
    36             int count = td.read(docs, freqs);
    37             System.out.println(id + " | " + count + " | " + docs[0] + " | " + freqs[0]);
    38             
    39             if(count ==1){
    40                 //从现有集合中清除当前索引的元素
    41                 //obs.clear(docs[0]);
    42                 //obs.set(docs[0]);
    43                 obs.get(docs[0]);
    44             }
    45             
    46         }
    47     
    48         return obs;
    49     }
    50 }
     1 public class SearchUtil {
     2     private String path = "e:\lucene\index5";
     3     
     4     //获取LuceneUtil实例
     5     private LuceneUtil util = LuceneUtil.createInstance();
     6     
     7     /**
     8      * 使用自定义Filter进行过滤
     9      * @param key
    10      */
    11     public void search(String key){
    12         IndexReader reader =null;
    13         IndexSearcher searcher = null;
    14         
    15         try{
    16             //1、创建reader对象
    17             reader = util.getReader(FSDirectory.open(new File(path)));
    18             
    19             //2、创建Searcher对象
    20             searcher = util.getSeacher(reader);
    21             
    22             //3、创建Query
    23             Query query = new TermQuery(new Term("content",key));
    24             
    25             //4、获取TopDocs
    26             //TopDocs tds =searcher.search(query, 10);
    27             //使用自定义过滤器
    28             TopDocs tds = searcher.search(query, new LimitFilter(), 10);
    29             
    30             
    31             //5、获取ScoreDoc
    32             ScoreDoc[] sds = tds.scoreDocs;
    33             
    34             SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
    35             //6、处理数据
    36             for(ScoreDoc sd : sds){
    37                 Document doc =searcher.doc(sd.doc);
    38                 
    39                 System.out.println(sd.doc + "、(" + sd.score + ")" + 
    40                         " **【" +doc.get("filename") + " ** " + doc.get("size")+ " ** " + 
    41                         sdf.format(new Date(Long.parseLong(doc.get("modifydate"))))+ " ** " + doc.get("path")+"】");
    42             }
    43         }
    44         catch(Exception e){
    45             e.printStackTrace();
    46         }
    47         finally{
    48             try {
    49                 searcher.close();
    50             } catch (IOException e) {
    51                 // TODO Auto-generated catch block
    52                 e.printStackTrace();
    53             }
    54         }
    55     }
    56 }

    【优化自定义Filter】

     /**
      * How a filter should treat the documents that match an access list.
      */
     enum AccessMethod {
         /** NOTE(review): presumably "restrict results to the matching documents" — confirm against the filter. */
         ADD,

         /** Matching documents are removed from the result set. */
         CLEAR
     }
     5 
     6 public interface AccessList {
     7     //获取域名称
     8     public String getField();
     9     //获取值列表
    10     public String[] getValues();
    11     //获取方式
    12     public AccessMethod getMethod();
    13 }
     1 /**
     2  * 限时购的列表
     3  * @author Terry
     4  *
     5  */
     6 public class LimitAccessList  implements AccessList{
     7     private String field;
     8     private AccessMethod method;
     9     private String[] values;
    10     
    11     public LimitAccessList(String field, AccessMethod method, String[] values){
    12         this.field = field;
    13         this.method = method;
    14         this.values = values;
    15     }
    16 
    17     @Override
    18     public String getField() {
    19         return this.field;
    20     }
    21 
    22     @Override
    23     public String[] getValues() {
    24         // TODO Auto-generated method stub
    25         return this.values;
    26     }
    27 
    28     @Override
    29     public AccessMethod getMethod() {
    30         // TODO Auto-generated method stub
    31         return this.method;
    32     }
    33 
    34 }
     1 /**
     2  * 限时购商品过滤器
     3  * @author Terry
     4  *
     5  */
     6 public class LimitFilter extends Filter{
     7     //参与限时购商品的Id
     8     //String[] ids = new String[]{"129"};
     9     private AccessList list = null;
    10     
    11     /**
    12      * 构造函数
    13      * @param list
    14      */
    15     public LimitFilter(AccessList list) {
    16         this.list = list;
    17     }
    18 
    19 
    20 
    21     /**
    22      * 获取文件的集合
    23      */
    24     @Override
    25     public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    26         //创建DocIdSet
    27         //reader.maxDoc()    :获取最大文件的数量
    28         //初始化集合的大小
    29         OpenBitSet obs = new OpenBitSet(reader.maxDoc());
    30         
    31         //初始化OpenbitSet对象
    32         //从指定索引开始,到指定索引结束
    33         //这里的索引不会出现下标越界的问题,所以不需要使用maxDoc()-1
    34         obs.set(0, reader.maxDoc());
    35         
    36         //遍历集合中的每一个Id并获取响应的索引位置
    37         //存储文档的Id
    38         int[] docs = new int[1];
    39         //存储关键字出现的频率
    40         int[] freqs = new int[1];
    41         
    42         //遍历预定义的Id的集合
    43         for(String id : list.getValues()){
    44             //通过id获取到TermDocs
    45             TermDocs td = reader.termDocs(new Term(list.getField(),id));
    46             //获取文档的ID(id)、freqs(频率)及count(条目数量)
    47             int count = td.read(docs, freqs);
    48             System.out.println(id + " | " + count + " | " + docs[0] + " | " + freqs[0]);
    49             
    50             if(count ==1){
    51                 //从现有集合中清除当前索引的元素
    52                 if(list.getMethod() == AccessMethod.CLEAR)
    53                     obs.clear(docs[0]);
    54                 //obs.set(docs[0]);            
    55             }
    56             
    57         }
    58     
    59         return obs;
    60     }
    61 }
     1 public class SearchUtil {
     2     private String path = "e:\lucene\index5";
     3     
     4     //获取LuceneUtil实例
     5     private LuceneUtil util = LuceneUtil.createInstance();
     6     
     7     /**
     8      * 使用自定义Filter进行过滤
     9      * @param key
    10      */
    11     public void search(String key){
    12         IndexReader reader =null;
    13         IndexSearcher searcher = null;
    14         
    15         try{
    16             //1、创建reader对象
    17             reader = util.getReader(FSDirectory.open(new File(path)));
    18             
    19             //2、创建Searcher对象
    20             searcher = util.getSeacher(reader);
    21             
    22             //3、创建Query
    23             Query query = new TermQuery(new Term("content",key));
    24             
    25             //4、获取TopDocs
    26             //TopDocs tds =searcher.search(query, 10);
    27             //使用自定义过滤器
    28             //TopDocs tds = searcher.search(query, new LimitFilter(), 10);
    29             AccessList list = new LimitAccessList("id",AccessMethod.ADD, new String[]{"98","100"}); 
    30             TopDocs tds = searcher.search(query, new LimitFilter(list), 10);
    31                         
    32             //5、获取ScoreDoc
    33             ScoreDoc[] sds = tds.scoreDocs;
    34             
    35             SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
    36             //6、处理数据
    37             for(ScoreDoc sd : sds){
    38                 Document doc =searcher.doc(sd.doc);
    39                 
    40                 System.out.println(sd.doc + "、(" + sd.score + ")" + 
    41                         " **【" +doc.get("filename") + " ** " + doc.get("size")+ " ** " + 
    42                         sdf.format(new Date(Long.parseLong(doc.get("modifydate"))))+ " ** " + doc.get("path")+"】");
    43             }
    44         }
    45         catch(Exception e){
    46             e.printStackTrace();
    47         }
    48         finally{
    49             try {
    50                 searcher.close();
    51             } catch (IOException e) {
    52                 // TODO Auto-generated catch block
    53                 e.printStackTrace();
    54             }
    55         }
    56     }
    57 }
  • 相关阅读:
    idea 配置git
    mybatisgenerator自动生成Mapper.dao.entity
    Eclipse中设置作者日期等Java注释模板
    二分图匹配KM算法
    网络流最小费用最大流
    图论tarjan
    STL的一些基本操作
    图论拓扑排序
    字符串的一些基本操作
    网络流最大流
  • 原文地址:https://www.cnblogs.com/zhzcode/p/9884081.html
Copyright © 2020-2023  润新知