• Lucene_索引(域)的查询


    package cn.tz.lucene;
    
    import java.io.File;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.NumericRangeQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    import org.junit.Test;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    public class IndexSearchTest {
    
    	@Test
    	public void testIndexSearch() throws Exception{
    		//创建分词器
    		//Analyzer analyzer=new StandardAnalyzer();
    		Analyzer analyzer=new IKAnalyzer();
    		FSDirectory directory=FSDirectory.open(new File("d:\lucene"));
    		//创建索引和文档的读对象
    		IndexReader reader=IndexReader.open(directory);
    		//创建索引的搜索对象
    		IndexSearcher indexSearcher=new IndexSearcher(reader);
    		//创建查询对象 
    		//第一个参数:默认搜索域,没有指定搜索域时才使用的
    		QueryParser queryParser= new QueryParser("fileName",analyzer);
    		//格式: 域名:搜索关键词
    		//Query query = queryParser.parse("fileName:apache");
    		Query query = queryParser.parse("fileName:not exit");
    		//第一个参数:查询语句对象  第二个参数:显示的数据条数
    		TopDocs topDocs = indexSearcher.search(query,5);
    		
    		System.out.println("***** 一共有"+topDocs.totalHits+" 条记录 *****");
    		//从搜索的结果中获取结果集
    		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    		for(ScoreDoc scoreDoc:scoreDocs){
    			//获取文档id
    			int docId = scoreDoc.doc;
    			
    			//通过文档id从硬盘中读取对应得文件
    			Document doc = reader.document(docId);
    			System.out.println("fileName:"+doc.get("fileName"));
    			System.out.println("fileSize:"+doc.get("fileSize"));
    			System.out.println("==================================");
    		}
    		reader.close();
    	}
    	
    	/**
    	 * 使用TermQuery不需要分词器:它是分词后进行查询
    	 * @throws Exception
    	 */
    	@Test
    	public void testTermQuery() throws Exception{
    		Analyzer analyzer=new  IKAnalyzer();
    		FSDirectory dir=FSDirectory.open(new File("d:\lucene"));
    		//读对象
    		IndexReader reader=IndexReader.open(dir);
    		
    		//查询对象
    		Term term=new Term("fileName","apache");
    	    Query query=new TermQuery(term);
    		//搜索对象
    		IndexSearcher searcher=new IndexSearcher(reader);
    		TopDocs topDocs = searcher.search(query, 10);
    		System.out.println("总条数: "+topDocs.totalHits);
    		//从查询结果中获取结果集
    		ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    		for(ScoreDoc scoreDoc:scoreDocs){
    			//获取文档ID
    			int docID = scoreDoc.doc;
    			//根据文档ID获取文档
    		    Document document = reader.document(docID);
    		    System.out.println("文件名: "+document.get("fileName"));
    		    System.out.println("文件大小  :  "+document.get("fileSize"));
    		    System.out.println("======================================");
    		}
    		reader.close();
    	}
    	/**
    	 * NumericRangeQuery:
    	 * 用于数字范围的查询
    	 * 注意:只针对数字类型的Field域才可以进行检索
    	 * 例如:LongFeild,FloatFeild...
    	 * @throws Exception
    	 */
    	@Test
    	public void testNumericRangeQuery() throws Exception{
    		  Analyzer analyzer=new IKAnalyzer();
    		  //数据源
    		  FSDirectory dir=FSDirectory.open(new File("d:\lucene"));
    	      IndexReader reader=IndexReader.open(dir);	
    	      IndexSearcher search=new IndexSearcher(reader);
    	      //创建query对象
    	      //参数:域名 最小值 最大值 是否包含最小值 是否包含最大值
    	      NumericRangeQuery query=NumericRangeQuery.newLongRange("fileSize",100L,1000L,true,true);
    	      
    	      TopDocs topDocs = search.search(query, 10);
    	      System.out.println("文件数量: "+topDocs.totalHits);
    	      //将查询结果转为结果集
    	      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    	      for(ScoreDoc scoreDoc:scoreDocs){
    	    	  //获取文档ID
    	    	  int docID = scoreDoc.doc;
    	    	  //根据文档ID获取文档
    	    	  Document doc = reader.document(docID);
    	    	  System.out.println("文件名称: "+doc.get("fileName"));
    	    	  System.out.println("文件大小: "+doc.get("fileSize"));
    	    	  System.out.println("=========================");
    	      }
    	      reader.close();
    	 }
    	
    		/**
    		 * BooleanQuery:用于多个条件(组合)查询
    		 * 
    		 */
    		@Test
    		public void testBooleanQuery() throws Exception{
    		      FSDirectory dir=FSDirectory.open(new File("d:\lucene"));
    		      IndexReader reader=IndexReader.open(dir);
    		      IndexSearcher searcher=new IndexSearcher(reader);
    		      
    		      TermQuery termQuery=new TermQuery(new Term("fileName","apache"));
    		      NumericRangeQuery numericRangeQuery=NumericRangeQuery.newLongRange("fileSize",100L,1000L,true,true);
    		      BooleanQuery booleanQuery=new BooleanQuery();
    		      //Occur:
    		      //MUST:and
    		      //MUST_NOT:not
    		      //Should:or
    		      //查询文件名字包含有apache,文件大小在100-1000bit之内的
    		      booleanQuery.add(termQuery, Occur.MUST);
    		      booleanQuery.add(numericRangeQuery,Occur.MUST);
    		      TopDocs topDocs=searcher.search(booleanQuery, 10);
    		      
    		      System.out.println("文件数量 : "+topDocs.totalHits);
    		      ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    		      for(ScoreDoc scoreDoc:scoreDocs){
    		    	  int docId = scoreDoc.doc;
    		    	  Document document = reader.document(docId);
    		    	  System.out.println("文件名称: "+document.get("fileName"));
    		    	  System.out.println("文件大小: "+document.get("fileSize"));
    		    	  System.out.println("==============================");
    		    	  
    		      }
    		}
    		
    		/**
    		 * MultiFieldQueryParser:从多个域进行查询
    		 * 
    		 */
    		@Test
    		public void testMultiFieldQueryParser() throws Exception{
    			Analyzer analyzer=new IKAnalyzer();
    			FSDirectory directory=FSDirectory.open(new File("d:\lucene"));
    			IndexReader reader=IndexReader.open(directory);
    			IndexSearcher searcher=new IndexSearcher(reader);
    			//需求:查询文件名称和文件内容中包含有"apache"的内容
    			//从fileName、fileContent域中进行查询
    			String[] fields={"fileName","fileContent"};
    			MultiFieldQueryParser multiQueryParser=new MultiFieldQueryParser(fields, analyzer);
    			Query query = multiQueryParser.parse("apache");
    			TopDocs topDocs=searcher.search(query, 5);
    			System.out.println("总记录数: "+topDocs.totalHits);
    			//根据查询结果返回结果集,并遍历 
    			for(ScoreDoc scoreDoc:topDocs.scoreDocs){
    				int docId = scoreDoc.doc;
    				Document doc = reader.document(docId);
    				System.out.println("文档名称:"+doc.get("fileName"));
    				System.out.println("文档大小:"+doc.get("fileSize"));
    				System.out.println("===============================");
    			}
    		}
    }
    

      

  • 相关阅读:
    Git学习-创建版本库
    使用Vim编辑器,如何退出
    设置既定目录的命令提示符
    字符数组和字符串
    一波杂乱的分享
    全国软件设计大赛C/C++语言练习
    HDU 1720、1062、2104、1064、2734、1170、1197、2629
    hdu 2000-2010 ACM
    HDU——算法练习1000 1089-1096
    爬虫学习笔记之为什么要设置超时时间,怎么设置(使用selenium)
  • 原文地址:https://www.cnblogs.com/yuefeng123/p/8311174.html
Copyright © 2020-2023  润新知