• lucene:索引 -不分词


    package com.capinfotech.faq.classifier;
    import java.util.*;
    import java.io.File;
    import java.io.IOException;
    
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.util.Version;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    
    import com.capinfotech.faq.data.Query;
    import com.sun.org.apache.xerces.internal.impl.xpath.regex.ParseException;
    /**
     * Minimal Lucene 3.6 example: runs an exact-term query against an on-disk
     * index whose fields are stored without analysis (no tokenization).
     *
     * <p>The index itself is expected to already exist under {@link #dest_path};
     * the commented-out code in {@link #main(String[])} shows how it was
     * presumably built (every field uses {@code Field.Index.NOT_ANALYZED}).
     */
    public class luceneindex {

        /** File-system location of the Lucene index directory. */
        private static String dest_path="E:/gongzuo/FAQdata/kb/kuaiindex";

        /** Value written to the "id" field of every sample document. */
        static protected String[] keywords={"4001"};

        /** Sample values for the "content" field, one document per entry. */
        static protected String[] contents={"在吗","在不在","有人吗","在线","在没","在线吗","有不有人"," 有人没"," 有人不 在?"};

        /** Maximum number of hits requested from the searcher. */
        private static final int MAX_HITS = 12;

        /**
         * Entry point: runs the term-query demo against the existing index.
         *
         * @param args unused
         * @throws IOException if the index cannot be opened or read
         */
        public static void main(String[] args) throws IOException {
            // One-off index construction, kept for reference. Un-comment to
            // (re)build the index before querying it.
            //
            // File file=new File(dest_path);
            // FSDirectory directory=FSDirectory.open(file);
            // Analyzer textanalyzer=new StandardAnalyzer(Version.LUCENE_36);
            // IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_36, textanalyzer);
            // IndexWriter indexa=new IndexWriter(directory,cfg);
            // for(int i=0;i<contents.length;i++){
            //     Document doc=new Document();
            //     Field fieldnum=new Field("id",keywords[0],Field.Store.YES,Field.Index.NOT_ANALYZED);
            //     doc.add(fieldnum);
            //     Field fieldcontent=new Field("content",contents[i],Field.Store.YES,Field.Index.NOT_ANALYZED);
            //     doc.add(fieldcontent);
            //     indexa.addDocument(doc);
            // }
            // //indexa.optimize();
            // indexa.close();

            luceneindex querytest=new luceneindex();
            querytest.TemqueryTest();
        }

        /**
         * Opens the index at {@link #dest_path}, runs a {@code TermQuery} for the
         * term "有人" on the "content" field and prints the id, content and score
         * of each hit (at most {@link #MAX_HITS}) to standard output.
         *
         * <p>The directory, reader and searcher are all released before the
         * method returns, even when the search fails.
         *
         * @throws IOException if the index cannot be opened or read
         */
        public void TemqueryTest() throws IOException {
            FSDirectory directory = FSDirectory.open(new File(dest_path));
            try {
                IndexReader reader = IndexReader.open(directory);
                try {
                    IndexSearcher search = new IndexSearcher(reader);
                    try {
                        // A TermQuery is not analyzed: it matches only documents
                        // whose indexed term equals this text exactly.
                        TermQuery query = new TermQuery(new Term("content", "有人"));
                        ScoreDoc[] docs = search.search(query, MAX_HITS).scoreDocs;
                        for (int i = 0; i < docs.length; i++) {
                            // Load the stored document once and read both fields from it.
                            Document hit = search.doc(docs[i].doc);
                            String querycontent = hit.get("content");
                            String queryid = hit.get("id");
                            System.out.println("查询id: "+queryid+"	查询内容: "+querycontent);
                            System.out.println(docs[i].score);
                        }
                    } finally {
                        search.close();
                    }
                } finally {
                    // Closing a searcher built from an existing reader does not
                    // close that reader, so release it explicitly.
                    reader.close();
                }
            } finally {
                directory.close();
            }
        }

    }
    


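    以上是建立索引以及查询索引的简单例子。当字段不分词(Field.Index.NOT_ANALYZED)时,整个字段值会作为一个完整的词项写入索引,因此检索关键字必须与字段值完全一致才能命中。例如,上面用 "有人" 查询时不会返回任何结果,因为没有任何一条 content 恰好等于 "有人";改用 "有人吗" 才能匹配到对应的文档。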

    栋栋
  • 相关阅读:
    yum 崩溃的解决方法
    线上mysql数据库删库恢复的案例
    CVE-2018-8120 WIN7 08提权漏洞exp
    CVE-2018-1111 劫持dhcp造成Redhat、centos代码执行
    关于 Python generator(生成器)的类比
    黯淡蓝点:旅行者号64亿公里外回望地球...
    raise RuntimeError("autoconf error") RuntimeError: autoconf error
    python 调取 shell 命令的几种方法
    struct 处理二进制
    linux 下日常使用便利工具
  • 原文地址:https://www.cnblogs.com/zhangdongdong/p/3482497.html
Copyright © 2020-2023  润新知