• Lucene.net 全文检索 盘古分词


    lucene.net + 盘古分词

    引用:

    1.Lucene.Net.dll

    2.PanGu.Lucene.Analyzer.dll 

    3.PanGu.HighLight.dll

    4.PanGu.dll

     1 using Lucene.Net.Search;
     2 using Lucene.Net.Store;
     3 using Lucene.Net.QueryParsers;
     4 using Lucene.Net.Documents;
     5 using Lucene.Net.Index;
     6 using Lucene.Net.Analysis.Standard;
     7 using Lucene.Net.Analysis;
     8 using Lucene.Net.Analysis.PanGu;
     9 using PanGu.HighLight;
    10 using PanGu;

    1.建立索引:

     // Folder that holds the Lucene index files. Verbatim string, so the single
     // backslash is the literal drive separator.
     static string path = @"G:\indextest";

     /// <summary>
     /// Builds the full-text index under <see cref="path"/> from the
     /// <c>goods_name</c> and <c>goods_brief</c> columns of the <c>ecs_goods</c> table.
     /// </summary>
     /// <remarks>
     /// The analyzer is chosen by <c>isPangu</c>: PanGu word segmentation when set,
     /// Lucene's StandardAnalyzer otherwise. Both fields are stored and analyzed so
     /// they can be searched and shown (highlighted) in results.
     /// The writer, the directory and all database objects are released even when
     /// indexing fails part-way through.
     /// </remarks>
     static void CreateIndex()
     {
         // Pick the analyzer used to tokenize the indexed text.
         Analyzer analyzer;
         if (isPangu)
         {
             analyzer = new PanGuAnalyzer(); // PanGu Chinese word segmentation
         }
         else
         {
             analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);
         }

         // Open the index directory on disk.
         var directory = FSDirectory.Open(new DirectoryInfo(path));
         try
         {
             // Third argument is the writer's "create" flag, passed as true here.
             IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
             try
             {
                 // NOTE(review): credentials are hard-coded; move the connection string to configuration.
                 using (MySqlConnection conn = new MySqlConnection(@"server=localhost;User Id=root;password=123456;Database=ecshop"))
                 {
                     conn.Open();
                     using (MySqlCommand cmd = new MySqlCommand("select goods_name,goods_brief from ecs_goods", conn))
                     using (MySqlDataReader reader = cmd.ExecuteReader())
                     {
                         while (reader.Read())
                         {
                             // One Document per table row; each Field is one indexed column.
                             Document doc = new Document();
                             doc.Add(new Field("goods_name", reader["goods_name"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                             doc.Add(new Field("goods_brief", reader["goods_brief"].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                             indexWriter.AddDocument(doc);
                         }
                     }
                 }

                 // Optimize the index files once all rows have been added.
                 indexWriter.Optimize();
             }
             finally
             {
                 // Always close the writer so the index lock is not left behind on failure.
                 indexWriter.Close();
             }
         }
         finally
         {
             directory.Close();
         }
     }

    2.搜索:

     /// <summary>
     /// Handles the page load: reads the posted search text from form field <c>q</c>,
     /// searches the <c>goods_name</c> field of the index at <c>path</c>, and fills
     /// <c>gs</c> with highlighted results and <c>tasktime</c> with the elapsed time.
     /// </summary>
     /// <param name="sender">Event source (unused).</param>
     /// <param name="e">Event arguments (unused).</param>
     protected void Page_Load(object sender, EventArgs e)
     {
         keyword = Request.Form["q"];
         if (string.IsNullOrEmpty(keyword))
         {
             return; // nothing was searched for
         }

         var watch = Stopwatch.StartNew();

         // NOTE(review): the query analyzer should be the same one the index was built
         // with; if the index was created with PanGuAnalyzer, use PanGuAnalyzer here too.
         Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

         // Read-only searcher over the on-disk index.
         IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(new DirectoryInfo(path)), true);
         try
         {
             // Parse the user's text into a query against the goods_name field.
             // NOTE(review): the raw input is handed to the query parser; confirm how
             // syntax errors in user input should be reported.
             QueryParser queryP = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "goods_name", analyzer);
             Query query = queryP.Parse(keyword);
             var hits = searcher.Search(query, 200);

             // PanGu highlighter: wraps matched words in a bold red <span>.
             // (A Lucene-style Highlighter built from a formatter, QueryScorer and
             // SimpleFragmenter could be used here instead.)
             PanGu.HighLight.SimpleHTMLFormatter simpleHTMLFormatter =
                 new PanGu.HighLight.SimpleHTMLFormatter("<span style=\"font-weight:bold;color: red;\">", "</span>");
             PanGu.HighLight.Highlighter highlighter =
                 new PanGu.HighLight.Highlighter(simpleHTMLFormatter, new Segment());
             highlighter.FragmentSize = 50;

             // Iterate the hits actually returned. totalHits counts every match in the
             // index and can exceed the 200 entries requested in scoreDocs.
             foreach (var scoreDoc in hits.scoreDocs)
             {
                 Document doc = searcher.Doc(scoreDoc.doc);
                 goods g = new goods();
                 g.goods_name = HighlightOrOriginal(highlighter, keyword, doc.Get("goods_name"));
                 g.goods_brief = HighlightOrOriginal(highlighter, keyword, doc.Get("goods_brief"));
                 gs.Add(g);
             }

             watch.Stop();

             tasktime = "搜索耗费时间:" + watch.ElapsedMilliseconds + "毫秒";
         }
         finally
         {
             // Release the index files held by the searcher.
             searcher.Close();
         }
     }

     /// <summary>
     /// Returns the highlighted fragment of <paramref name="text"/>, or the original
     /// text when the highlighter produces nothing (for example when the field does
     /// not contain the keyword), so result fields are never shown blank.
     /// </summary>
     private static string HighlightOrOriginal(PanGu.HighLight.Highlighter highlighter, string keywords, string text)
     {
         string fragment = highlighter.GetBestFragment(keywords, text);
         return string.IsNullOrEmpty(fragment) ? text : fragment;
     }

     多字段搜索

    // Multi-field search: parse the keyword against every indexed field instead of
    // goods_name only. The field names must match those written by CreateIndex.
    string[] fields = { "goods_name", "goods_brief" };
    MultiFieldQueryParser mq = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, fields, analyzer);
    Query multiquery = mq.Parse(keyword);
    var hits1 = searcher.Search(multiquery, 200);
  • 相关阅读:
    PAT 甲级 1132 Cut Integer (20 分)
    AcWing 7.混合背包问题
    AcWing 9. 分组背包问题
    AcWing 5. 多重背包问题 II
    AcWing 3. 完全背包问题
    AcWing 4. 多重背包问题
    AcWing 2. 01背包问题
    AcWing 875. 快速幂
    AcWing 874. 筛法求欧拉函数
    AcWing 873. 欧拉函数
  • 原文地址:https://www.cnblogs.com/mahatmasmile/p/3193911.html
Copyright © 2020-2023  润新知