• Lucene.Net 全文检索文件


    using Lucene.Net.Analysis;
    using Lucene.Net.Analysis.Tokenattributes;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.QueryParsers;
    using Lucene.Net.Search;
    using Lucene.Net.Store;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace TestApp
    {
        class Program
        {
            /// <summary>
            /// Entry point: searches the on-disk index for the keyword "文章" in the "body"
            /// field and writes the stored "number" and "body" values of each hit
            /// (at most 1000 hits) to the console.
            /// </summary>
            static void Main()
            {
                #region Keyword search
                // Index directory, resolved relative to the current working directory and
                // opened with NoLockFactory.
                // NOTE(review): CreateIndex writes to "../ItemIndexDir", not "IndexDir" —
                // confirm which location is intended.
                using (Lucene.Net.Store.Directory dir_search = FSDirectory.Open(new System.IO.DirectoryInfo("IndexDir"), new NoLockFactory()))
                // Read-only reader. It is disposed here explicitly because a searcher built
                // over a caller-supplied reader does not close that reader itself.
                using (IndexReader reader = IndexReader.Open(dir_search, true))
                using (IndexSearcher search = new IndexSearcher(reader))
                using (Analyzer queryAnalyzer = new PanGuAnalyzer())
                {
                    QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "body", queryAnalyzer);
                    // Pre-segment the keyword so the parser receives space-separated terms.
                    Query query = parser.Parse(LuceneHelper.GetKeyWordSplid("文章"));

                    // Run the search and collect the top 1000 hits (no filter).
                    TopDocs ts = search.Search(query, null, 1000);

                    // Load each matching document and print its stored fields.
                    foreach (ScoreDoc hit in ts.ScoreDocs)
                    {
                        Document doc = search.Doc(hit.Doc);
                        Console.WriteLine(doc.Get("number"));
                        Console.WriteLine(doc.Get("body"));
                    }
                }
                #endregion
            }
    
            /// <summary>
            /// Helper that segments search keywords into individual terms.
            /// </summary>
            public static class LuceneHelper
            {
                /// <summary>
                /// Tokenizes <paramref name="keywords"/> with <c>PanGuAnalyzer</c> and returns the
                /// resulting terms in order, each followed by a single space (so a non-empty
                /// result ends with a trailing space).
                /// </summary>
                /// <param name="keywords">Raw keyword text to segment.</param>
                /// <returns>The space-terminated terms, or an empty string when no token is produced.</returns>
                /// <exception cref="ArgumentNullException"><paramref name="keywords"/> is null.</exception>
                public static string GetKeyWordSplid(string keywords)
                {
                    // Fail before allocating the analyzer; previously a null argument only
                    // surfaced from the StringReader constructor.
                    if (keywords == null)
                    {
                        throw new ArgumentNullException("keywords");
                    }

                    StringBuilder sb = new StringBuilder();

                    // The analyzer, reader and token stream are all disposable; release them
                    // deterministically instead of leaving them to the finalizer.
                    using (Analyzer analyzer = new PanGuAnalyzer())
                    using (StringReader input = new StringReader(keywords))
                    // NOTE(review): the first argument of TokenStream is the field name; the
                    // keyword text is passed there as in the original code — confirm whether
                    // a real field name such as "body" was intended.
                    using (TokenStream stream = analyzer.TokenStream(keywords, input))
                    {
                        while (stream.IncrementToken())
                        {
                            ITermAttribute term = stream.GetAttribute<ITermAttribute>();
                            sb.Append(term.Term).Append(' ');
                        }
                    }

                    return sb.ToString();
                }
            }
    
            /// <summary>
            /// Builds the index under "../ItemIndexDir" from the text files DataTest1.txt through
            /// DataTest5.txt, located two directory levels above the current working directory.
            /// A new index is created when none exists; otherwise the existing index is updated.
            /// </summary>
            /// <exception cref="InvalidOperationException">
            /// The working directory has no grandparent directory to read the data files from.
            /// </exception>
            private static void CreateIndex()
            {
                // Resolve the data folder up front so a bad working directory fails before
                // the index is opened for writing.
                DirectoryInfo parent = System.IO.Directory.GetParent(System.IO.Directory.GetCurrentDirectory());
                if (parent == null || parent.Parent == null)
                {
                    throw new InvalidOperationException("Cannot locate the data files: the working directory has no grandparent directory.");
                }
                string dataRoot = parent.Parent.FullName;

                using (Analyzer analyzer = new PanGuAnalyzer())
                using (Lucene.Net.Store.Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo("../ItemIndexDir")))
                {
                    // Create a fresh index only when the directory does not already hold one;
                    // otherwise open the existing index.
                    bool isCreate = !IndexReader.IndexExists(dir);

                    using (IndexWriter writer = new IndexWriter(dir, analyzer, isCreate, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        for (int i = 1; i <= 5; i++)
                        {
                            string number = i.ToString();

                            // Path.Combine inserts the directory separator that plain string
                            // concatenation with FullName omitted.
                            // NOTE(review): the original literal may have been "\Data\Test" with
                            // the backslashes lost — confirm the real file layout.
                            string path = Path.Combine(dataRoot, "DataTest" + number + ".txt");
                            // NOTE(review): Encoding.Default depends on the platform/runtime —
                            // confirm the encoding of the data files.
                            string text = File.ReadAllText(path, Encoding.Default);

                            Document doc = new Document();
                            // Store.YES keeps the original value so doc.Get("number") can return it;
                            // NOT_ANALYZED indexes the value as a single un-tokenized term.
                            doc.Add(new Field("number", number, Field.Store.YES, Field.Index.NOT_ANALYZED));
                            // WITH_POSITIONS_OFFSETS stores term vectors including token positions and offsets.
                            doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                            // Replace any existing document with the same "number" so that
                            // re-running against an existing index does not add duplicates.
                            writer.UpdateDocument(new Term("number", number), doc);
                        }

                        writer.Optimize();
                    }
                }
            }
        }
    }
  • 相关阅读:
    Java 练习(经典例题: 生产者/消费者问题)
    Java 基础(线程的通信)
    Java 练习(线程的同步)
    Java 基础( ReentrantLock )
    Java 基础(线程的死锁问题)
    Java基础(单实例设计模式懒汉式解决线程安全)
    Java 基础(同步方法解决线程安全问题)
    Java 基础(Thread类的有关方法,线程的调度)
    Java 基础(线程的生命周期, 同步代码块解决线程安全)
    Java 基础(以实现 Runnable 接口的方式创建多线程)
  • 原文地址:https://www.cnblogs.com/LTEF/p/10402249.html
Copyright © 2020-2023  润新知