• lucene.net全文检索(二)lucene.net 的封装


    查询

       public class LuceneQuery : ILuceneQuery
        {
            #region Identity
            private Logger logger = new Logger(typeof(LuceneQuery));
            #endregion Identity
    
            #region QueryIndex
            /// <summary>
            /// Searches the "title" field of the index at <c>StaticConstant.IndexPath</c>
            /// and converts every hit into a <c>Commodity</c>.
            /// </summary>
            /// <param name="queryString">Query text, parsed with the Lucene query parser using the PanGu analyzer.</param>
            /// <returns>At most 10000 matching commodities, in the order returned by the searcher.</returns>
            public List<Commodity> QueryIndex(string queryString)
            {
                List<Commodity> ciList = new List<Commodity>();

                // The directory is disposed together with the searcher; previously only the
                // searcher was released and the directory handle was left open.
                using (Directory dir = FSDirectory.Open(StaticConstant.IndexPath))
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    Analyzer analyzer = new PanGuAnalyzer();

                    // Configure the search criteria here.
                    QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
                    Query query = parser.Parse(queryString);
                    Console.WriteLine(query.ToString()); // Show the parsed search expression.
                    TopDocs docs = searcher.Search(query, (Filter)null, 10000);

                    foreach (ScoreDoc sd in docs.ScoreDocs)
                    {
                        Document doc = searcher.Doc(sd.Doc);
                        ciList.Add(DocumentToCommodityInfo(doc));
                    }
                }

                return ciList;
            }
    
    
    
            /// <summary>
            /// Searches the "title" field and returns one page of matching commodities,
            /// optionally restricted to a price range and sorted by price.
            /// </summary>
            /// <param name="queryString">Query text, parsed with the Lucene query parser using the PanGu analyzer.</param>
            /// <param name="pageIndex">One-based page number; values below 1 are treated as 1.</param>
            /// <param name="pageSize">Number of items per page.</param>
            /// <param name="totalCount">Receives the total number of hits reported by the searcher.</param>
            /// <param name="priceFilter">
            /// Optional price range written as "start,end" wrapped in brackets: a leading "[" makes the
            /// lower bound inclusive and a trailing "]" makes the upper bound inclusive (e.g. "[10,20]" or "{10,20}").
            /// Null or blank disables the filter.
            /// </param>
            /// <param name="priceOrderBy">
            /// Optional price ordering; a value ending in "desc" sorts descending, any other non-blank
            /// value sorts ascending. Null or blank keeps the default sort.
            /// </param>
            /// <returns>The commodities on the requested page; empty when the page is past the last hit.</returns>
            /// <exception cref="ArgumentException">Thrown when <paramref name="priceFilter"/> does not contain two numeric bounds.</exception>
            public List<Commodity> QueryIndexPage(string queryString, int pageIndex, int pageSize, out int totalCount, string priceFilter, string priceOrderBy)
            {
                totalCount = 0;
                List<Commodity> ciList = new List<Commodity>();

                // Dispose the directory as well as the searcher.
                using (FSDirectory dir = FSDirectory.Open(StaticConstant.IndexPath))
                using (IndexSearcher searcher = new IndexSearcher(dir))
                {
                    Analyzer analyzer = new PanGuAnalyzer();

                    // Configure the search criteria here.
                    QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
                    Query query = parser.Parse(queryString);

                    NumericRangeFilter<float> numPriceFilter = null;
                    if (!string.IsNullOrWhiteSpace(priceFilter))
                    {
                        string range = priceFilter.Trim();
                        bool isContainStart = range.StartsWith("[", StringComparison.Ordinal);
                        bool isContainEnd = range.EndsWith("]", StringComparison.Ordinal);
                        string[] bounds = range.Replace("[", "").Replace("]", "").Replace("{", "").Replace("}", "").Split(',');

                        float start = 0;
                        float end = 0;
                        // Check the length first so a filter without a comma reports a bad argument instead of
                        // an IndexOutOfRangeException, and parse culture-invariantly so "10.5" means the same
                        // on every machine.
                        if (bounds.Length < 2
                            || !float.TryParse(bounds[0], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out start)
                            || !float.TryParse(bounds[1], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out end))
                        {
                            throw new ArgumentException("Wrong priceFilter", "priceFilter");
                        }
                        numPriceFilter = NumericRangeFilter.NewFloatRange("price", start, end, isContainStart, isContainEnd);
                    }

                    Sort sort = new Sort();
                    if (!string.IsNullOrWhiteSpace(priceOrderBy))
                    {
                        // The third SortField argument is "reverse": true means descending.
                        // The previous code passed true for "asc", which inverted the requested order.
                        bool descending = priceOrderBy.Trim().EndsWith("desc", StringComparison.OrdinalIgnoreCase);
                        sort.SetSort(new SortField("price", SortField.FLOAT, descending));
                    }

                    TopDocs docs = searcher.Search(query, numPriceFilter, 10000, sort);
                    totalCount = docs.TotalHits;

                    // TotalHits counts every match, but ScoreDocs holds at most the 10000 requested above,
                    // so the page must be bounded by the array length, not by totalCount.
                    ScoreDoc[] hits = docs.ScoreDocs;
                    pageIndex = Math.Max(1, pageIndex); // Pages are one-based.
                    long startIndex = (long)(pageIndex - 1) * pageSize; // long avoids int overflow on large pages
                    long endIndex = Math.Min(startIndex + pageSize, (long)hits.Length);

                    for (long i = startIndex; i < endIndex; i++)
                    {
                        Document doc = searcher.Doc(hits[(int)i].Doc);
                        ciList.Add(DocumentToCommodityInfo(doc));
                    }
                }

                return ciList;
            }
    
            /// <summary>
            /// Logs the "productid" field and the score of each hit in the given index window.
            /// </summary>
            /// <param name="docs">Search result whose <c>ScoreDocs</c> are inspected.</param>
            /// <param name="startIndex">Zero-based index of the first hit to log (inclusive).</param>
            /// <param name="endIndex">Zero-based index at which logging stops (exclusive).</param>
            /// <param name="searcher">
            /// Searcher used to load each hit's document. Declared as the base <c>Searcher</c> so both
            /// <c>IndexSearcher</c> and <c>MultiSearcher</c> instances can be passed.
            /// </param>
            private void PrintScores(TopDocs docs, int startIndex, int endIndex, Searcher searcher)
            {
                ScoreDoc[] scoreDocs = docs.ScoreDocs;
                for (int i = startIndex; i < endIndex && i < scoreDocs.Length; i++)
                {
                    int docId = scoreDocs[i].Doc;
                    Document doc = searcher.Doc(docId);
                    logger.Info(string.Format("{0}的分值为{1}", doc.Get("productid"), scoreDocs[i].Score));
                }
            }
    
            #endregion QueryIndex
    
            #region private
            /// <summary>
            /// Builds a <c>Commodity</c> from the fields of an index document.
            /// </summary>
            /// <param name="doc">Document read from the index.</param>
            /// <returns>A commodity populated from the "id", "title", "productid", "categoryid", "imageurl", "price" and "url" fields.</returns>
            private Commodity DocumentToCommodityInfo(Document doc)
            {
                return new Commodity()
                           {
                               Id = int.Parse(doc.Get("id")),
                               Title = doc.Get("title"),
                               ProductId = long.Parse(doc.Get("productid")),
                               CategoryId = int.Parse(doc.Get("categoryid")),
                               // The field name was misspelled as "iamgeurl", which never matched
                               // the "imageurl" field and left ImageUrl null.
                               ImageUrl = doc.Get("imageurl"),
                               Price = decimal.Parse(doc.Get("price")),
                               Url = doc.Get("url")
                           };
            }
    
            #endregion private
        }
    View Code

    批量/单个索引的增删改

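        /// <summary>
        /// Builds and maintains the Lucene index: bulk build, merge, and single/batch insert, delete and update.
        /// Multi-threading: write to several index directories in parallel, then merge them.
        /// Latency: feed index changes through an asynchronous queue.
        /// </summary>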
        public class LuceneBulid : ILuceneBulid
        {
            #region Identity
            private Logger logger = new Logger(typeof(LuceneBulid));
            #endregion Identity
    
            #region 批量BuildIndex 索引合并
            /// <summary>
            /// Bulk-indexes a batch of commodities into one index directory. All items in the batch
            /// are expected to share the same source flag, i.e. the same target directory.
            /// </summary>
            /// <param name="ciList">Commodities to index; a null or empty list is a no-op.</param>
            /// <param name="pathSuffix">Sub-directory appended to the root index path (e.g. "sa1"); blank means the root directory.</param>
            /// <param name="isCreate">False (default) appends to the existing index; true deletes the existing index first.</param>
            public void BuildIndex(List<Commodity> ciList, string pathSuffix = "", bool isCreate = false)
            {
                if (ciList == null || ciList.Count == 0)
                {
                    return;
                }

                string rootIndexPath = StaticConstant.IndexPath;
                // Path.Combine replaces the "{0}\{1}" format string, whose "\{" is not a valid C# escape sequence.
                string indexPath = string.IsNullOrWhiteSpace(pathSuffix) ? rootIndexPath : Path.Combine(rootIndexPath, pathSuffix);

                DirectoryInfo dirInfo = Directory.CreateDirectory(indexPath);
                using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                {
                    // Appending to a directory that holds no index yet fails, so create one in that case.
                    bool create = isCreate || !IndexReader.IndexExists(directory);
                    IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
                    try
                    {
                        writer.SetMaxBufferedDocs(100); // Documents buffered in memory before a new segment is written.
                        writer.MergeFactor = 100;       // Controls how often segments are merged.
                        writer.UseCompoundFile = true;  // Compound files reduce the number of index files.

                        foreach (Commodity ci in ciList)
                        {
                            CreateCIIndex(writer, ci);
                        }
                    }
                    finally
                    {
                        // No Optimize() here: optimization is left to the merge step.
                        writer.Close();
                    }
                }
            }
    
            /// <summary>
            /// Rebuilds the root index by merging the indexes stored in the given child directories,
            /// then optimizes the result. The existing root index is replaced.
            /// </summary>
            /// <param name="childDirs">Names of the child directories under the root index path; null or empty is a no-op.</param>
            public void MergeIndex(string[] childDirs)
            {
                Console.WriteLine("MergeIndex Start");
                IndexWriter writer = null;
                LuceneIO.Directory rootDirectory = null;
                List<LuceneIO.Directory> sources = new List<LuceneIO.Directory>();
                try
                {
                    if (childDirs == null || childDirs.Length == 0)
                    {
                        return;
                    }

                    string rootPath = StaticConstant.IndexPath;

                    // Open every source before the root index is recreated, so a bad child path
                    // cannot leave the root index wiped with nothing merged into it.
                    foreach (string childDir in childDirs)
                    {
                        // Path.Combine replaces the "{0}\{1}" format string, whose "\{" is not a valid C# escape sequence.
                        DirectoryInfo childInfo = Directory.CreateDirectory(Path.Combine(rootPath, childDir));
                        sources.Add(LuceneIO.FSDirectory.Open(childInfo));
                    }

                    rootDirectory = LuceneIO.FSDirectory.Open(Directory.CreateDirectory(rootPath));
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                    writer = new IndexWriter(rootDirectory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); // true: delete the existing index
                    writer.MergeFactor = 100;      // Controls how often segments are merged.
                    writer.UseCompoundFile = true; // Compound files reduce the number of index files.
                    writer.AddIndexesNoOptimize(sources.ToArray());

                    // Optimize only after a successful merge; running it from the finally block
                    // could hide the original exception behind a second failure.
                    writer.Optimize();
                }
                finally
                {
                    try
                    {
                        if (writer != null)
                        {
                            writer.Close();
                        }
                    }
                    finally
                    {
                        foreach (LuceneIO.Directory source in sources)
                        {
                            source.Dispose();
                        }
                        if (rootDirectory != null)
                        {
                            rootDirectory.Dispose();
                        }
                        Console.WriteLine("MergeIndex End");
                    }
                }
            }
    
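            // Field.Store.YES: store the original (un-analyzed) field value so it can be read back from a hit.
            // Field.Store.NO: do not store the value; storing is independent of indexing.
            // Field.Store.COMPRESS: store the value compressed (for long text or binary data) at a performance cost.
            // Field.Index.ANALYZED: run the value through the analyzer and index the resulting tokens.
            // Field.Index.ANALYZED_NO_NORMS: analyze and index, but do not store norms (the one-byte normalization factor kept per field), which saves space.
            // Field.Index.NOT_ANALYZED: index the whole value as a single term, without analysis.
            // Field.Index.NOT_ANALYZED_NO_NORMS: index the whole value as a single term and do not store norms.
            // A term vector lists, for one field of one document, the terms it contains and how often each occurs in that document.
            // Field.TermVector.YES: store the term vector of this field for every document.
            // Field.TermVector.NO: do not store term vectors.
            // Field.TermVector.WITH_POSITIONS: store term vectors with token positions.
            // Field.TermVector.WITH_OFFSETS: store term vectors with character offsets.
            // Field.TermVector.WITH_POSITIONS_OFFSETS: store term vectors with both positions and offsets.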
            #endregion 批量BuildIndex 索引合并
    
            #region 单个/批量索引增删改
            /// <summary>
            /// Adds one commodity to the root index, creating the index when none exists yet.
            /// </summary>
            /// <param name="ci">Commodity to index; null is a no-op.</param>
            public void InsertIndex(Commodity ci)
            {
                if (ci == null)
                {
                    return;
                }

                try
                {
                    DirectoryInfo dirInfo = Directory.CreateDirectory(StaticConstant.IndexPath);
                    using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                    {
                        // Ask Lucene whether an index exists rather than counting files: a stray
                        // file (such as a leftover lock file) would otherwise make the writer
                        // try to open an index that is not there.
                        bool isCreate = !IndexReader.IndexExists(directory);
                        IndexWriter writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
                        try
                        {
                            writer.MergeFactor = 100;      // Controls how often segments are merged.
                            writer.UseCompoundFile = true; // Compound files reduce the number of index files.
                            CreateCIIndex(writer, ci);
                        }
                        finally
                        {
                            writer.Close();
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error("InsertIndex异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
    
            /// <summary>
            /// Adds a batch of commodities to the root index without deleting the existing index.
            /// </summary>
            /// <param name="ciList">Commodities to index.</param>
            public void InsertIndexMuti(List<Commodity> ciList)
            {
                // Root directory (no suffix), incremental mode.
                this.BuildIndex(ciList, pathSuffix: "", isCreate: false);
            }
    
            /// <summary>
            /// Deletes the index documents of a batch of commodities, matched by their "productid" term.
            /// </summary>
            /// <param name="ciList">Commodities whose documents are removed; a null or empty list is a no-op.</param>
            public void DeleteIndexMuti(List<Commodity> ciList)
            {
                if (ciList == null || ciList.Count == 0)
                {
                    return;
                }

                try
                {
                    DirectoryInfo dirInfo = Directory.CreateDirectory(StaticConstant.IndexPath);
                    using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                    using (IndexReader reader = IndexReader.Open(directory, false)) // false: not read-only, deletions allowed
                    {
                        foreach (Commodity ci in ciList)
                        {
                            reader.DeleteDocuments(new Term("productid", ci.ProductId.ToString()));
                        }
                    }
                }
                catch (Exception ex)
                {
                    logger.Error("DeleteIndex异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
    
            /// <summary>
            /// Deletes the index documents of a single commodity, matched by its "productid" term.
            /// </summary>
            /// <param name="ci">Commodity whose documents are removed; null is a no-op.</param>
            public void DeleteIndex(Commodity ci)
            {
                if (ci == null)
                {
                    return;
                }

                try
                {
                    DirectoryInfo dirInfo = Directory.CreateDirectory(StaticConstant.IndexPath);
                    using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                    using (IndexReader reader = IndexReader.Open(directory, false)) // false: not read-only, deletions allowed
                    {
                        reader.DeleteDocuments(new Term("productid", ci.ProductId.ToString()));
                    }
                }
                catch (Exception ex)
                {
                    logger.Error("DeleteIndex异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
    
            /////// <summary>
            /////// 更新一条数据的索引
            /////// </summary>
            //public void UpdateIndex(Commodity ci)
            //{
            //    DeleteIndex(ci);
            //    InsertIndex(ci);
            //}
    
            /// <summary>
            /// Replaces the index document of one commodity: documents matching its "productid"
            /// term are removed and the freshly built document is added.
            /// </summary>
            /// <param name="ci">Commodity to re-index; null is a no-op.</param>
            public void UpdateIndex(Commodity ci)
            {
                if (ci == null)
                {
                    return;
                }

                try
                {
                    DirectoryInfo dirInfo = Directory.CreateDirectory(StaticConstant.IndexPath);
                    using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                    {
                        // Ask Lucene whether an index exists rather than counting files in the directory.
                        bool isCreate = !IndexReader.IndexExists(directory);
                        IndexWriter writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
                        try
                        {
                            writer.MergeFactor = 100;      // Controls how often segments are merged.
                            writer.UseCompoundFile = true; // Compound files reduce the number of index files.
                            writer.UpdateDocument(new Term("productid", ci.ProductId.ToString()), ParseCItoDoc(ci));
                        }
                        finally
                        {
                            writer.Close();
                        }
                    }
                }
                catch (Exception ex)
                {
                    // The message used to read "InsertIndex异常", copied from InsertIndex.
                    logger.Error("UpdateIndex异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
    
            /// <summary>
            /// Replaces the index documents of a batch of commodities, each matched by its "productid" term.
            /// </summary>
            /// <param name="ciList">Commodities to re-index (expected to share one source flag); a null or empty list is a no-op.</param>
            public void UpdateIndexMuti(List<Commodity> ciList)
            {
                if (ciList == null || ciList.Count == 0)
                {
                    return;
                }

                try
                {
                    DirectoryInfo dirInfo = Directory.CreateDirectory(StaticConstant.IndexPath);
                    using (LuceneIO.Directory directory = LuceneIO.FSDirectory.Open(dirInfo))
                    {
                        // Ask Lucene whether an index exists rather than counting files in the directory.
                        bool isCreate = !IndexReader.IndexExists(directory);
                        IndexWriter writer = new IndexWriter(directory, CreateAnalyzerWrapper(), isCreate, IndexWriter.MaxFieldLength.LIMITED);
                        try
                        {
                            writer.MergeFactor = 50;       // Controls how often segments are merged.
                            writer.UseCompoundFile = true; // Compound files reduce the number of index files.
                            foreach (Commodity ci in ciList)
                            {
                                writer.UpdateDocument(new Term("productid", ci.ProductId.ToString()), ParseCItoDoc(ci));
                            }
                        }
                        finally
                        {
                            writer.Close();
                        }
                    }
                }
                catch (Exception ex)
                {
                    // The message used to read "InsertIndex异常", copied from InsertIndex.
                    logger.Error("UpdateIndexMuti异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
            #endregion 单个索引增删改
    
            #region PrivateMethod
            /// <summary>
            /// Builds the per-field analyzer used when writing documents: a StandardAnalyzer by
            /// default, with "title" mapped to the PanGu analyzer and "categoryid" mapped to its
            /// own StandardAnalyzer.
            /// </summary>
            /// <returns>The configured analyzer wrapper.</returns>
            private PerFieldAnalyzerWrapper CreateAnalyzerWrapper()
            {
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_30));

                wrapper.AddAnalyzer("title", new PanGuAnalyzer());
                wrapper.AddAnalyzer("categoryid", new StandardAnalyzer(Version.LUCENE_30));

                return wrapper;
            }
    
            /// <summary>
            /// Converts a commodity to a document and adds it through the given writer.
            /// Failures are logged and rethrown.
            /// </summary>
            /// <param name="writer">Open index writer that receives the document.</param>
            /// <param name="ci">Commodity to index.</param>
            private void CreateCIIndex(IndexWriter writer, Commodity ci)
            {
                try
                {
                    writer.AddDocument(ParseCItoDoc(ci));
                }
                catch (Exception ex)
                {
                    logger.Error("CreateCIIndex异常", ex);
                    throw; // rethrow without resetting the stack trace
                }
            }
    
            /// <summary>
            /// Converts a commodity into an index document. Every field is stored; only "title"
            /// is analyzed, and "price" is indexed as a numeric float field.
            /// </summary>
            /// <param name="ci">Commodity to convert.</param>
            /// <returns>The document to add to the index.</returns>
            private Document ParseCItoDoc(Commodity ci)
            {
                Document doc = new Document();

                // Field rejects null values, so missing text is indexed as an empty string
                // instead of failing the whole document.
                doc.Add(new Field("id", ci.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("title", ci.Title ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED)); // analyzed (PanGu segmentation)
                doc.Add(new Field("productid", ci.ProductId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("categoryid", ci.CategoryId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("imageurl", ci.ImageUrl ?? string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("url", ci.Url ?? string.Empty, Field.Store.YES, Field.Index.NOT_ANALYZED));
                // Price is narrowed to float for the numeric field used by range filtering and sorting.
                doc.Add(new NumericField("price", Field.Store.YES, true).SetFloatValue((float)ci.Price));
                return doc;
            }
    
            #endregion PrivateMethod
        }
    View Code

    分词器封装

        public class LuceneAnalyze : ILuceneAnalyze
        {
            private Logger logger = new Logger(typeof(LuceneAnalyze));
    
    
            //
            #region AnalyzerKey
            /// <summary>
            /// Splits a search keyword into terms by parsing it against the "title" field with the
            /// PanGu analyzer and collecting the terms of the resulting query.
            /// </summary>
            /// <param name="keyword">Raw search keyword.</param>
            /// <returns>
            /// The terms of the parsed query. For a boolean query only its direct term and phrase
            /// clauses contribute. Returns an empty array for a null or blank keyword, and the
            /// keyword itself when the parsed query is of any other type.
            /// </returns>
            public string[] AnalyzerKey(string keyword)
            {
                // Nothing to analyze; avoids handing a null or blank string to the parser.
                if (string.IsNullOrWhiteSpace(keyword))
                {
                    return new string[0];
                }

                Analyzer analyzer = new PanGuAnalyzer();
                QueryParser parser = new QueryParser(Version.LUCENE_30, "title", analyzer);
                Query query = parser.Parse(this.CleanKeyword(keyword));

                if (query is TermQuery)
                {
                    Term term = ((TermQuery)query).Term;
                    return new string[] { term.Text };
                }

                if (query is PhraseQuery)
                {
                    Term[] terms = ((PhraseQuery)query).GetTerms();
                    return terms.Select(t => t.Text).ToArray();
                }

                if (query is BooleanQuery) // and / or
                {
                    BooleanClause[] clauses = ((BooleanQuery)query).GetClauses();
                    List<string> analyzerWords = new List<string>();
                    foreach (BooleanClause clause in clauses)
                    {
                        Query childQuery = clause.Query;
                        if (childQuery is TermQuery)
                        {
                            analyzerWords.Add(((TermQuery)childQuery).Term.Text);
                        }
                        else if (childQuery is PhraseQuery)
                        {
                            Term[] terms = ((PhraseQuery)childQuery).GetTerms();
                            analyzerWords.AddRange(terms.Select(t => t.Text));
                        }
                    }
                    return analyzerWords.ToArray();
                }

                // Literal braces must be doubled in a format string; the unescaped "{ keyword }"
                // made string.Format throw FormatException on this path.
                logger.Debug(string.Format("AnalyzerKey在解析keyword={0}的结果为new string[] {{ keyword }} ", keyword));
                return new string[] { keyword };
            }
    
            /// <summary>
            /// Neutralizes upper-case AND / OR operators in a keyword by lower-casing them (at the
            /// start, at the end and between words), then escapes the query-parser special characters.
            /// </summary>
            /// <param name="keyword">Raw search keyword.</param>
            /// <returns>The escaped keyword; an empty string when <paramref name="keyword"/> is null.</returns>
            private string CleanKeyword(string keyword)
            {
                if (keyword == null)
                {
                    return string.Empty;
                }

                if (!string.IsNullOrWhiteSpace(keyword))
                {
                    // Ordinal comparisons keep the match in step with the fixed-length
                    // Substring calls below; culture-sensitive matching can ignore characters.
                    bool isClean = false;
                    while (!isClean)
                    {
                        keyword = keyword.Trim();
                        if (keyword == "AND" || keyword == "OR")
                        {
                            // A keyword that is only an operator would otherwise fail to parse.
                            keyword = keyword.ToLowerInvariant();
                        }
                        else if (keyword.EndsWith(" AND", StringComparison.Ordinal))
                        {
                            keyword = keyword.Substring(0, keyword.Length - 3) + "and";
                        }
                        else if (keyword.EndsWith(" OR", StringComparison.Ordinal))
                        {
                            keyword = keyword.Substring(0, keyword.Length - 2) + "or";
                        }
                        else if (keyword.StartsWith("AND ", StringComparison.Ordinal))
                        {
                            keyword = "and" + keyword.Substring(3);
                        }
                        else if (keyword.StartsWith("OR ", StringComparison.Ordinal))
                        {
                            keyword = "or" + keyword.Substring(2);
                        }
                        else if (keyword.Contains(" OR "))
                        {
                            keyword = keyword.Replace(" OR ", " or ");
                        }
                        else if (keyword.Contains(" AND "))
                        {
                            keyword = keyword.Replace(" AND ", " and ");
                        }
                        else
                        {
                            isClean = true;
                        }
                    }
                }

                return QueryParser.Escape(keyword);
            }
            #endregion AnalyzerKey
    View Code

     

    !

    • 作       者 : 明志德道
    • 希     望: 如果内容对您有用,动动小手点个赞吧,您的支持就是我持续写作的动力!
    • 声     明1 : 如有错误,欢迎讨论,请勿谩骂^_^。
    • 声     明2 : 原创博客请在转载时保留原文链接或在文章开头加上本人博客地址,否则保留追究法律责任的权利。
  • 相关阅读:
    第一篇:spring boot 初始
    数据结构 -- 线段树
    数据结构 -- 优先队列和堆排序
    javaIO -- 流的体系设计思路、基础分类
    JavaIO -- Reader 和 Writer
    javaIO -- InputStream和OutStream
    javaIO -- File源码
    数据结构 -- 二叉树(Binary Search Tree)
    数据结构 -- 链表(LinkedList)
    数据结构 -- 栈(Stack)
  • 原文地址:https://www.cnblogs.com/for-easy-fast/p/14319028.html
Copyright © 2020-2023  润新知