• orchard之lucene.net索引生成


    orchard是微软自己团队搞的cms,园子里有很多园友已经对系统结构详细分析了,但是对里面的某些模块没有一一分析,因为需要使用lucene.net做站内搜索,所以参考学习一下,ps一下lucene.net已经是2.9.4版本了,虽然还在孵化器,但是还是更新了,不容易啊。

    点开Modules开启lucene相关应用,如下图。

    先自己在后台发几篇文章,其实在发文章的同时,orchard的消息监听机制就已经自动从消息队列中取出消息然后自动生成索引了。这里分析一下索引过程。

    点开Settings里的Search Index菜单

    现在索引里已经包含4条文档了,点击Update的时候会重新生成索引,流程如下。

    在Modules里的Orchard.Indexing里的Controllers下的AdminController

            /// <summary>
            /// POST action that asks the indexing service to update the default search index.
            /// </summary>
            /// <returns>
            /// An <c>HttpUnauthorizedResult</c> when the authorization check fails;
            /// otherwise a redirect to the "Index" action.
            /// </returns>
            [HttpPost]
            public ActionResult Update() {
                var authorizer = Services.Authorizer;
                var isAuthorized = authorizer.Authorize(StandardPermissions.SiteOwner, T("Not allowed to manage the search index."));

                if (isAuthorized) {
                    // Update the index; DefaultIndexName is the name of the index folder
                    _indexingService.UpdateIndex(DefaultIndexName);

                    return RedirectToAction("Index");
                }

                return new HttpUnauthorizedResult();
            }

    Orchard.Indexing.Services.IndexingService并不是直接生成索引,而是从消息通知里获取通知后才生成索引的,如下。

            /// <summary>
            /// Forwards the update request for <paramref name="indexName"/> to every
            /// index notifier handler, then posts an informational notification.
            /// </summary>
            /// <param name="indexName">Name of the index, passed unchanged to each handler.</param>
            public void UpdateIndex(string indexName) {
                // The index is not written here; each handler is told to update it
                foreach (var notifierHandler in _indexNotifierHandlers) {
                    notifierHandler.UpdateIndex(indexName);
                }

                // Report back to the UI through the notifier
                var notifier = Services.Notifier;
                notifier.Information(T("The search index has been updated."));
            }

    将索引生成的消息通知给索引生成程序后还是不能生成索引,而是将这个消息传给生成索引的计划任务程序Orchard.Indexing.Services.UpdateIndexScheduler,在这里继续生成索引之旅。

    /// <summary>
    /// Adds an "IIndexNotifierHandler.UpdateIndex" task for <paramref name="indexName"/>
    /// to the processing engine.
    /// </summary>
    /// <param name="indexName">Name of the index, sent to the task as the "indexName" parameter.</param>
    public void Schedule(string indexName) {
        var currentDescriptor = _shellDescriptorManager.GetShellDescriptor();

        // Parameters handed to the scheduled task
        var taskParameters = new Dictionary<string, object>();
        taskParameters.Add("indexName", indexName);

        _processingEngine.AddTask(_shellSettings, currentDescriptor, "IIndexNotifierHandler.UpdateIndex", taskParameters);
    }
    
            /// <summary>
            /// Runs one indexing batch for <paramref name="indexName"/> through the task executor
            /// and schedules another pass when the batch returns <c>true</c>.
            /// </summary>
            /// <param name="indexName">Name of the index to update.</param>
            public void UpdateIndex(string indexName) {
                // Hand the indexing work over to the task executor
                var scheduleAgain = _indexingTaskExecutor.Value.UpdateIndexBatch(indexName);
                if (!scheduleAgain) {
                    return;
                }

                // The batch returned true: queue a further pass via Schedule
                Schedule(indexName);
            }

    添加到计划任务后,他们之间的传递关系就只能通过读取消息队列来继续了。核心在这里。

    Orchard.Indexing.Services.IndexingTaskExecutor,真正处理索引任务的类,这个类会加载到内存,通过心跳方式读取消息队列,如果有新的生成索引任务就执行如下代码。

    View Code
            /// <summary>
            /// Indexes a batch of content items
            /// </summary>
            /// <param name="indexName">Name of the index passed to the index provider's Store and Delete calls.</param>
            /// <param name="settingsFilename">File (in the app data folder) the serialized settings are written to.</param>
            /// <param name="indexSettings">
            /// Indexing state. Mode, LastContentId, LastIndexedId and LastIndexedUtc are read and
            /// updated here, then persisted as XML.
            /// </param>
            /// <returns>
            /// <c>true</c> if this batch collected at least one document to add or remove;
            /// <c>false</c> if it found nothing to do.
            /// </returns>
            private bool BatchIndex(string indexName, string settingsFilename, IndexSettings indexSettings) {
                var addToIndex = new List<IDocumentIndex>();
                var deleteFromIndex = new List<int>();

                // Rebuilding the index ?
                if (indexSettings.Mode == IndexingMode.Rebuild) {
                    Logger.Information("Rebuilding index");
                    _indexingStatus = IndexingStatus.Rebuilding;

                    // load the next page of published versions after the last one already handled
                    var contentItems = _contentRepository
                        .Fetch(
                            versionRecord => versionRecord.Published && versionRecord.Id > indexSettings.LastContentId,
                            order => order.Asc(versionRecord => versionRecord.Id))
                        .Take(ContentItemsPerLoop)
                        .Select(versionRecord => _contentManager.Get(versionRecord.ContentItemRecord.Id, VersionOptions.VersionRecord(versionRecord.Id)))
                        .Distinct()
                        .ToList();

                    // if no more elements to index, switch to update mode
                    // (the update branch below then runs in this same call)
                    if (contentItems.Count == 0) {
                        indexSettings.Mode = IndexingMode.Update;
                    }

                    foreach (var item in contentItems) {
                        try {
                            IDocumentIndex documentIndex = ExtractDocumentIndex(item);

                            if (documentIndex != null && documentIndex.IsDirty) {
                                addToIndex.Add(documentIndex);
                            }

                            // only advanced when extraction did not throw
                            indexSettings.LastContentId = item.VersionRecord.Id;
                        }
                        catch (Exception ex) {
                            Logger.Warning(ex, "Unable to index content item #{0} during rebuild", item.Id);
                        }
                    }
                }

                if (indexSettings.Mode == IndexingMode.Update) {
                    Logger.Information("Updating index");
                    _indexingStatus = IndexingStatus.Updating;

                    // next page of indexing tasks, collapsed to one entry per content item:
                    // the highest task id of the group and whether its last task is a delete
                    var indexingTasks = _taskRepository
                        .Fetch(x => x.Id > indexSettings.LastIndexedId)
                        .OrderBy(x => x.Id)
                        .Take(ContentItemsPerLoop)
                        .GroupBy(x => x.ContentItemRecord.Id)
                        .Select(group => new {TaskId = group.Max(task => task.Id), Delete = group.Last().Action == IndexingTaskRecord.Delete, Id = group.Key, ContentItem = _contentManager.Get(group.Key, VersionOptions.Published)})
                        .OrderBy(x => x.TaskId)
                        .ToArray();

                    foreach (var item in indexingTasks) {
                        try {
                            // item.ContentItem can be null if the content item has been deleted
                            IDocumentIndex documentIndex = ExtractDocumentIndex(item.ContentItem);

                            if (documentIndex == null || item.Delete) {
                                deleteFromIndex.Add(item.Id);
                            }
                            else if (documentIndex.IsDirty) {
                                addToIndex.Add(documentIndex);
                            }

                            // only advanced when extraction did not throw
                            indexSettings.LastIndexedId = item.TaskId;
                        }
                        catch (Exception ex) {
                            Logger.Warning(ex, "Unable to index content item #{0} during update", item.Id);
                        }
                    }
                }

                // save current state of the index
                indexSettings.LastIndexedUtc = _clock.UtcNow;
                _appDataFolder.CreateFile(settingsFilename, indexSettings.ToXml());

                if (deleteFromIndex.Count == 0 && addToIndex.Count == 0) {
                    // nothing more to do
                    _indexingStatus = IndexingStatus.Idle;
                    return false;
                }

                // save new and updated documents to the index
                try {
                    if (addToIndex.Count > 0) {
                        _indexProvider.Store(indexName, addToIndex);
                        Logger.Information("Added content items to index: {0}", addToIndex.Count);
                    }
                }
                catch (Exception ex) {
                    Logger.Warning(ex, "An error occured while adding a document to the index");
                }

                // removing documents from the index
                try {
                    if (deleteFromIndex.Count > 0) {
                        _indexProvider.Delete(indexName, deleteFromIndex);
                        // report the removal and its own count (previously logged the "added" message and count)
                        Logger.Information("Removed content items from index: {0}", deleteFromIndex.Count);
                    }
                }
                catch (Exception ex) {
                    Logger.Warning(ex, "An error occured while removing a document from the index");
                }

                return true;
            }

    其中重要的一点是从Task中取出索引任务然后添加到lucene文档

              // Fetch the next page of indexing tasks after the last processed id and
              // collapse them to one entry per content item
              var indexingTasks = _taskRepository
                  .Fetch(task => task.Id > indexSettings.LastIndexedId)
                  .OrderBy(task => task.Id)
                  .Take(ContentItemsPerLoop)
                  .GroupBy(task => task.ContentItemRecord.Id)
                  .Select(tasksForItem => new {
                      TaskId = tasksForItem.Max(task => task.Id),
                      Delete = tasksForItem.Last().Action == IndexingTaskRecord.Delete,
                      Id = tasksForItem.Key,
                      ContentItem = _contentManager.Get(tasksForItem.Key, VersionOptions.Published)
                  })
                  .OrderBy(entry => entry.TaskId)
                  .ToArray();

              foreach (var entry in indexingTasks) {
                  try {
                      // entry.ContentItem can be null if the content item has been deleted
                      IDocumentIndex documentIndex = ExtractDocumentIndex(entry.ContentItem);

                      if (documentIndex != null && !entry.Delete) {
                          // still present and not flagged for deletion: re-index only when dirty
                          if (documentIndex.IsDirty) {
                              addToIndex.Add(documentIndex);
                          }
                      }
                      else {
                          deleteFromIndex.Add(entry.Id);
                      }

                      // remember how far the task list has been processed
                      indexSettings.LastIndexedId = entry.TaskId;
                  }
                  catch (Exception ex) {
                      Logger.Warning(ex, "Unable to index content item #{0} during update", entry.Id);
                  }
              }

    处理完文档过后存储文档到索引的代码如下:

                // save new and updated documents to the index
                try {
                    if (addToIndex.Count > 0) {
                     // store the collected documents in the index
                        _indexProvider.Store(indexName, addToIndex);
                        Logger.Information("Added content items to index: {0}", addToIndex.Count);
                    }
                }
                catch (Exception ex) {
                    // a failure is logged as a warning and not rethrown
                    Logger.Warning(ex, "An error occured while adding a document to the index");
                }

    最终的索引存储处理在Lucene.Services.LuceneIndexProvider

            /// <summary>
            /// Writes the given documents to the named index, first deleting any documents
            /// already stored for the same content items.
            /// </summary>
            /// <param name="indexName">Name of the index; passed to <c>GetDirectory</c> and <c>Delete</c>.</param>
            /// <param name="indexDocuments">Documents to store. The sequence is enumerated exactly once.</param>
            /// <remarks>
            /// A failure while adding documents is logged and not rethrown. The index writer is
            /// always closed, including when <c>Optimize</c> throws.
            /// </remarks>
            public void Store(string indexName, IEnumerable<LuceneDocumentIndex> indexDocuments) {
                // Materialize once: the sequence used to be enumerated three times
                // (emptiness check, id projection, write loop)
                var documents = indexDocuments.ToList();
                if (documents.Count == 0) {
                    return;
                }

                // Remove any previous document for these content items
                Delete(indexName, documents.Select(i => i.ContentItemId));

                var writer = new IndexWriter(GetDirectory(indexName), _analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
                LuceneDocumentIndex current = null;

                try {
                    try {
                        foreach (var indexDocument in documents) {
                            current = indexDocument;
                            // convert the custom index document into a Lucene document
                            var doc = CreateDocument(indexDocument);

                            writer.AddDocument(doc);
                            Logger.Debug("Document [{0}] indexed", indexDocument.ContentItemId);
                        }
                    }
                    catch (Exception ex) {
                        // current is null when the failing element itself was null; do not
                        // dereference it here, or the handler would throw in turn
                        object failedId = current != null ? (object)current.ContentItemId : "unknown";
                        Logger.Error(ex, "An unexpected error occurred while adding the document [{0}] to the index [{1}].", failedId, indexName);
                    }

                    // as before, optimize whether or not the loop completed
                    writer.Optimize();
                }
                finally {
                    // always close the writer, even if Optimize throws
                    writer.Close();
                }
            }

    至此lucene的索引算是创建完毕,但是中间的一系列消息和任务之间的传递细节还需要进一步深入学习,错误之处希望园友们能够给予指正。

    独立博客:http://www.jqpress.com/ 欢迎参观

  • 相关阅读:
    001.云桌面整体解决方案实施
    Netty基础招式——ChannelHandler的最佳实践
    架构设计之数据分片
    Go是一门什么样的语言?
    Jenkins汉化配置
    Window安装构建神器Jenkins
    uni-app&H5&Android混合开发三 || uni-app调用Android原生方法的三种方式
    如何使用Hugging Face中的datasets
    关于torch.nn.LSTM()的输入和输出
    pytorch中的nn.CrossEntropyLoss()计算原理
  • 原文地址:https://www.cnblogs.com/jqbird/p/2540099.html
Copyright © 2020-2023  润新知