• UE4之RHI命令执行


    多线程架构下(GameThread --  RenderThread -- RHIThread)渲染时,不会直接调用图形API的上下文的接口

    而是创建一个个FRHICommand对象(构成一个链表),并赋值给FExecuteRHIThreadTask对象的FRHICommandListBase* RHICmdList

    接着FExecuteRHIThreadTask对象会被压入到TaskGraph[ENamedThreads::RHIThread]的Queue队列中

    RHIThread依次处理TaskGraph[ENamedThreads::RHIThread]的Queue队列中任务,执行FExecuteRHIThreadTask时

    将该对象中FRHICommandListBase* RHICmdList链表中的FRHICommand转换成图形API的上下文的接口并调用执行

    FRHICommand

    FRHICommand是RHI模块的渲染指令基类,这些指令通常由渲染线程通过命令队列Push到RHI线程,在合适的时机由RHI线程执行。

    FRHICommand继承自FRHICommandBase,它们的定义如下:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.h
    
    // Base type of every RHI command. Each instance is also one node of a
    // singly linked list of commands (see the Next member).
    struct FRHICommandBase
    {
        // Next command in the list; defaults to nullptr.
        FRHICommandBase* Next = nullptr;
        
        // Executes the command and then destroys it. Pure virtual: implemented by derived types.
        virtual void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& DebugContext) = 0;
    };
    
    // Intermediate base for concrete RHI commands.
    //   TCmd     - the concrete command type; `this` is downcast to it below.
    //   NameType - name tag type, FUnnamedRhiCommand unless specified.
    template<typename TCmd, typename NameType = FUnnamedRhiCommand>
    struct FRHICommand : public FRHICommandBase
    {
        // Calls the concrete command's Execute(CmdList), then runs its destructor.
        void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext& Context) override final
        {
            // Static downcast to the concrete command type supplied as TCmd.
            TCmd *ThisCmd = static_cast<TCmd*>(this);
            ThisCmd->Execute(CmdList);
            // Explicit destructor call only; no deallocation happens here.
            ThisCmd->~TCmd();
        }
    };

    注:FRHICommandBase有指向下一个节点的Next变量,意味着FRHICommandBase是命令链表的节点。

    FRHICommand拥有数量众多的子类,是通过特殊的宏来快速声明:

    // Macro for declaring an RHI command subclass derived from FRHICommand.
    // It first emits a helper struct named <CommandName>String<__LINE__> whose
    // TStr() returns the command name as text, then opens the declaration of
    // `struct CommandName final`, passing that helper struct as the second
    // template argument of FRHICommand. The struct body is written by the user
    // right after the macro invocation.
    #define FRHICOMMAND_MACRO(CommandName)                                \
    struct PREPROCESSOR_JOIN(CommandName##String, __LINE__)                \
    {                                                                    \
        static const TCHAR* TStr() { return TEXT(#CommandName); }        \
    };                                                                    \
    struct CommandName final : public FRHICommand<CommandName, PREPROCESSOR_JOIN(CommandName##String, __LINE__)>
    
    // PREPROCESSOR_JOIN宏定义如下,详见: UnrealEngine\Engine\Source\Runtime\Core\Public\HAL\PreprocessorHelpers.h
    // Concatenates two preprocessor tokens, performing macro expansion on them first
    #define PREPROCESSOR_JOIN(x, y) PREPROCESSOR_JOIN_INNER(x, y)
    #define PREPROCESSOR_JOIN_INNER(x, y) x##y

    有了以上的宏,就可以快速定义FRHICommand的子类(亦即具体的RHI命令),以FRHICommandSetStencilRef为例:

    // Declares the FRHICommandSetStencilRef command through FRHICOMMAND_MACRO;
    // the braces that follow form the body of the generated struct.
    FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
    {
        // Stencil reference value stored by the constructor.
        uint32 StencilRef;
        // Stores InStencilRef in the command.
        FORCEINLINE_DEBUGGABLE FRHICommandSetStencilRef(uint32 InStencilRef)
            : StencilRef(InStencilRef)
        {
        }
        // Command entry point called by FRHICommand::ExecuteAndDestruct; only declared here.
        RHI_API void Execute(FRHICommandListBase& CmdList);
    };

    展开宏定义之后,代码如下:

    // Helper name struct produced by the macro expansion (the 853 suffix comes from __LINE__);
    // TStr() returns the command's name as text.
    struct FRHICommandSetStencilRefString853
    {
        static const TCHAR* TStr() { return TEXT("FRHICommandSetStencilRef"); }
    };
    
    // FRHICommandSetStencilRef derives from FRHICommand, passing itself and the
    // helper name struct as template arguments.
    struct FRHICommandSetStencilRef final : public FRHICommand<FRHICommandSetStencilRef, FRHICommandSetStencilRefString853>
    {
        // Stencil reference value stored by the constructor.
        uint32 StencilRef;
        // Stores InStencilRef in the command.
        FRHICommandSetStencilRef(uint32 InStencilRef)
            : StencilRef(InStencilRef)
        {
        }
        // Command entry point called by FRHICommand::ExecuteAndDestruct; only declared here.
        RHI_API void Execute(FRHICommandListBase& CmdList);
    };

    利用FRHICOMMAND_MACRO声明的RHI命令数量众多,下面列举其中一部分:

    FRHICOMMAND_MACRO(FRHISyncFrameCommand)
    FRHICOMMAND_MACRO(FRHICommandStat)
    FRHICOMMAND_MACRO(FRHICommandRHIThreadFence)
    FRHICOMMAND_MACRO(FRHIAsyncComputeSubmitList)
    FRHICOMMAND_MACRO(FRHICommandSubmitSubList)
    
    FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubListParallel)
    FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitSubList)
    FRHICOMMAND_MACRO(FRHICommandWaitForAndSubmitRTSubList)
    FRHICOMMAND_MACRO(FRHICommandWaitForTemporalEffect)
    FRHICOMMAND_MACRO(FRHICommandBroadcastTemporalEffect)
        
    FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameResource)
    FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameResource)
    FRHICOMMAND_MACRO(FRHICommandBeginUpdateMultiFrameUAV)
    FRHICOMMAND_MACRO(FRHICommandEndUpdateMultiFrameUAV)
    FRHICOMMAND_MACRO(FRHICommandSetGPUMask)
    
    FRHICOMMAND_MACRO(FRHICommandSetStencilRef)
    FRHICOMMAND_MACRO(FRHICommandSetBlendFactor)
    FRHICOMMAND_MACRO(FRHICommandSetStreamSource)
    FRHICOMMAND_MACRO(FRHICommandSetViewport)
    FRHICOMMAND_MACRO(FRHICommandSetStereoViewport)
    FRHICOMMAND_MACRO(FRHICommandSetScissorRect)
        
    FRHICOMMAND_MACRO(FRHICommandBeginRenderPass)
    FRHICOMMAND_MACRO(FRHICommandEndRenderPass)
    FRHICOMMAND_MACRO(FRHICommandNextSubpass)
    FRHICOMMAND_MACRO(FRHICommandBeginParallelRenderPass)
    FRHICOMMAND_MACRO(FRHICommandEndParallelRenderPass)
    FRHICOMMAND_MACRO(FRHICommandBeginRenderSubPass)
    FRHICOMMAND_MACRO(FRHICommandEndRenderSubPass)
        
    FRHICOMMAND_MACRO(FRHICommandDrawPrimitive)
    FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitive)
    FRHICOMMAND_MACRO(FRHICommandDrawPrimitiveIndirect)
    FRHICOMMAND_MACRO(FRHICommandDrawIndexedIndirect)
    FRHICOMMAND_MACRO(FRHICommandDrawIndexedPrimitiveIndirect)
        
    FRHICOMMAND_MACRO(FRHICommandSetGraphicsPipelineState)
    FRHICOMMAND_MACRO(FRHICommandBeginUAVOverlap)
    FRHICOMMAND_MACRO(FRHICommandEndUAVOverlap)
    
    FRHICOMMAND_MACRO(FRHICommandSetDepthBounds)
    FRHICOMMAND_MACRO(FRHICommandSetShadingRate)
    FRHICOMMAND_MACRO(FRHICommandSetShadingRateImage)
    FRHICOMMAND_MACRO(FRHICommandClearUAVFloat)
    FRHICOMMAND_MACRO(FRHICommandCopyToResolveTarget)
    FRHICOMMAND_MACRO(FRHICommandCopyTexture)
    FRHICOMMAND_MACRO(FRHICommandBeginTransitions)
    FRHICOMMAND_MACRO(FRHICommandEndTransitions)
    FRHICOMMAND_MACRO(FRHICommandResourceTransition)
    FRHICOMMAND_MACRO(FRHICommandClearColorTexture)
    FRHICOMMAND_MACRO(FRHICommandClearDepthStencilTexture)
    FRHICOMMAND_MACRO(FRHICommandClearColorTextures)
    
    FRHICOMMAND_MACRO(FRHICommandSetGlobalUniformBuffers)
    FRHICOMMAND_MACRO(FRHICommandBuildLocalUniformBuffer)
    
    FRHICOMMAND_MACRO(FRHICommandBeginRenderQuery)
    FRHICOMMAND_MACRO(FRHICommandEndRenderQuery)
    FRHICOMMAND_MACRO(FRHICommandPollOcclusionQueries)
    
    FRHICOMMAND_MACRO(FRHICommandBeginScene)
    FRHICOMMAND_MACRO(FRHICommandEndScene)
    FRHICOMMAND_MACRO(FRHICommandBeginFrame)
    FRHICOMMAND_MACRO(FRHICommandEndFrame)
    FRHICOMMAND_MACRO(FRHICommandBeginDrawingViewport)
    FRHICOMMAND_MACRO(FRHICommandEndDrawingViewport)
    
    FRHICOMMAND_MACRO(FRHICommandInvalidateCachedState)
    FRHICOMMAND_MACRO(FRHICommandDiscardRenderTargets)
    
    FRHICOMMAND_MACRO(FRHICommandUpdateTextureReference)
    FRHICOMMAND_MACRO(FRHICommandUpdateRHIResources)
    FRHICOMMAND_MACRO(FRHICommandBackBufferWaitTrackingBeginFrame)
    FRHICOMMAND_MACRO(FRHICommandFlushTextureCacheBOP)
    FRHICOMMAND_MACRO(FRHICommandCopyBufferRegion)
    FRHICOMMAND_MACRO(FRHICommandCopyBufferRegions)
    
    FRHICOMMAND_MACRO(FClearCachedRenderingDataCommand)
    FRHICOMMAND_MACRO(FClearCachedElementDataCommand)
    
    FRHICOMMAND_MACRO(FRHICommandRayTraceOcclusion)
    FRHICOMMAND_MACRO(FRHICommandRayTraceIntersection)
    FRHICOMMAND_MACRO(FRHICommandRayTraceDispatch)
    FRHICOMMAND_MACRO(FRHICommandSetRayTracingBindings)
    FRHICOMMAND_MACRO(FRHICommandClearRayTracingBindings)

    这些RHI命令的void Execute(FRHICommandListBase& CmdList)函数大多实现在UnrealEngine\Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl文件中

    void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)的实现体如下:

    // Executes the draw-primitive command: forwards the stored BaseVertexIndex,
    // NumPrimitives and NumInstances to RHIDrawPrimitive via INTERNAL_DECORATOR.
    void FRHICommandDrawPrimitive::Execute(FRHICommandListBase& CmdList)
    {
        RHISTAT(DrawPrimitive);  // stats are only collected when the RHI_STATS macro is enabled
        INTERNAL_DECORATOR(RHIDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances); // after macro expansion: CmdList.GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
    }

    FRHICommand的子类除了以上用FRHICOMMAND_MACRO声明的,还拥有以下直接派生的:

    FRHICommandSetShaderParameter
    FRHICommandSetShaderUniformBuffer
    FRHICommandSetShaderTexture
    FRHICommandSetShaderResourceViewParameter
    FRHICommandSetUAVParameter
    FRHICommandSetShaderSampler
    FRHICommandSetComputeShader
    FRHICommandSetComputePipelineState
    FRHICommandDispatchComputeShader
    FRHICommandDispatchIndirectComputeShader
    FRHICommandSetAsyncComputeBudget
    FRHICommandCopyToStagingBuffer
    FRHICommandWriteGPUFence
    FRHICommandSetLocalUniformBuffer
    FRHICommandSubmitCommandsHint
    FRHICommandPushEvent
    FRHICommandPopEvent
    FRHICommandBuildAccelerationStructure
    FRHICommandBuildAccelerationStructures
    ......

    无论是直接派生还是用FRHICOMMAND_MACRO宏声明,它们都是FRHICommand的子类,都是可以提供给渲染线程操作的RHI层渲染命令。只是用FRHICOMMAND_MACRO会更简便,可以少写一些重复的代码罢了。

    因此可知,RHI命令种类繁多,主要包含以下几大类:

    • 数据和资源的设置、更新、清理、转换、拷贝、回读。
    • 图元绘制。
    • Pass、SubPass、场景、ViewPort等的开始和结束事件。
    • 栅栏、等待、广播接口。
    • 光线追踪。
    • Slate、调试相关的命令。

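    下面给出FRHICommand的核心继承体系(原文此处为示意图,这里按上文代码以文字形式列出):

    FRHICommandBase
      └─ FRHICommand<TCmd, NameType>
           ├─ 用FRHICOMMAND_MACRO声明的命令(如FRHICommandSetStencilRef、FRHICommandDrawPrimitive等)
           └─ 直接派生的命令(如FRHICommandSetShaderParameter、FRHICommandPushEvent等)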

    FRHICommandList

    FRHICommandList是RHI的指令队列,用来管理、执行一组FRHICommand的对象。它和父类的定义如下:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.h
    
    // Base class of RHI command lists (abridged excerpt; "(......)" marks elided members).
    // Holds the linked list of commands, the graphics/compute contexts and the
    // allocation, submission and wait interfaces shared by all command lists.
    class FRHICommandListBase : public FNoncopyable  // copying is not allowed
    {
    public:
        ~FRHICommandListBase();
    
        // Custom operator new/delete with recycling.
        void* operator new(size_t Size);
        void operator delete(void *RawMemory);
    
        // Flush the command list.
        inline void Flush();
        // Whether this is the immediate command list.
        inline bool IsImmediate();
        // Whether this is the immediate async-compute command list.
        inline bool IsImmediateAsyncCompute();
    
        // Amount of memory currently in use.
        const int32 GetUsedMemory() const;
        
        // Queue the submission of an async command list.
        void QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList);
        // Queue the submission of parallel async command lists.
        void QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, bool bIsPrepass, class FRHICommandList** CmdLists, int32* NumDrawsIfKnown, int32 Num, int32 MinDrawsPerTranslate, bool bSpewMerge);
        // Queue the submission of a render-thread command list.
        void QueueRenderThreadCommandListSubmit(FGraphEventRef& RenderThreadCompletionEvent, class FRHICommandList* CmdList);
        // Queue the submission of a command list.
        void QueueCommandListSubmit(class FRHICommandList* CmdList);
        // Add a prerequisite task for dispatch.
        void AddDispatchPrerequisite(const FGraphEventRef& Prereq);
        
        // Wait interfaces.
        void WaitForTasks(bool bKnownToBeComplete = false);
        void WaitForDispatch();
        void WaitForRHIThreadTasks();
        void HandleRTThreadTaskCompletion(const FGraphEventRef& MyCompletionGraphEvent);
    
        // Allocation interfaces.
        void* Alloc(int32 AllocSize, int32 Alignment);
        template <typename T>
        void* Alloc();
        template <typename T>
        const TArrayView<T> AllocArray(const TArrayView<T> InArray);
        TCHAR* AllocString(const TCHAR* Name);
        // Allocate storage for a command.
        void* AllocCommand(int32 AllocSize, int32 Alignment);
        template <typename TCmd>
        void* AllocCommand();
    
        // State queries.
        bool HasCommands() const;
        bool IsExecuting() const;
        bool IsBottomOfPipe() const;
        bool IsTopOfPipe() const;
        bool IsGraphics() const;
        bool IsAsyncCompute() const;
        // RHI pipeline: ERHIPipeline::Graphics or ERHIPipeline::AsyncCompute.
        ERHIPipeline GetPipeline() const;
    
        // Whether to bypass the RHI thread and execute synchronously instead.
        bool Bypass() const;
    
        // Exchange the contents of two command lists.
        void ExchangeCmdList(FRHICommandListBase& Other);
        // Context setters/getters.
        void SetContext(IRHICommandContext* InContext);
        IRHICommandContext& GetContext();
        void SetComputeContext(IRHIComputeContext* InComputeContext);
        IRHIComputeContext& GetComputeContext();
        void CopyContext(FRHICommandListBase& ParentCommandList);
        
        void MaybeDispatchToRHIThread();
        void MaybeDispatchToRHIThreadInner();
        
        (......)
    
    private:
        // Head of the command linked list.
        FRHICommandBase* Root;
        // Pointer to a command pointer; the original note describes it as pointing at Root.
        // NOTE(review): presumably the link slot where the next command is attached — confirm against the engine source.
        FRHICommandBase** CommandLink;
        
        bool bExecuting;
        uint32 NumCommands;
        uint32 UID;
        
        // Device (graphics) context.
        IRHICommandContext* Context;
        // Compute context.
        IRHIComputeContext* ComputeContext;
        
        FMemStackBase MemManager; 
        FGraphEventArray RTTasks;
    
        // Reset.
        void Reset();
    
    public:
        // Slots of the RenderThreadContexts array; Num is the slot count.
        enum class ERenderThreadContext
        {
            SceneRenderTargets,
            Num
        };
        
        // Render-thread contexts, one opaque pointer per ERenderThreadContext slot.
        void *RenderThreadContexts[(int32)ERenderThreadContext::Num];
    
    protected:
        //the values of this struct must be copied when the commandlist is split 
        struct FPSOContext
        {
            uint32 CachedNumSimultanousRenderTargets = 0;
            TStaticArray<FRHIRenderTargetView, MaxSimultaneousRenderTargets> CachedRenderTargets;
            FRHIDepthRenderTargetView CachedDepthStencilTarget;
            
            ESubpassHint SubpassHint = ESubpassHint::None;
            uint8 SubpassIndex = 0;
            uint8 MultiViewCount = 0;
            bool HasFragmentDensityAttachment = false;
        } PSOContext;
    
        // Bound shader input.
        FBoundShaderStateInput BoundShaderInput;
        // Bound compute shader RHI resource.
        FRHIComputeShader* BoundComputeShaderRHI;
    
        // Validate the bound shader.
        void ValidateBoundShader(FRHIVertexShader* ShaderRHI);
        void ValidateBoundShader(FRHIPixelShader* ShaderRHI);
        (......)
    
        void CacheActiveRenderTargets(...);
        void CacheActiveRenderTargets(const FRHIRenderPassInfo& Info);
        void IncrementSubpass();
        void ResetSubpass(ESubpassHint SubpassHint);
        
    public:
        void CopyRenderThreadContexts(const FRHICommandListBase& ParentCommandList);
        void SetRenderThreadContext(void* InContext, ERenderThreadContext Slot);
        void* GetRenderThreadContext(ERenderThreadContext Slot);
    
        // Common data.
        struct FCommonData
        {
            class FRHICommandListBase* Parent = nullptr;
    
            enum class ECmdListType
            {
                Immediate = 1,
                Regular,
            };
            ECmdListType Type = ECmdListType::Regular;
            bool bInsideRenderPass = false;
            bool bInsideComputePass = false;
        };
    
        bool DoValidation() const;
        inline bool IsOutsideRenderPass() const;
        inline bool IsInsideRenderPass() const;
        inline bool IsInsideComputePass() const;
    
        FCommonData Data;
    };
    
    // Compute command list (abridged excerpt; "(......)" marks elided members).
    // Extends FRHICommandListBase with compute-shader parameter setting, dispatch,
    // UAV clearing, resource transitions, events and fence interfaces.
    class FRHIComputeCommandList : public FRHICommandListBase
    {
    public:
        FRHIComputeCommandList(FRHIGPUMask GPUMask) : FRHICommandListBase(GPUMask) {}
        
        void* operator new(size_t Size);
        void operator delete(void *RawMemory);
    
        // Shader parameter setters and getters.
        inline FRHIComputeShader* GetBoundComputeShader() const;
        void SetGlobalUniformBuffers(const FUniformBufferStaticBindings& UniformBuffers);
        void SetShaderUniformBuffer(FRHIComputeShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
        void SetShaderUniformBuffer(const FComputeShaderRHIRef& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
        void SetShaderParameter(FRHIComputeShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
        void SetShaderParameter(FComputeShaderRHIRef& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
        void SetShaderTexture(FRHIComputeShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
        void SetShaderResourceViewParameter(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
        void SetShaderSampler(FRHIComputeShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
        void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
        void SetUAVParameter(FRHIComputeShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV, uint32 InitialCount);
        void SetComputeShader(FRHIComputeShader* ComputeShader);
        void SetComputePipelineState(FComputePipelineState* ComputePipelineState, FRHIComputeShader* ComputeShader);
    
        void SetAsyncComputeBudget(EAsyncComputeBudget Budget);
        // Dispatch compute shaders.
        void DispatchComputeShader(uint32 ThreadGroupCountX, uint32 ThreadGroupCountY, uint32 ThreadGroupCountZ);
        void DispatchIndirectComputeShader(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);
    
        // Clears.
        void ClearUAVFloat(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FVector4& Values);
        void ClearUAVUint(FRHIUnorderedAccessView* UnorderedAccessViewRHI, const FUintVector4& Values);
        
        // Resource transitions.
        void BeginTransitions(TArrayView<const FRHITransition*> Transitions);
        void EndTransitions(TArrayView<const FRHITransition*> Transitions);
        inline void Transition(TArrayView<const FRHITransitionInfo> Infos);
        void BeginTransition(const FRHITransition* Transition);
        void EndTransition(const FRHITransition* Transition);
        void Transition(const FRHITransitionInfo& Info);
    
        // ---- Legacy API ----
    
        void TransitionResource(ERHIAccess TransitionType, const FTextureRHIRef& InTexture);
        void TransitionResource(ERHIAccess TransitionType, FRHITexture* InTexture);
        inline void TransitionResources(ERHIAccess TransitionType, FRHITexture* const* InTextures, int32 NumTextures);
        void TransitionResourceArrayNoCopy(ERHIAccess TransitionType, TArray<FRHITexture*>& InTextures);
        inline void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline /* ignored TransitionPipeline */, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs, FRHIComputeFence* WriteFence);
        void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV, FRHIComputeFence* WriteFence);
        void TransitionResource(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* InUAV);
        void TransitionResources(ERHIAccess TransitionType, EResourceTransitionPipeline TransitionPipeline, FRHIUnorderedAccessView* const* InUAVs, int32 NumUAVs);
        void WaitComputeFence(FRHIComputeFence* WaitFence);
    
        // UAV overlap scopes: for all UAVs, a single UAV, or a set of UAVs.
        void BeginUAVOverlap();
        void EndUAVOverlap();
        void BeginUAVOverlap(FRHIUnorderedAccessView* UAV);
        void EndUAVOverlap(FRHIUnorderedAccessView* UAV);
        void BeginUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);
        void EndUAVOverlap(TArrayView<FRHIUnorderedAccessView* const> UAVs);
    
        // Named events and breakpoint.
        void PushEvent(const TCHAR* Name, FColor Color);
        void PopEvent();
        void BreakPoint();
    
        void SubmitCommandsHint();
        void CopyToStagingBuffer(FRHIVertexBuffer* SourceBuffer, FRHIStagingBuffer* DestinationStagingBuffer, uint32 Offset, uint32 NumBytes);
    
        void WriteGPUFence(FRHIGPUFence* Fence);
        void SetGPUMask(FRHIGPUMask InGPUMask);
    
        (......)
    };
    
    // RHI (graphics) command list (abridged excerpt; "(......)" marks elided members).
    // Extends FRHIComputeCommandList with graphics-pipeline interfaces: bound-shader
    // getters, uniform buffers and shader parameters, primitive drawing, state
    // setting, texture copies, render queries, render passes and frame/scene/viewport events.
    class FRHICommandList : public FRHIComputeCommandList
    {
    public:
        FRHICommandList(FRHIGPUMask GPUMask) : FRHIComputeCommandList(GPUMask) {}
    
        bool AsyncPSOCompileAllowed() const;
    
        void* operator new(size_t Size);
        void operator delete(void *RawMemory);
        
        // Getters for the bound shaders.
        inline FRHIVertexShader* GetBoundVertexShader() const;
        inline FRHIHullShader* GetBoundHullShader() const;
        inline FRHIDomainShader* GetBoundDomainShader() const;
        inline FRHIPixelShader* GetBoundPixelShader() const;
        inline FRHIGeometryShader* GetBoundGeometryShader() const;
    
        // Multi-frame resource updates.
        void BeginUpdateMultiFrameResource(FRHITexture* Texture);
        void EndUpdateMultiFrameResource(FRHITexture* Texture);
        void BeginUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);
        void EndUpdateMultiFrameResource(FRHIUnorderedAccessView* UAV);
    
        // Uniform buffer interfaces.
        FLocalUniformBuffer BuildLocalUniformBuffer(const void* Contents, uint32 ContentsSize, const FRHIUniformBufferLayout& Layout);
        template <typename TRHIShader>
        void SetLocalShaderUniformBuffer(TRHIShader* Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
        template <typename TShaderRHI>
        void SetLocalShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, const FLocalUniformBuffer& UniformBuffer);
        void SetShaderUniformBuffer(FRHIGraphicsShader* Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
        template <typename TShaderRHI>
        FORCEINLINE void SetShaderUniformBuffer(const TRefCountPtr<TShaderRHI>& Shader, uint32 BaseIndex, FRHIUniformBuffer* UniformBuffer);
        
        // Shader parameters.
        void SetShaderParameter(FRHIGraphicsShader* Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
        template <typename TShaderRHI>
        void SetShaderParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 BufferIndex, uint32 BaseIndex, uint32 NumBytes, const void* NewValue);
        void SetShaderTexture(FRHIGraphicsShader* Shader, uint32 TextureIndex, FRHITexture* Texture);
        template <typename TShaderRHI>
        void SetShaderTexture(const TRefCountPtr<TShaderRHI>& Shader, uint32 TextureIndex, FRHITexture* Texture);
        void SetShaderResourceViewParameter(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
        template <typename TShaderRHI>
        void SetShaderResourceViewParameter(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHIShaderResourceView* SRV);
        void SetShaderSampler(FRHIGraphicsShader* Shader, uint32 SamplerIndex, FRHISamplerState* State);
        template <typename TShaderRHI>
        void SetShaderSampler(const TRefCountPtr<TShaderRHI>& Shader, uint32 SamplerIndex, FRHISamplerState* State);
        void SetUAVParameter(FRHIPixelShader* Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
        void SetUAVParameter(const TRefCountPtr<FRHIPixelShader>& Shader, uint32 UAVIndex, FRHIUnorderedAccessView* UAV);
        void SetBlendFactor(const FLinearColor& BlendFactor = FLinearColor::White);
        
        // Primitive drawing.
        void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances);
        void DrawIndexedPrimitive(FRHIIndexBuffer* IndexBuffer, int32 BaseVertexIndex, uint32 FirstInstance, uint32 NumVertices, uint32 StartIndex, uint32 NumPrimitives, uint32 NumInstances);
        void DrawPrimitiveIndirect(FRHIVertexBuffer* ArgumentBuffer, uint32 ArgumentOffset);
        void DrawIndexedIndirect(FRHIIndexBuffer* IndexBufferRHI, FRHIStructuredBuffer* ArgumentsBufferRHI, uint32 DrawArgumentsIndex, uint32 NumInstances);
        void DrawIndexedPrimitiveIndirect(FRHIIndexBuffer* IndexBuffer, FRHIVertexBuffer* ArgumentsBuffer, uint32 ArgumentOffset);
        
        // State and data setters.
        void SetStreamSource(uint32 StreamIndex, FRHIVertexBuffer* VertexBuffer, uint32 Offset);
        void SetStencilRef(uint32 StencilRef);
        void SetViewport(float MinX, float MinY, float MinZ, float MaxX, float MaxY, float MaxZ);
        void SetStereoViewport(float LeftMinX, float RightMinX, float LeftMinY, float RightMinY, float MinZ, float LeftMaxX, float RightMaxX, float LeftMaxY, float RightMaxY, float MaxZ);
        void SetScissorRect(bool bEnable, uint32 MinX, uint32 MinY, uint32 MaxX, uint32 MaxY);
        void ApplyCachedRenderTargets(FGraphicsPipelineStateInitializer& GraphicsPSOInit);
        void SetGraphicsPipelineState(class FGraphicsPipelineState* GraphicsPipelineState, const FBoundShaderStateInput& ShaderInput, bool bApplyAdditionalState);
        void SetDepthBounds(float MinDepth, float MaxDepth);
        void SetShadingRate(EVRSShadingRate ShadingRate, EVRSRateCombiner Combiner);
        void SetShadingRateImage(FRHITexture* RateImageTexture, EVRSRateCombiner Combiner);
        
        // Texture copies.
        void CopyToResolveTarget(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FResolveParams& ResolveParams);
        void CopyTexture(FRHITexture* SourceTextureRHI, FRHITexture* DestTextureRHI, const FRHICopyTextureInfo& CopyInfo);
        
        void ResummarizeHTile(FRHITexture2D* DepthTexture);
        
        // Render queries.
        void BeginRenderQuery(FRHIRenderQuery* RenderQuery);
        void EndRenderQuery(FRHIRenderQuery* RenderQuery);
        void CalibrateTimers(FRHITimestampCalibrationQuery* CalibrationQuery);
        void PollOcclusionQueries();
    
        /* LEGACY API */
        void TransitionResource(FExclusiveDepthStencil DepthStencilMode, FRHITexture* DepthTexture);
        void BeginRenderPass(const FRHIRenderPassInfo& InInfo, const TCHAR* Name);
        void EndRenderPass();
        void NextSubpass();
    
        // The interfaces below must be called on the immediate-mode command list.
        void BeginScene();
        void EndScene();
        void BeginDrawingViewport(FRHIViewport* Viewport, FRHITexture* RenderTargetRHI);
        void EndDrawingViewport(FRHIViewport* Viewport, bool bPresent, bool bLockToVsync);
        void BeginFrame();
        void EndFrame();
    
        void RHIInvalidateCachedState();
        void DiscardRenderTargets(bool Depth, bool Stencil, uint32 ColorBitMask);
        
        void CopyBufferRegion(FRHIVertexBuffer* DestBuffer, uint64 DstOffset, FRHIVertexBuffer* SourceBuffer, uint64 SrcOffset, uint64 NumBytes);
    
        (......)
    };

    FRHICommandListBase定义了命令队列所需的基本数据(命令列表、设备上下文)和接口(命令的刷新、等待、入队、派发等,内存分配)。

    FRHIComputeCommandList定义了计算着色器相关的接口、GPU资源状态转换和着色器部分参数的设置。

    FRHICommandList定义了普通渲染管线的接口,与IRHICommandContext的接口高度相似且重叠。包含VS、PS、GS的绑定,图元绘制,更多着色器参数的设置和资源状态转换,资源创建、更新和等待等等。

    FRHICommandList还有数个子类,定义如下:

    // 立即模式的命令队列.
    class FRHICommandListImmediate : public FRHICommandList
    {
        // 命令匿名函数.
        template <typename LAMBDA>
        struct TRHILambdaCommand final : public FRHICommandBase
        {
            LAMBDA Lambda;
    
            void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
        };
    
        FRHICommandListImmediate();
        ~FRHICommandListImmediate();
        
    public:
        // 立即刷新命令.
        void ImmediateFlush(EImmediateFlushType::Type FlushType);
        // 阻塞RHI线程.
        bool StallRHIThread();
        // 取消阻塞RHI线程.
        void UnStallRHIThread();
        // 是否阻塞中.
        static bool IsStalled();
    
        void SetCurrentStat(TStatId Stat);
    
        static FGraphEventRef RenderThreadTaskFence();
        static FGraphEventArray& GetRenderThreadTaskArray();
        static void WaitOnRenderThreadTaskFence(FGraphEventRef& Fence);
        static bool AnyRenderThreadTasksOutstanding();
        FGraphEventRef RHIThreadFence(bool bSetLockFence = false);
    
        // 将给定的异步计算命令列表按当前立即命令列表的顺序排列.
        void QueueAsyncCompute(FRHIComputeCommandList& RHIComputeCmdList);
    
        bool IsBottomOfPipe();
        bool IsTopOfPipe();
        template <typename LAMBDA>
        void EnqueueLambda(LAMBDA&& Lambda);
    
        // 资源创建.
        FSamplerStateRHIRef CreateSamplerState(const FSamplerStateInitializerRHI& Initializer)
        FRasterizerStateRHIRef CreateRasterizerState(const FRasterizerStateInitializerRHI& Initializer)
        FDepthStencilStateRHIRef CreateDepthStencilState(const FDepthStencilStateInitializerRHI& Initializer)
        FBlendStateRHIRef CreateBlendState(const FBlendStateInitializerRHI& Initializer)
        FPixelShaderRHIRef CreatePixelShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FVertexShaderRHIRef CreateVertexShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FHullShaderRHIRef CreateHullShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FDomainShaderRHIRef CreateDomainShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FGeometryShaderRHIRef CreateGeometryShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FComputeShaderRHIRef CreateComputeShader(TArrayView<const uint8> Code, const FSHAHash& Hash)
        FComputeFenceRHIRef CreateComputeFence(const FName& Name)
        FGPUFenceRHIRef CreateGPUFence(const FName& Name)
        FStagingBufferRHIRef CreateStagingBuffer()
        FBoundShaderStateRHIRef CreateBoundShaderState(...)
        FGraphicsPipelineStateRHIRef CreateGraphicsPipelineState(const FGraphicsPipelineStateInitializer& Initializer)
        TRefCountPtr<FRHIComputePipelineState> CreateComputePipelineState(FRHIComputeShader* ComputeShader)
        FUniformBufferRHIRef CreateUniformBuffer(...)
        FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, EBufferUsageFlags InUsage, ERHIAccess InResourceState, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
        FIndexBufferRHIRef CreateAndLockIndexBuffer(uint32 Stride, uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer)
        
        // 顶点/索引接口.
        void* LockIndexBuffer(FRHIIndexBuffer* IndexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
        void UnlockIndexBuffer(FRHIIndexBuffer* IndexBuffer);
        void* LockStagingBuffer(FRHIStagingBuffer* StagingBuffer, FRHIGPUFence* Fence, uint32 Offset, uint32 SizeRHI);
        void UnlockStagingBuffer(FRHIStagingBuffer* StagingBuffer);
        FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, EBufferUsageFlags InUsage, ...);
        FVertexBufferRHIRef CreateAndLockVertexBuffer(uint32 Size, uint32 InUsage, FRHIResourceCreateInfo& CreateInfo, void*& OutDataBuffer);
        void* LockVertexBuffer(FRHIVertexBuffer* VertexBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
        void UnlockVertexBuffer(FRHIVertexBuffer* VertexBuffer);
        void CopyVertexBuffer(FRHIVertexBuffer* SourceBuffer, FRHIVertexBuffer* DestBuffer);
        void* LockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer, uint32 Offset, uint32 SizeRHI, EResourceLockMode LockMode);
        void UnlockStructuredBuffer(FRHIStructuredBuffer* StructuredBuffer);
        
        // UAV/SRV创建.
        FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIStructuredBuffer* StructuredBuffer, bool bUseUAVCounter, bool bAppendBuffer)
        FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel)
        FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHITexture* Texture, uint32 MipLevel, uint8 Format)
        FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIVertexBuffer* VertexBuffer, uint8 Format)
        FUnorderedAccessViewRHIRef CreateUnorderedAccessView(FRHIIndexBuffer* IndexBuffer, uint8 Format)
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHIStructuredBuffer* StructuredBuffer)
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHIVertexBuffer* VertexBuffer, uint32 Stride, uint8 Format)
        FShaderResourceViewRHIRef CreateShaderResourceView(const FShaderResourceViewInitializer& Initializer)
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHIIndexBuffer* Buffer)
            
        // Platform texture size queries (parameter lists are elided in this excerpt).
        uint64 CalcTexture2DPlatformSize(...);
        uint64 CalcTexture3DPlatformSize(...);
        uint64 CalcTextureCubePlatformSize(...);
        
        // Texture operations.
        void GetTextureMemoryStats(FTextureMemoryStats& OutStats);
        bool GetTextureMemoryVisualizeData(...);
        void CopySharedMips(FRHITexture2D* DestTexture2D, FRHITexture2D* SrcTexture2D);
        void TransferTexture(FRHITexture2D* Texture, FIntRect Rect, uint32 SrcGPUIndex, uint32 DestGPUIndex, bool PullData);
        void TransferTextures(const TArrayView<const FTransferTextureParams> Params);
        void GetResourceInfo(FRHITexture* Ref, FRHIResourceInfo& OutInfo);
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, const FRHITextureSRVCreateInfo& CreateInfo);
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel);
        FShaderResourceViewRHIRef CreateShaderResourceView(FRHITexture* Texture, uint8 MipLevel, uint8 NumMipLevels, uint8 Format);
        FShaderResourceViewRHIRef CreateShaderResourceViewWriteMask(FRHITexture2D* Texture2DRHI);
        FShaderResourceViewRHIRef CreateShaderResourceViewFMask(FRHITexture2D* Texture2DRHI);
        uint32 ComputeMemorySize(FRHITexture* TextureRHI);
        FTexture2DRHIRef AsyncReallocateTexture2D(...);
        ETextureReallocationStatus FinalizeAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
        ETextureReallocationStatus CancelAsyncReallocateTexture2D(FRHITexture2D* Texture2D, bool bBlockUntilCompleted);
        void* LockTexture2D(...);
        void UnlockTexture2D(FRHITexture2D* Texture, uint32 MipIndex, bool bLockWithinMiptail, bool bFlushRHIThread = true);
        void* LockTexture2DArray(...);
        void UnlockTexture2DArray(FRHITexture2DArray* Texture, uint32 TextureIndex, uint32 MipIndex, bool bLockWithinMiptail);
        void UpdateTexture2D(...);
        void UpdateFromBufferTexture2D(...);
        FUpdateTexture3DData BeginUpdateTexture3D(...);
        void EndUpdateTexture3D(FUpdateTexture3DData& UpdateData);
        void EndMultiUpdateTexture3D(TArray<FUpdateTexture3DData>& UpdateDataArray);
        void UpdateTexture3D(...);
        void* LockTextureCubeFace(...);
        void UnlockTextureCubeFace(FRHITextureCube* Texture, ...);
    
        // Read back texture surface data.
        void ReadSurfaceData(FRHITexture* Texture, ...);
        void ReadSurfaceData(FRHITexture* Texture, ...);
        void MapStagingSurface(FRHITexture* Texture, void*& OutData, int32& OutWidth, int32& OutHeight);
        void MapStagingSurface(FRHITexture* Texture, ...);
        void UnmapStagingSurface(FRHITexture* Texture);
        void ReadSurfaceFloatData(FRHITexture* Texture, ...);
        void ReadSurfaceFloatData(FRHITexture* Texture, ...);
        void Read3DSurfaceFloatData(FRHITexture* Texture,...);
        
        // Resource state transitions issued from the render thread.
        void AcquireTransientResource_RenderThread(FRHITexture* Texture);
        void DiscardTransientResource_RenderThread(FRHITexture* Texture);
        void AcquireTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
        void DiscardTransientResource_RenderThread(FRHIVertexBuffer* Buffer);
        void AcquireTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
        void DiscardTransientResource_RenderThread(FRHIStructuredBuffer* Buffer);
       
        // Fetch render query results.
        bool GetRenderQueryResult(FRHIRenderQuery* RenderQuery, ...);
        void PollRenderQueryResults();
        
        // Viewports.
        FViewportRHIRef CreateViewport(void* WindowHandle, ...);
        uint32 GetViewportNextPresentGPUIndex(FRHIViewport* Viewport);
        FTexture2DRHIRef GetViewportBackBuffer(FRHIViewport* Viewport);
        void AdvanceFrameForGetViewportBackBuffer(FRHIViewport* Viewport);
        void ResizeViewport(FRHIViewport* Viewport, ...);
        
        // Thread ownership.
        void AcquireThreadOwnership();
        void ReleaseThreadOwnership();
        
        // Submit commands and flush them to the GPU.
        void SubmitCommandsAndFlushGPU();
        // Execute a command list.
        void ExecuteCommandList(FRHICommandList* CmdList);
        
        // Update resources.
        void UpdateTextureReference(FRHITextureReference* TextureRef, FRHITexture* NewTexture);
        void UpdateRHIResources(FRHIResourceUpdateInfo* UpdateInfos, int32 Num, bool bNeedReleaseRefs);
        // Flush resources.
        void FlushResources();
        
        // Per-frame update.
        void Tick(float DeltaTime);
        // Block until the GPU is idle: force every pending RHI command to finish executing, send the
        // command buffer to the GPU and wait for the GPU to complete it, i.e. a forced CPU-to-GPU sync.
        void BlockUntilGPUIdle();
        
        // Suspend / resume rendering.
        void SuspendRendering();
        void ResumeRendering();
        bool IsRenderingSuspended();
        
        // Compress / decompress data.
        bool EnqueueDecompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int CompressedSize, void* ErrorCodeBuffer);
        bool EnqueueCompress(uint8_t* SrcBuffer, uint8_t* DestBuffer, int UnCompressedSize, void* ErrorCodeBuffer);
        
        // Miscellaneous interfaces.
        bool GetAvailableResolutions(FScreenResolutionArray& Resolutions, bool bIgnoreRefreshRate);
        void GetSupportedResolution(uint32& Width, uint32& Height);
        void VirtualTextureSetFirstMipInMemory(FRHITexture2D* Texture, uint32 FirstMip);
        void VirtualTextureSetFirstMipVisible(FRHITexture2D* Texture, uint32 FirstMip);
    
        // Access to native (graphics-API level) objects.
        void* GetNativeDevice();
        void* GetNativeInstance();
        // Get the immediate-mode command context.
        IRHICommandContext* GetDefaultContext();
        // Get a command context container.
        IRHICommandContextContainer* GetCommandContextContainer(int32 Index, int32 Num);
        
        uint32 GetGPUFrameCycles();
    };
    
    // Type used inside RHI implementations to mark recursive use of a command list.
    // It is an FRHICommandList constructed from an existing command context and an optional GPU mask
    // (defaulting to FRHIGPUMask::All()).
    class FRHICommandList_RecursiveHazardous : public FRHICommandList
    {
    public:
        FRHICommandList_RecursiveHazardous(IRHICommandContext *Context, FRHIGPUMask InGPUMask = FRHIGPUMask::All());
    };
    
    // Helper used internally by the RHI to work with FRHICommandList_RecursiveHazardous more safely.
    // ContextType is the concrete context type passed to the constructor.
    template <typename ContextType>
    class TRHICommandList_RecursiveHazardous : public FRHICommandList_RecursiveHazardous
    {
        // Command node that stores a lambda; it plugs into the command list through FRHICommandBase.
        template <typename LAMBDA>
        struct TRHILambdaCommand final : public FRHICommandBase
        {
            LAMBDA Lambda;
    
            TRHILambdaCommand(LAMBDA&& InLambda);
            void ExecuteAndDestruct(FRHICommandListBase& CmdList, FRHICommandListDebugContext&) override final;
        };
    
    public:
        TRHICommandList_RecursiveHazardous(ContextType *Context, FRHIGPUMask GPUMask = FRHIGPUMask::All());
    
        // Takes a lambda to run against the context (implementation not shown in this excerpt).
        template <typename LAMBDA>
        void RunOnContext(LAMBDA&& Lambda);
    };

    FRHICommandListImmediate封装了立即模式的图形API接口,在UE渲染体系中被应用得非常广泛。它额外定义了资源的操作、创建、更新、读取和状态转换接口,也增加了线程同步和GPU同步的接口。

    下面对FRHICommandList核心继承体系来个UML图总结一下:

    RHI体系总览

    若抛开图形API的具体实现细节和众多的RHI具体子类,将RHI Context/CommandList/Command/Resource等的顶层概念汇总成UML关系图,则是如下模样:

    下图是在上面的基础上细化了子类的UML:

    RHI命令执行

    FRHICommandListExecutor

    FRHICommandListExecutor负责将Renderer层的RHI中间指令转译(或直接调用)到目标平台的图形API,它在RHI体系中起着举足轻重的作用,定义如下:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.h
    
    // Executes RHI command lists. It owns the immediate command list and the immediate async-compute
    // command list, and latches the "bypass" / "parallel algorithms" switches.
    class RHI_API FRHICommandListExecutor
    {
    public:
        enum
        {
            // Platform default for the bypass switch.
            DefaultBypass = PLATFORM_RHITHREAD_DEFAULT_BYPASS
        };
        FRHICommandListExecutor()
            : bLatchedBypass(!!DefaultBypass)
            , bLatchedUseParallelAlgorithms(false)
        {
        }
        
        // Static accessor: returns the immediate command list.
        static inline FRHICommandListImmediate& GetImmediateCommandList();
        // Static accessor: returns the immediate async-compute command list.
        static inline FRHIAsyncComputeCommandListImmediate& GetImmediateAsyncComputeCommandList();
    
        // Execute a command list.
        void ExecuteList(FRHICommandListBase& CmdList);
        void ExecuteList(FRHICommandListImmediate& CmdList);
        void LatchBypass();
    
        // Wait on an RHI thread fence.
        static void WaitOnRHIThreadFence(FGraphEventRef& Fence);
    
        // Whether command recording is bypassed; if so, the target platform's graphics API is called directly.
        // Returns the latched value when bypass can be toggled at runtime, otherwise the platform default.
        FORCEINLINE_DEBUGGABLE bool Bypass()
        {
    #if CAN_TOGGLE_COMMAND_LIST_BYPASS
            return bLatchedBypass;
    #else
            return !!DefaultBypass;
    #endif
        }
        // Whether parallel algorithms are used.
        // Returns the latched value when bypass can be toggled; otherwise it is computed from the
        // threading-for-performance setting, the bypass state and the RHI thread configuration.
        FORCEINLINE_DEBUGGABLE bool UseParallelAlgorithms()
        {
    #if CAN_TOGGLE_COMMAND_LIST_BYPASS
            return bLatchedUseParallelAlgorithms;
    #else
            return  FApp::ShouldUseThreadingForPerformance() && !Bypass() && (GSupportsParallelRenderingTasksWithSeparateRHIThread || !IsRunningRHIInSeparateThread());
    #endif
        }
        static void CheckNoOutstandingCmdLists();
        static bool IsRHIThreadActive();
        static bool IsRHIThreadCompletelyFlushed();
    
    private:
        // Internal execution entry point.
        void ExecuteInner(FRHICommandListBase& CmdList);
        // Internal execution: iterates the recorded commands and actually executes them.
        static void ExecuteInner_DoExecute(FRHICommandListBase& CmdList);
    
        // Latched switches (see Bypass() / UseParallelAlgorithms()).
        bool bLatchedBypass;
        bool bLatchedUseParallelAlgorithms;
        
        // Thread-safe counters.
        FThreadSafeCounter UIDCounter;
        FThreadSafeCounter OutstandingCmdListCount;
        
        // Immediate-mode command list.
        FRHICommandListImmediate CommandListImmediate;
        // Immediate-mode async-compute command list.
        FRHIAsyncComputeCommandListImmediate AsyncComputeCmdListImmediate;
    };

    下面是FRHICommandListExecutor部分重要接口的实现代码:

    // Engine\Source\Runtime\RHI\Private\RHICommandList.cpp
    
    // Reports whether the RHI thread still has (or may soon receive) outstanding work.
    // As a side effect, task references that have completed are released.
    bool FRHICommandListExecutor::IsRHIThreadActive()
    {
        // r.RHICmdAsyncRHIThreadDispatch > 0 means command lists are submitted asynchronously.
        const bool bUseAsyncDispatch = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;

        // Step 1 (async submission only): inspect the sub-list dispatch task first.
        if (bUseAsyncDispatch && RenderThreadSublistDispatchTask.GetReference())
        {
            if (!RenderThreadSublistDispatchTask->IsComplete())
            {
                // it might become active at any time
                return true;
            }
            // The dispatch task has finished: release it.
            RenderThreadSublistDispatchTask = nullptr;
        }

        // otherwise we can safely look at RHIThreadTask
        // Step 2: release the RHI thread task (and its predecessor) once it has completed.
        const bool bRHITaskFinished = RHIThreadTask.GetReference() && RHIThreadTask->IsComplete();
        if (bRHITaskFinished)
        {
            RHIThreadTask = nullptr;
            PrevRHIThreadTask = nullptr;
        }

        // Active exactly when an RHI thread task is still being tracked.
        return RHIThreadTask.GetReference() != nullptr;
    }
    
    // Reports whether the RHI thread has been completely flushed: no active RHI thread work,
    // no commands recorded in the immediate command list, and no pending sub-list dispatch task.
    bool FRHICommandListExecutor::IsRHIThreadCompletelyFlushed()
    {
        // Any active RHI thread work means we are not flushed.
        if (IsRHIThreadActive())
        {
            return false;
        }

        // Commands still recorded in the immediate list also mean we are not flushed.
        if (GetImmediateCommandList().HasCommands())
        {
            return false;
        }

        // Release the sub-list dispatch task if it has completed.
        const bool bDispatchTaskFinished = RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete();
        if (bDispatchTaskFinished)
        {
    #if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
            // Record which thread cleared the task, for hang diagnostics.
            bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
            bRenderThreadSublistDispatchTaskClearedOnGT = IsInGameThread();
    #endif
            RenderThreadSublistDispatchTask = nullptr;
        }

        // Flushed only when no dispatch task remains.
        return !RenderThreadSublistDispatchTask;
    }
    
    // Executes the immediate command list; the whole call is timed under STAT_ImmedCmdListExecuteTime.
    void FRHICommandListExecutor::ExecuteList(FRHICommandListImmediate& CmdList)
    {
        // The counter's scope is the function body, which contains nothing but the execution call.
        SCOPE_CYCLE_COUNTER(STAT_ImmedCmdListExecuteTime);
        ExecuteInner(CmdList);
    }
    
    // Executes an arbitrary command list.
    void FRHICommandListExecutor::ExecuteList(FRHICommandListBase& CmdList)
    {
        // Before translating this list, flush what the immediate command list has already recorded.
        // This is done only on the rendering thread, and only while the immediate list is not itself executing.
        const bool bMustFlushImmediate = IsInRenderingThread() && !GetImmediateCommandList().IsExecuting();
        if (bMustFlushImmediate)
        {
            FRHICommandListImmediate& ImmediateCmdList = GetImmediateCommandList();
            ImmediateCmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
        }

        // Hand over to the internal execution path.
        ExecuteInner(CmdList);
    }
    
    // Internal execution entry point.
    // With a separate RHI thread and r.RHICmdUseThread > 0 (called on the rendering thread, not the game thread),
    // the recorded commands are swapped into a new FRHICommandList and handed to a task
    // (FDispatchRHIThreadTask or FExecuteRHIThreadTask), and the function returns without executing them here.
    // In every other case it waits for the relevant outstanding tasks and then executes the list in place
    // through ExecuteInner_DoExecute.
    void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
    {
        // Are we on the rendering thread?
        bool bIsInRenderingThread = IsInRenderingThread();
        // Are we on the game thread?
        bool bIsInGameThread = IsInGameThread();
        
        // The RHI runs on a separate thread.
        if (IsRunningRHIInSeparateThread())
        {
            bool bAsyncSubmit = false;
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            if (bIsInRenderingThread)
            {
                if (!bIsInGameThread && !FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // Process everything queued on the render thread's local queue.
                    FTaskGraphInterface::Get().ProcessThreadUntilIdle(RenderThread_Local);
                }
                // Check whether the sub-list dispatch task has completed.
                bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0;  // r.RHICmdAsyncRHIThreadDispatch
                if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
                {
                    RenderThreadSublistDispatchTask = nullptr;
                    if (bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                    {
                        RHIThreadTask = nullptr;
                        PrevRHIThreadTask = nullptr;
                    }
                }
                // Check whether the RHI thread task has completed.
                if (!bAsyncSubmit && RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                {
                    RHIThreadTask = nullptr;
                    PrevRHIThreadTask = nullptr;
                }
            }
            
            if (CVarRHICmdUseThread.GetValueOnRenderThread() > 0 && bIsInRenderingThread && !bIsInGameThread) // r.RHICmdUseThread
            {
                // Move the command list's RTTasks into the prerequisite array (Prereq starts empty, so RTTasks ends up empty).
                FRHICommandList* SwapCmdList;
                FGraphEventArray Prereq;
                Exchange(Prereq, CmdList.RTTasks); 
                {
                    QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SwapCmdLists);
                    // Swap the recorded commands into a new list, then copy the context and state back into CmdList.
                    SwapCmdList = new FRHICommandList(CmdList.GetGPUMask());
    
                    static_assert(sizeof(FRHICommandList) == sizeof(FRHICommandListImmediate), "We are memswapping FRHICommandList and FRHICommandListImmediate; they need to be swappable.");
                    SwapCmdList->ExchangeCmdList(CmdList);
                    CmdList.CopyContext(*SwapCmdList);
                    CmdList.GPUMask = SwapCmdList->GPUMask;
                    CmdList.InitialGPUMask = SwapCmdList->GPUMask;
                    CmdList.PSOContext = SwapCmdList->PSOContext;
                    CmdList.Data.bInsideRenderPass = SwapCmdList->Data.bInsideRenderPass;
                    CmdList.Data.bInsideComputePass = SwapCmdList->Data.bInsideComputePass;
                }
                
                // Submit the task.
                QUICK_SCOPE_CYCLE_COUNTER(STAT_FRHICommandListExecutor_SubmitTasks);
    
                // Create an FDispatchRHIThreadTask whose prerequisites are AllOutstandingTasks and RenderThreadSublistDispatchTask.
                if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
                {
                    Prereq.Append(AllOutstandingTasks);
                    AllOutstandingTasks.Reset();
                    if (RenderThreadSublistDispatchTask.GetReference())
                    {
                        Prereq.Add(RenderThreadSublistDispatchTask);
                    }
                    RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
                }
                // Otherwise create an FExecuteRHIThreadTask whose prerequisite is the current RHIThreadTask.
                else
                {
                    if (RHIThreadTask.GetReference())
                    {
                        Prereq.Add(RHIThreadTask);
                    }
                    PrevRHIThreadTask = RHIThreadTask;
                    RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
                }
                
                if (CVarRHICmdForceRHIFlush.GetValueOnRenderThread() > 0 ) // r.RHICmdForceRHIFlush
                {
                    // Detect a render thread deadlock.
                    if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                    {
                        // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                        UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner 2."));
                    }
                    
                    // Wait for RenderThreadSublistDispatchTask to complete.
                    if (RenderThreadSublistDispatchTask.GetReference())
                    {
                        FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
                        RenderThreadSublistDispatchTask = nullptr;
                    }
                    
                    // Wait for RHIThreadTask to complete.
                    while (RHIThreadTask.GetReference())
                    {
                        FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                        if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                        {
                            RHIThreadTask = nullptr;
                            PrevRHIThreadTask = nullptr;
                        }
                    }
                }
                
                // The commands now belong to the task created above; nothing is executed here.
                return;
            }
            
            // Wait for RTTasks / RenderThreadSublistDispatchTask / RHIThreadTask to complete.
            if (bIsInRenderingThread)
            {
                if (CmdList.RTTasks.Num())
                {
                    if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                    {
                        UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
                    }
                    FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
                    CmdList.RTTasks.Reset();
    
                }
                if (RenderThreadSublistDispatchTask.GetReference())
                {
                    if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                    {
                        // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                        UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RenderThreadSublistDispatchTask)."));
                    }
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
    #if NEEDS_DEBUG_INFO_ON_PRESENT_HANG
                    bRenderThreadSublistDispatchTaskClearedOnRT = IsInActualRenderingThread();
                    bRenderThreadSublistDispatchTaskClearedOnGT = bIsInGameThread;
    #endif
                    RenderThreadSublistDispatchTask = nullptr;
                }
                while (RHIThreadTask.GetReference())
                {
                    if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                    {
                        // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                        UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RHIThreadTask)."));
                    }
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
                    if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
                    {
                        RHIThreadTask = nullptr;
                        PrevRHIThreadTask = nullptr;
                    }
                }
            }
        }
        // The RHI does not run on a separate thread.
        else
        {
            if (bIsInRenderingThread && CmdList.RTTasks.Num())
            {
                ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // this is a deadlock. RT tasks must be done by now or they won't be done. We could add a third queue...
                    UE_LOG(LogRHI, Fatal, TEXT("Deadlock in FRHICommandListExecutor::ExecuteInner (RTTasks)."));
                }
                FTaskGraphInterface::Get().WaitUntilTasksComplete(CmdList.RTTasks, RenderThread_Local);
                CmdList.RTTasks.Reset();
            }
        }
    
        // Execute the commands in place.
        ExecuteInner_DoExecute(CmdList);
    }
    
    // Walks every command recorded in CmdList, calling ExecuteAndDestruct on each one, then resets the list.
    // Depending on the build configuration (STATS / ENABLE_STATNAMEDEVENTS) and on
    // r.RHICmdCollectRHIThreadStatsFromHighLevel, the commands are executed inside per-stat cycle counter scopes.
    void FRHICommandListExecutor::ExecuteInner_DoExecute(FRHICommandListBase& CmdList)
    {
        FScopeCycleCounter ScopeOuter(CmdList.ExecuteStat);
    
        // Mark the list as executing; it must have a graphics or a compute context.
        CmdList.bExecuting = true;
        check(CmdList.Context || CmdList.ComputeContext);
    
        FMemMark Mark(FMemStack::Get());
    
        // Set the multi-GPU mask on the contexts.
    #if WITH_MGPU
        if (CmdList.Context != nullptr)
        {
            CmdList.Context->RHISetGPUMask(CmdList.InitialGPUMask);
        }
        // Skip the compute context when it is the same object as the graphics context.
        if (CmdList.ComputeContext != nullptr && CmdList.ComputeContext != CmdList.Context)
        {
            CmdList.ComputeContext->RHISetGPUMask(CmdList.InitialGPUMask);
        }
    #endif
    
        FRHICommandListDebugContext DebugContext;
        FRHICommandListIterator Iter(CmdList);
        // Variant that collects execution stats.
    #if STATS
        bool bDoStats =  CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && FThreadStats::IsCollectingData() && (IsInRenderingThread() || IsInRHIThread()); //r.RHICmdCollectRHIThreadStatsFromHighLevel
        if (bDoStats)
        {
            while (Iter.HasCommandsLeft())
            {
                // Open a new counter scope each time GCurrentExecuteStat changes while commands run.
                TStatIdData const* Stat = GCurrentExecuteStat.GetRawPointer();
                FScopeCycleCounter Scope(GCurrentExecuteStat);
                while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.GetRawPointer())
                {
                    FRHICommandBase* Cmd = Iter.NextCommand();
                    Cmd->ExecuteAndDestruct(CmdList, DebugContext);
                }
            }
        }
        else
        // Variant that emits named stat events.
    #elif ENABLE_STATNAMEDEVENTS
        bool bDoStats = CVarRHICmdCollectRHIThreadStatsFromHighLevel.GetValueOnRenderThread() > 0 && GCycleStatsShouldEmitNamedEvents && (IsInRenderingThread() || IsInRHIThread()); //r.RHICmdCollectRHIThreadStatsFromHighLevel
        if (bDoStats)
        {
            while (Iter.HasCommandsLeft())
            {
                // Open a new counter scope each time GCurrentExecuteStat changes while commands run.
                PROFILER_CHAR const* Stat = GCurrentExecuteStat.StatString;
                FScopeCycleCounter Scope(GCurrentExecuteStat);
                while (Iter.HasCommandsLeft() && Stat == GCurrentExecuteStat.StatString)
                {
                    FRHICommandBase* Cmd = Iter.NextCommand();
                    Cmd->ExecuteAndDestruct(CmdList, DebugContext);
                }
            }
        }
        else
    #endif
        // Variant without debugging or stat collection (also the 'else' branch of the variants above).
        {
            // Loop over all commands, executing and destroying each one.
            while (Iter.HasCommandsLeft())
            {
                FRHICommandBase* Cmd = Iter.NextCommand();
                GCurrentCommand = Cmd;
                Cmd->ExecuteAndDestruct(CmdList, DebugContext);
            }
        }
        // Reset the command list.
        CmdList.Reset();
    }

    由此可知,FRHICommandListExecutor处理了复杂的各类任务,并且要判定任务的前序、等待、依赖关系,还有各个线程之间的依赖和等待关系。上述代码中涉及到了两个重要的任务类型:

    // Task that dispatches a command list to the RHI thread: when it runs, it creates the
    // FExecuteRHIThreadTask that will actually execute the list.
    class FDispatchRHIThreadTask
    {
        FRHICommandListBase* RHICmdList; // Command list to dispatch.
        bool bRHIThread; // Whether the dispatch itself should run on the RHI thread.
    
    public:
        FDispatchRHIThreadTask(FRHICommandListBase* InRHICmdList, bool bInRHIThread)
            : RHICmdList(InRHICmdList)
            , bRHIThread(bInRHIThread)
        {        
        }
        FORCEINLINE TStatId GetStatId() const;
        static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
    
        // Desired thread: when bRHIThread is set, ENamedThreads::RHIThread if the RHI has a dedicated thread,
        // otherwise the value of CPrio_RHIThreadOnTaskThreads.Get(); when bRHIThread is not set,
        // the render thread's local queue.
        ENamedThreads::Type GetDesiredThread()
        {
            return bRHIThread ? (IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get()) : ENamedThreads::GetRenderThread_Local();
        }
        
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            // The prerequisite is the current RHIThreadTask, if there is one.
            FGraphEventArray Prereq;
            if (RHIThreadTask.GetReference())
            {
                Prereq.Add(RHIThreadTask);
            }
            // Remember the current RHIThreadTask in PrevRHIThreadTask.
            PrevRHIThreadTask = RHIThreadTask;
            // Create an FExecuteRHIThreadTask for the command list and store its event in RHIThreadTask.
            RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
        }
    };
    
    // Task that executes a command list: it runs ExecuteInner_DoExecute on the list and then deletes it.
    class FExecuteRHIThreadTask
    {
        FRHICommandListBase* RHICmdList; // Command list to execute; deleted by DoTask.
    
    public:
        FExecuteRHIThreadTask(FRHICommandListBase* InRHICmdList)
            : RHICmdList(InRHICmdList)
        {
        }
    
        FORCEINLINE TStatId GetStatId() const;
        static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }
    
        // Desired thread: ENamedThreads::RHIThread when the RHI has a dedicated thread, otherwise the value of
        // CPrio_RHIThreadOnTaskThreads.Get().
        // NOTE(review): the name suggests a task-thread priority rather than the render thread; confirm.
        ENamedThreads::Type GetDesiredThread()
        {
            return IsRunningRHIInDedicatedThread() ? ENamedThreads::RHIThread : CPrio_RHIThreadOnTaskThreads.Get();
        }
        
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            // When the RHI runs on task threads, publish the current thread id in the global GRHIThreadId.
            if (IsRunningRHIInTaskThread())
            {
                GRHIThreadId = FPlatformTLS::GetCurrentThreadId();
            }
            
            // Execute the RHI command list.
            {
                // Critical section: execution and deletion happen while holding GRHIThreadOnTasksCritical.
                FScopeLock Lock(&GRHIThreadOnTasksCritical);
                
                FRHICommandListExecutor::ExecuteInner_DoExecute(*RHICmdList);
                delete RHICmdList;
            }
            
            // Clear the global GRHIThreadId again.
            if (IsRunningRHIInTaskThread())
            {
                GRHIThreadId = 0;
            }
        }
    };

    由上可知,在派发和转译命令队列时,可能在专用的RHI线程执行,也可能在渲染线程或工作线程执行。

    GRHICommandList

    GRHICommandList乍一看以为是FRHICommandListBase的实例,但实际类型是FRHICommandListExecutor。它的声明和实现如下:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.h
    // Declaration: GRHICommandList is a global FRHICommandListExecutor, not a command list.
    extern RHI_API FRHICommandListExecutor GRHICommandList;
    
    // Engine\Source\Runtime\RHI\Private\RHICommandList.cpp
    // Definition of the global executor instance.
    RHI_API FRHICommandListExecutor GRHICommandList;

    有关GRHICommandList的全局或静态接口如下:

    // Returns the immediate command list owned by the global executor GRHICommandList.
    FRHICommandListImmediate& FRHICommandListExecutor::GetImmediateCommandList()
    {
        return GRHICommandList.CommandListImmediate;
    }
    
    // Returns the immediate async-compute command list owned by the global executor GRHICommandList.
    FRHIAsyncComputeCommandListImmediate& FRHICommandListExecutor::GetImmediateAsyncComputeCommandList()
    {
        return GRHICommandList.AsyncComputeCmdListImmediate;
    }

    在UE的渲染模块和RHI模块中拥有大量的GRHICommandList使用案例,取其中之一:

    // Engine\Source\Runtime\Renderer\Private\DeferredShadingRenderer.cpp
    
    void ServiceLocalQueue()
    {
        FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GetRenderThread_Local());
    
        if (IsRunningRHIInSeparateThread())
        {
            FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
        }
    }

    在RHI命令队列模块,除了涉及GRHICommandList,还涉及诸多全局的任务变量:

    // Engine\Source\Runtime\RHI\Private\RHICommandList.cpp
    
    // Events appended by the command list submit paths; ExecuteInner appends them to the prerequisites
    // of the next FDispatchRHIThreadTask and then resets the array.
    static FGraphEventArray AllOutstandingTasks;
    // Events appended by the command list submit paths (its consumer is not shown in this article's excerpts).
    static FGraphEventArray WaitOutstandingTasks;
    // Event of the most recently created FExecuteRHIThreadTask; cleared once that task is seen to be complete.
    static FGraphEventRef RHIThreadTask;
    // Value RHIThreadTask held just before the latest FExecuteRHIThreadTask was created.
    static FGraphEventRef PrevRHIThreadTask;
    // Event of the most recently created FDispatchRHIThreadTask; cleared once that task is seen to be complete.
    static FGraphEventRef RenderThreadSublistDispatchTask;

    它们的创建或添加任务的代码如下:

    // Abridged excerpt ("(......)" marks code elided by the article): shows where the parallel submit path
    // creates tasks and adds their events to AllOutstandingTasks / WaitOutstandingTasks.
    void FRHICommandListBase::QueueParallelAsyncCommandListSubmit(FGraphEventRef* AnyThreadCompletionEvents, ...)
    {
        (......)
        
        if (Num && IsRunningRHIInSeparateThread())
        {
            (......)
                
            // Create the FParallelTranslateSetupCommandList task.
            FGraphEventRef TranslateSetupCompletionEvent = TGraphTask<FParallelTranslateSetupCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(CmdList, &RHICmdLists[0], Num, bIsPrepass);
            QueueCommandListSubmit(CmdList);
            // Add its event to AllOutstandingTasks.
            AllOutstandingTasks.Add(TranslateSetupCompletionEvent);
            
            (......)
            
            FGraphEventArray Prereq;
            FRHICommandListBase** RHICmdLists = (FRHICommandListBase**)Alloc(sizeof(FRHICommandListBase*) * (1 + Last - Start), alignof(FRHICommandListBase*));
            // Add every valid external event in AnyThreadCompletionEvents to the corresponding lists.
            for (int32 Index = Start; Index <= Last; Index++)
            {
                FGraphEventRef& AnyThreadCompletionEvent = AnyThreadCompletionEvents[Index];
                FRHICommandList* CmdList = CmdLists[Index];
                RHICmdLists[Index - Start] = CmdList;
                if (AnyThreadCompletionEvent.GetReference())
                {
                    Prereq.Add(AnyThreadCompletionEvent);
                    AllOutstandingTasks.Add(AnyThreadCompletionEvent);
                    WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
                }
            }
            
            (......)
            
            // Create the parallel translate task FParallelTranslateCommandList.
            FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[0], 1 + Last - Start, ContextContainer, bIsPrepass);
            AllOutstandingTasks.Add(TranslateCompletionEvent);
            
            (......)
        }
    }
        
    // Abridged excerpt ("(......)" marks code elided by the article): shows how the external completion
    // event is registered in the global task arrays.
    void FRHICommandListBase::QueueAsyncCommandListSubmit(FGraphEventRef& AnyThreadCompletionEvent, class FRHICommandList* CmdList)
    {
        (......)
        
        // Register the external event AnyThreadCompletionEvent, if it is valid.
        if (AnyThreadCompletionEvent.GetReference())
        {
            // AllOutstandingTasks only receives it when the RHI runs on a separate thread.
            if (IsRunningRHIInSeparateThread())
            {
                AllOutstandingTasks.Add(AnyThreadCompletionEvent);
            }
            // WaitOutstandingTasks always receives it.
            WaitOutstandingTasks.Add(AnyThreadCompletionEvent);
        }
        
        (......)
    }
        
    // Abridged excerpt ("(......)" marks code elided by the article): where RHIThreadTask is assigned.
    class FDispatchRHIThreadTask
    {
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            (......)
            
            // Save the current RHIThreadTask, then create the FExecuteRHIThreadTask and store its event in RHIThreadTask.
            PrevRHIThreadTask = RHIThreadTask;
            RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, CurrentThread).ConstructAndDispatchWhenReady(RHICmdList);
        }
    };
        
    // Abridged excerpt ("(......)" marks code elided by the article): the setup task spawns the
    // parallel translate tasks.
    class FParallelTranslateSetupCommandList
    {
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            (......)
    
            // Create the parallel translate task FParallelTranslateCommandList.
            FGraphEventRef TranslateCompletionEvent = TGraphTask<FParallelTranslateCommandList>::CreateTask(nullptr, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(&RHICmdLists[Start], 1 + Last - Start, ContextContainer, bIsPrepass);
            // This task is not considered complete until the translate task has completed.
            MyCompletionGraphEvent->DontCompleteUntil(TranslateCompletionEvent);
            // Record an FRHICommandWaitForAndSubmitSubListParallel command on RHICmdList to submit the result;
            // according to the article it eventually ends up in AllOutstandingTasks and WaitOutstandingTasks.
            ALLOC_COMMAND_CL(*RHICmdList, FRHICommandWaitForAndSubmitSubListParallel)(TranslateCompletionEvent, ContextContainer, EffectiveThreads, ThreadIndex++);
        }
    };
        
    // Abridged excerpt ("(......)" marks code elided by the article): where ExecuteInner assigns
    // RenderThreadSublistDispatchTask and RHIThreadTask.
    void FRHICommandListExecutor::ExecuteInner(FRHICommandListBase& CmdList)
    {
        (......)
        
        if (IsRunningRHIInSeparateThread())
        {
            (......)
            
            if (AllOutstandingTasks.Num() || RenderThreadSublistDispatchTask.GetReference())
            {
                (......)
                // Create the sub-list dispatch (submit) task FDispatchRHIThreadTask.
                RenderThreadSublistDispatchTask = TGraphTask<FDispatchRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList, bAsyncSubmit);
            }
            else
            {
                (......)
                PrevRHIThreadTask = RHIThreadTask;
                // Create the command translation (execution) task FExecuteRHIThreadTask.
                RHIThreadTask = TGraphTask<FExecuteRHIThreadTask>::CreateTask(&Prereq, ENamedThreads::GetRenderThread()).ConstructAndDispatchWhenReady(SwapCmdList);
            }
            
            (......)
        }
    }

    总结一下这些任务变量的作用:

    | 任务变量 | 执行线程 | 描述 |
    | --- | --- | --- |
    | AllOutstandingTasks | 渲染、RHI、工作 | 所有在处理或待处理的任务列表。类型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。 |
    | WaitOutstandingTasks | 渲染、RHI、工作 | 待处理的任务列表。类型是FParallelTranslateSetupCommandList、FParallelTranslateCommandList。 |
    | RHIThreadTask | RHI、工作 | 正在处理的RHI线程任务。类型是FExecuteRHIThreadTask。 |
    | PrevRHIThreadTask | RHI、工作 | 上一次处理的RHIThreadTask。类型是FExecuteRHIThreadTask。 |
    | RenderThreadSublistDispatchTask | 渲染、RHI、工作 | 正在派发(提交)的任务。类型是FDispatchRHIThreadTask。注:与并行渲染AllOutstandingTasks、WaitOutstandingTasks有关。 |

    D3D11命令执行

    本节将研究UE4.26在PC平台的通用RHI及D3D11命令运行过程和机制。UE4.26在PC平台默认的RHI是D3D11,并且关键的几个控制台变量的默认值如下(r.RHICmdBypass为1,r.RHIThread.Enable为0):

    也就是说开启了命令跳过模式,并且禁用了RHI线程。在此情况下,FRHICommandList的某个接口被调用时,不会生成单独的FRHICommand,而是直接调用Context的方法。以FRHICommandList::DrawPrimitive为例:

    // Excerpt of FRHICommandList showing that a draw call is either executed immediately
    // (bypass mode) or recorded as a command object.
    class RHI_API FRHICommandList : public FRHIComputeCommandList
    {
        // Issues a non-indexed draw with the given base vertex, primitive count and instance count.
        void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
        {
            // Bypass is 1 by default, so this branch is taken by default.
            if (Bypass())
            {
                // Call the corresponding method of the graphics-API context directly.
                GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
                return;
            }
            
            // Allocate a standalone FRHICommandDrawPrimitive command.
            ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);

            // The macro call above expands to:
            //   new(AllocCommand(sizeof(FRHICommandDrawPrimitive), alignof(FRHICommandDrawPrimitive))) FRHICommandDrawPrimitive(
            //          BaseVertexIndex, NumPrimitives, NumInstances);
        }
    };

    因此,在PC的默认图形API(D3D11)下,r.RHICmdBypass 1且r.RHIThread.Enable 0,FRHICommandList将直接调用图形API的上下文的接口,相当于同步调用图形API,此时的图形API运行于渲染线程(如果开启)。

    接着将r.RHICmdBypass设为0,但保持r.RHIThread.Enable为0,此时不再直接调用Context的方法,而是生成一条条单独的FRHICommand,然后由FRHICommandList相关的对象执行。还是以FRHICommandList::DrawPrimitive为例,相关代码如下所示:

    // Excerpt of FRHICommandList showing how a draw call is recorded as a command node
    // when bypass mode is off, and how command nodes are appended to the list.
    class RHI_API FRHICommandList : public FRHIComputeCommandList
    {
        // Issues a non-indexed draw with the given base vertex, primitive count and instance count.
        void DrawPrimitive(uint32 BaseVertexIndex, uint32 NumPrimitives, uint32 NumInstances)
        {
            // Taken only when bypass is enabled (the default). With r.RHICmdBypass 0 this
            // branch is skipped and execution falls through to the allocation below.
            if (Bypass())
            {
                // Call the corresponding method of the graphics-API context directly.
                GetContext().RHIDrawPrimitive(BaseVertexIndex, NumPrimitives, NumInstances);
                return;
            }
            
            // Allocate a standalone FRHICommandDrawPrimitive command.
            // The ALLOC_COMMAND macro calls the AllocCommand interface.
            ALLOC_COMMAND(FRHICommandDrawPrimitive)(BaseVertexIndex, NumPrimitives, NumInstances);
        }
        
        // Typed convenience overload: reserves storage sized and aligned for TCmd.
        // Returns raw storage; the caller constructs the command in it.
        template <typename TCmd>
        void* AllocCommand()
        {
            return AllocCommand(sizeof(TCmd), alignof(TCmd));
        }
        
        // Reserves AllocSize bytes with the given Alignment from MemManager and appends
        // the storage to the command linked list. Returns the raw storage.
        void* AllocCommand(int32 AllocSize, int32 Alignment)
        {
            FRHICommandBase* Result = (FRHICommandBase*) MemManager.Alloc(AllocSize, Alignment);
            ++NumCommands;
            // CommandLink points at the previous node's Next slot; store the new node there.
            *CommandLink = Result;
            // Advance CommandLink to the new node's Next slot so the next allocation is chained after it.
            CommandLink = &Result->Next;
            return Result;
        }
    };

    利用ALLOC_COMMAND宏分配的命令实例会进入FRHICommandListBase的命令链表,但此时并未执行,而是等待其它合适的时机执行,例如在FRHICommandListImmediate::ImmediateFlush。下面是执行FRHICommandList的调用堆栈:

    由调用堆栈可以得知,在此情况下,命令执行的过程变得复杂起来,多了很多中间执行步骤。还是以FRHICommandList::DrawPrimitive为例,调用流程示意图如下:

    上图中使用了宏INTERNAL_DECORATOR,它和相关宏的定义如下:

    // Engine\Source\Runtime\RHI\Public\RHICommandListCommandExecutes.inl
    
    // Expands to a call of Method on the context returned by CmdList.GetContext().
    // A variable named CmdList must be in scope where the macro is used.
    #define INTERNAL_DECORATOR(Method) CmdList.GetContext().Method
    // Same as above, but targets the context returned by CmdList.GetComputeContext().
    #define INTERNAL_DECORATOR_COMPUTE(Method) CmdList.GetComputeContext().Method

    相当于通过宏来调用CommandList的Context接口。

    在RHI线程禁用(r.RHIThread.Enable 0)的情况下,以上的调用在渲染线程执行:

    接下来将r.RHIThread.Enable设为1,以开启RHI线程。此时运行命令的线程变成了RHI线程:

    并且调用堆栈是从TaskGraph的RHI线程发起任务:

    此时,命令执行的流程图如下:

    上面流程图中,方角表示在渲染线程执行,而圆角在RHI线程执行(绿框中)。

    开启RHI线程后,将出现它的统计数据:

    左:未开启RHI线程的统计数据;右:开启RHI线程后的统计数据。

    下面绘制出开启或关闭Bypass和RHI线程的流程图(以调用D3D11的DrawPrimitive为例):

    上面流程图中,方角表示在渲染线程中执行,圆角表示在RHI线程中执行(绿框中)。

    ImmediateFlush

    在FDynamicRHI中,提及了刷新类型(FlushType),是指EImmediateFlushType定义的类型:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.h
    
    // Flush types accepted by FRHICommandListImmediate::ImmediateFlush.
    namespace EImmediateFlushType
    {
        enum Type
        { 
            WaitForOutstandingTasksOnly = 0, // Only wait for the outstanding tasks to complete.
            DispatchToRHIThread,             // Dispatch to the RHI thread. Note: creates an FExecuteRHIThreadTask and posts it to the RHIThread TaskGraph queue.
            WaitForDispatchToRHIThread,      // Dispatch, then wait for the dispatch to the RHI thread.
            FlushRHIThread,                  // Flush the RHI thread.
            FlushRHIThreadFlushResources,    // Flush the RHI thread and resources.
            FlushRHIThreadFlushResourcesFlushDeferredDeletes // Flush the RHI thread, resources and deferred deletes.
        };
    };

    EImmediateFlushType中各个值的区别在FRHICommandListImmediate::ImmediateFlush的实现代码中体现出来:

    // Engine\Source\Runtime\RHI\Public\RHICommandList.inl
    
    // Flushes the immediate command list to the degree requested by FlushType.
    // Values not listed in the switch are ignored.
    void FRHICommandListImmediate::ImmediateFlush(EImmediateFlushType::Type FlushType)
    {
        // Common first step of every dispatching flush type: hand the recorded commands
        // to the global executor. Does nothing when no commands are recorded.
        const auto SubmitRecordedCommands = [this]()
        {
            if (HasCommands())
            {
                GRHICommandList.ExecuteList(*this);
            }
        };

        switch (FlushType)
        {
        case EImmediateFlushType::WaitForOutstandingTasksOnly:
            // Only wait for the outstanding tasks.
            WaitForTasks();
            break;

        case EImmediateFlushType::DispatchToRHIThread:
            // Dispatch (execute the recorded commands) without waiting.
            SubmitRecordedCommands();
            break;

        case EImmediateFlushType::WaitForDispatchToRHIThread:
            // Dispatch, then wait for the dispatch task.
            SubmitRecordedCommands();
            WaitForDispatch();
            break;

        case EImmediateFlushType::FlushRHIThread:
            // Dispatch and wait for the dispatch task.
            SubmitRecordedCommands();
            WaitForDispatch();

            // Wait for the RHI-thread task only when the RHI runs on a separate thread.
            if (IsRunningRHIInSeparateThread())
            {
                WaitForRHIThreadTasks();
            }

            // Resets the outstanding task list.
            WaitForTasks(true);
            break;

        case EImmediateFlushType::FlushRHIThreadFlushResources:
        case EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes:
            {
                SubmitRecordedCommands();
                WaitForDispatch();
                WaitForRHIThreadTasks();
                WaitForTasks(true);

                // Flush the resources held by the pipeline state cache.
                PipelineStateCache::FlushResources();

                // Flush resources pending deletion; deferred deletes are included only
                // for the FlushDeferredDeletes variant.
                const bool bFlushDeferredDeletes =
                    (FlushType == EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes);
                FRHIResource::FlushPendingDeletes(bFlushDeferredDeletes);
            }
            break;
        }
    }

    上面代码中涉及到了若干种处理和等待任务的接口,它们的实现如下:

    // Waits for every task in WaitOutstandingTasks to complete, then empties the list.
    // bKnownToBeComplete is not read in this excerpt.
    void FRHICommandListBase::WaitForTasks(bool bKnownToBeComplete)
    {
        // Nothing is queued, so there is nothing to wait for or reset.
        if (!WaitOutstandingTasks.Num())
        {
            return;
        }

        // Look for the first task that has not completed yet.
        bool bHasPendingTask = false;
        for (const auto& Task : WaitOutstandingTasks)
        {
            if (!Task->IsComplete())
            {
                bHasPendingTask = true;
                break;
            }
        }

        // Call into the TaskGraph wait only when at least one task is still pending.
        if (bHasPendingTask)
        {
            FTaskGraphInterface::Get().WaitUntilTasksComplete(WaitOutstandingTasks, ENamedThreads::GetRenderThread_Local());
        }

        // Reset the list of tasks to wait on.
        WaitOutstandingTasks.Reset();
    }
    
    // 等待渲染线程派发完成.
    void FRHICommandListBase::WaitForDispatch()
    {
        // 如果RenderThreadSublistDispatchTask已完成, 则置空.
        if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
        {
            RenderThreadSublistDispatchTask = nullptr;
        }
        
        // RenderThreadSublistDispatchTask有未完成的任务.
        while (RenderThreadSublistDispatchTask.GetReference())
        {
            ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
            FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
            }
        }
    }
    
    // Waits for the RHI-thread task to complete.
    // When the r.RHICmdAsyncRHIThreadDispatch cvar is > 0, the pending dispatch task is waited on first.
    // Task references are cleared once the tasks report completion.
    void FRHICommandListBase::WaitForRHIThreadTasks()
    {
        bool bAsyncSubmit = CVarRHICmdAsyncRHIThreadDispatch.GetValueOnRenderThread() > 0; // cvar: r.RHICmdAsyncRHIThreadDispatch
        ENamedThreads::Type RenderThread_Local = ENamedThreads::GetRenderThread_Local();
        
        // Same purpose as FRHICommandListBase::WaitForDispatch(): wait for the dispatch task,
        // but poll instead when IsThreadProcessingTasks(RenderThread_Local) is true.
        if (bAsyncSubmit)
        {
            // Dispatch task already finished: drop the reference.
            if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
            {
                RenderThreadSublistDispatchTask = nullptr;
            }
            while (RenderThreadSublistDispatchTask.GetReference())
            {
                // NOTE(review): presumably avoids re-entering a queue that is already being
                // processed; confirm against the engine source.
                if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
                {
                    // Poll with Sleep(0) until the dispatch task completes.
                    while (!RenderThreadSublistDispatchTask->IsComplete())
                    {
                        FPlatformProcess::SleepNoStats(0);
                    }
                }
                else
                {
                    // Wait through the TaskGraph interface.
                    FTaskGraphInterface::Get().WaitUntilTaskCompletes(RenderThreadSublistDispatchTask, RenderThread_Local);
                }
                
                // Drop the reference once the dispatch task has completed; this ends the loop.
                if (RenderThreadSublistDispatchTask.GetReference() && RenderThreadSublistDispatchTask->IsComplete())
                {
                    RenderThreadSublistDispatchTask = nullptr;
                }
            }
            // now we can safely look at RHIThreadTask
        }
        
        // If the RHI-thread task has already completed, clear it together with the previous task.
        if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
        {
            RHIThreadTask = nullptr;
            PrevRHIThreadTask = nullptr;
        }
        
        // While an RHI-thread task is still referenced, wait for it.
        while (RHIThreadTask.GetReference())
        {
            // IsThreadProcessingTasks(RenderThread_Local) is true: poll with Sleep(0),
            // yielding the time slice until the task completes.
            if (FTaskGraphInterface::Get().IsThreadProcessingTasks(RenderThread_Local))
            {
                while (!RHIThreadTask->IsComplete())
                {
                    FPlatformProcess::SleepNoStats(0);
                }
            }
            // Otherwise wait for the task through the TaskGraph interface.
            else
            {
                FTaskGraphInterface::Get().WaitUntilTaskCompletes(RHIThreadTask, RenderThread_Local);
            }
            
            // If the RHI-thread task has completed, clear it together with the previous task.
            if (RHIThreadTask.GetReference() && RHIThreadTask->IsComplete())
            {
                RHIThreadTask = nullptr;
                PrevRHIThreadTask = nullptr;
            }
        }
    }

    RHI控制台变量

    前面章节的代码也显示RHI体系涉及的控制台变量非常多,下面列出部分控制台变量,以便调试、优化RHI渲染效果或效率:

    名称 描述
    r.RHI.Name 显示当前RHI的名字,如D3D11。
    r.RHICmdAsyncRHIThreadDispatch 实验选项,是否异步执行RHI调度。可使数据更快地刷新到RHI线程,避免帧末尾出现卡顿。
    r.RHICmdBalanceParallelLists 允许启用DrawList的预处理,以尝试在命令列表之间均衡负载。0:关闭,1:开启,2:实验选项,使用上一帧的结果(在分屏等情况下不起作用)。
    r.RHICmdBalanceTranslatesAfterTasks 实验选项,在渲染任务完成之后再均衡并行转译。可最小化延迟上下文的数量,但会增加启动转译的延迟。
    r.RHICmdBufferWriteLocks 仅与RHI线程相关。用于诊断缓冲锁问题的调试选项。
    r.RHICmdBypass 是否绕过RHI命令列表,立即发送RHI命令。0:禁用(需开启多线程渲染),1:开启。
    r.RHICmdCollectRHIThreadStatsFromHighLevel 这将在执行的RHI线程上推送统计信息,这样就可以确定它们来自哪个高层级的Pass。对帧速率有不利影响。默认开启。
    r.RHICmdFlushOnQueueParallelSubmit 在提交后立即等待并行命令列表的完成。问题诊断。只适用于部分RHI。
    r.RHICmdFlushRenderThreadTasks 如果为真,则每次调用时都刷新渲染线程任务。问题诊断。这是一个更细粒度cvars的主开关。
    r.RHICmdForceRHIFlush 对每个任务强制刷新发送给RHI线程。问题诊断。
    r.RHICmdMergeSmallDeferredContexts 合并小的并行转译任务,基于r.RHICmdMinDrawsPerParallelCmdList。
    r.RHICmdUseDeferredContexts 使用延迟上下文并行执行命令列表。只适用于部分RHI。
    r.RHICmdUseParallelAlgorithms True使用并行算法。如果r.RHICmdBypass为1则忽略。
    r.RHICmdUseThread 使用RHI线程。问题诊断。
    r.RHICmdWidth 控制并行渲染器中大量事物的任务粒度。
    r.RHIThread.Enable 启用/禁用RHI线程,并确定RHI工作是否在专用线程上运行。
    RHI.GPUHitchThreshold GPU上检测卡顿的阈值(毫秒)。
    RHI.MaximumFrameLatency 可以排队进行渲染的帧数。
    RHI.SyncThreshold 在垂直同步功能启用前的连续“快速”帧数。
    RHI.TargetRefreshRate 如果非零,则显示的更新频率永远不会超过目标刷新率(以Hz为单位)。

    注:以上只列出部分RHI相关的变量,还有很多未列出。

    参考

    剖析虚幻渲染体系(10)- RHI

  • 相关阅读:
    PV、UV、VV,CV的含义
    JS动态修改页面EasyUI datebox不生效、EasyUI动态添加Class、EasyUI动态渲染解析解决方案
    JavaScript Object.defineProperty()方法详解
    jquery on()方法绑定多个选择器,多个事件
    jQuery自定义事件
    jquery插件开发快速入门
    JavaScript日期处理
    js原生函数bind
    使用jquery.pjax实现SPA单页面应用
    PushState+Ajax实现简单的单页面应用SPA
  • 原文地址:https://www.cnblogs.com/kekec/p/15651741.html
Copyright © 2020-2023  润新知