• UE4之Game、Render、RHI多线程架构


    游戏线程(GameThread)

    GameThread是引擎运行的心脏,承载游戏逻辑、运行流程的工作,也是其它线程的数据发起者。在FEngineLoop::Tick函数执行每帧逻辑的更新。

    在引擎启动时会把GameThread的线程id存储到全局变量GGameThreadId中,且稍后会设置到TaskGraph系统中。

    // Engine start-up excerpt: records the game-thread id, sets up the game thread, starts the TaskGraph
    // and, when threaded rendering is enabled, decides on the RHI thread and starts the rendering thread.
    // NOTE(review): the article later states that StartRenderingThread() is called from
    // FEngineLoop::PreInitPostStartupScreen; this excerpt may merge code from both functions - confirm.
    int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine)
    {
        // ... ...
        
        // Set up the calling thread's TLS caches (per the article: the FPerThreadFreeBlockLists needed by the Binned2/Binned3 allocators).
        FMemory::SetupTLSCachesOnCurrentThread();
        
        // remember thread id of the main thread
        GGameThreadId = FPlatformTLS::GetCurrentThreadId();// game-thread id
        GIsGameThreadIdInitialized = true; // marks GGameThreadId as initialized
    
        FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask()); // set the CPU affinity mask of the current thread (per the article: avoids hopping between cores, which hurts performance)
        FPlatformProcess::SetupGameThread(); // platform-specific game-thread setup (per the article, an empty implementation on many platforms)
        
        // ... ...
        
        FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores()); // start the TaskGraph, passing the CPU core count (per the article, used to create the worker threads)
        FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread); // attach the current thread to the TaskGraph's ENamedThreads::GameThread slot, tying the game thread into the TaskGraph
        
    
        if (GUseThreadedRendering)  // a rendering thread is to be used
        {
            if (GRHISupportsRHIThread) // the current platform supports an RHI thread
            {
                const bool DefaultUseRHIThread = true;
                GUseRHIThread_InternalUseOnly = DefaultUseRHIThread;
                if (FParse::Param(FCommandLine::Get(), TEXT("rhithread")))
                {
                    GUseRHIThread_InternalUseOnly = true; // -rhithread: use a dedicated RHI thread (per the article, it joins the TaskGraph and RHI work runs on the TaskGraph's RHIThread)
                }
                else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread")))
                {
                    GUseRHIThread_InternalUseOnly = false; // -norhithread: do not use an RHI thread
                }
            }
                
            SCOPED_BOOT_TIMING("StartRenderingThread");
            StartRenderingThread();  // create and start the rendering thread
        }
        
        // ... ...
    }

    游戏线程和TaskGraph系统的ENamedThreads::GameThread其实是一回事,都是同一个线程!

    经过上面的初始化和设置后,其它地方就可以通过TaskGraph系统并行地处理任务了,也可以访问全局变量,以便判断游戏线程是否初始化完,当前线程是否游戏线程:

    // Returns true when the game-thread id has been initialized and the calling thread's id equals GGameThreadId.
    bool IsInGameThread()
    {
        return GIsGameThreadIdInitialized && FPlatformTLS::GetCurrentThreadId() == GGameThreadId;
    }

    渲染线程(RenderThread)

    RenderThread在TaskGraph系统中有一个任务队列,其他线程(主要是GameThread)通过宏ENQUEUE_RENDER_COMMAND向该队列中填充任务

    RenderThread则不断从这个队列中取出任务来执行,从而生成与平台无关的Command List(渲染指令列表)。注:整个过程是异步的

    RenderThread是其他线程(主要是GameThread)的奴隶,只是简单地作为工作线程不断执行它们赋予的工作。 

    RenderingThread.h声明了全部对外的接口,部分如下:

    // Engine\Source\Runtime\RenderCore\Public\RenderingThread.h
    
    // Whether a separate rendering thread is in use. When false, render commands are executed immediately instead of being queued.
    extern RENDERCORE_API bool GIsThreadedRendering;
    
    // Whether the rendering thread should be created. Per the article, usually set by command-line arguments or the ToggleRenderingThread console command.
    extern RENDERCORE_API bool GUseThreadedRendering;
    
    // Enables or disables the RHI thread (per the parameter names: a dedicated thread and/or RHI work on task threads).
    extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads);
    
    (......)
    
    // Starts the rendering thread.
    extern RENDERCORE_API void StartRenderingThread();
    
    // Stops the rendering thread.
    extern RENDERCORE_API void StopRenderingThread();
    
    // Checks whether the rendering thread is healthy (has not crashed); per the article, a crash is reported through UE_LOG.
    extern RENDERCORE_API void CheckRenderingThreadHealth();
    
    // Returns whether the rendering thread is healthy (has not crashed).
    extern RENDERCORE_API bool IsRenderingThreadHealthy();
    
    // Adds a task that must complete before the next scene draw or before render commands are flushed (per the article).
    extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd);
    
    // Collects the frame-render prerequisite tasks, ensuring all render commands have been enqueued (per the article).
    extern RENDERCORE_API void AdvanceFrameRenderPrerequisite();
    
    // Waits until all pending render commands on the rendering thread have executed. Blocks the game thread; should only be called from the game thread.
    extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false);
    
    extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread();
    extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread();
    
    extern RENDERCORE_API void TickRenderingTickables();
    
    extern RENDERCORE_API void StartRenderCommandFenceBundler();
    extern RENDERCORE_API void StopRenderCommandFenceBundler();
    
    (......)

    RenderingThread.h还有一个非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染线程入队渲染指令。下面是它的声明和实现:

    // Enqueues a render command for the rendering thread; Type names the render operation.
    // Expands to a local struct Type##Name exposing the name through CStr()/TStr(), followed by
    // EnqueueUniqueRenderCommand<Type##Name>, so the macro use is directly followed by "(lambda)".
    #define ENQUEUE_RENDER_COMMAND(Type) \
        struct Type##Name \
        {  \
            static const char* CStr() { return #Type; } \
            static const TCHAR* TStr() { return TEXT(#Type); } \
        }; \
        EnqueueUniqueRenderCommand<Type##Name>

    上面宏定义的最后一行调用了函数模板EnqueueUniqueRenderCommand,继续追踪之:

    /* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */
    
    /** The parent class of commands stored in the rendering command queue. */
    class RENDERCORE_API FRenderCommand
    {
    public:
        // All render commands run on the render thread
        static ENamedThreads::Type GetDesiredThread() // every render command must execute on the render thread
        {
            // With threaded rendering enabled, the registered render thread must not be the GameThread.
            check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread);
            return ENamedThreads::GetRenderThread(); // the render thread when threaded rendering is on; the GameThread when it is off
        }
    
        static ESubsequentsMode::Type GetSubsequentsMode()
        {
            // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents
            return ESubsequentsMode::FireAndForget;
        }
    };
    
    // TaskGraph task type that wraps a render-command lambda.
    // TSTR supplies the command name (TStr()); LAMBDA is the callable invoked with the immediate RHI command list.
    template<typename TSTR, typename LAMBDA>
    class TEnqueueUniqueRenderCommandType : public FRenderCommand
    {
    public:
        // Stores the forwarded lambda for later execution in DoTask.
        TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {}
    
        // Task entry point: opens a CPU-profiler scope named after the command, then runs the lambda
        // with the immediate command list obtained from GetImmediateCommandList_ForRenderCommand().
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel);
            FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand();
            Lambda(RHICmdList);
        }
    
        // Returns the stat id for this task type; a default TStatId when STATS is disabled.
        FORCEINLINE_DEBUGGABLE TStatId GetStatId() const
        {
    #if STATS
            static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType;
            return StatPtr_EnqueueUniqueRenderCommandType.GetStatId();
    #else
            return TStatId();
    #endif
        }
    
    private:
        LAMBDA Lambda; // the stored render callback
    };
    
    /*************************************************************************************************************/
    
    // Runs or enqueues a render command.
    // - On the rendering thread: the lambda is invoked immediately.
    // - Otherwise, if ShouldExecuteOnRenderThread(): a TGraphTask wrapping the lambda is dispatched.
    // - Otherwise: a temporary command object is built and executed in place.
    template<typename TSTR, typename LAMBDA> // TSTR is a struct type with static CStr()/TStr() methods giving the render command's name; LAMBDA is the callback
    FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda)
    {
        QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand);
        typedef TEnqueueUniqueRenderCommandType<TSTR, LAMBDA> EURCType;  // EURCType is an alias for TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>
    
    #if 0 // UE_SERVER && UE_BUILD_DEBUG
        UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr())
    #endif
    
        if (IsInRenderingThread()) // already on the rendering thread: run the callback directly instead of enqueuing a command
        {
            FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand();
            Lambda(RHICmdList); // on the rendering thread, invoke the passed-in lambda directly
        }
        else
        {
            if (ShouldExecuteOnRenderThread()) // per the article, equivalent to: if ((GIsThreadedRendering || !IsInGameThread())) - threaded rendering is on, or the caller is not the GameThread, so the command must run on the separate rendering thread
            {
                CheckNotBlockedOnRenderThread();
                TGraphTask<EURCType>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda)); // dispatch a TaskGraph task of type EURCType, forwarding the lambda to the task's constructor
            }
            else
            {
                // Not running on a separate rendering thread: build an EURCType object and execute it right here
                EURCType TempCommand(Forward<LAMBDA>(Lambda));
                FScopeCycleCounter EURCMacro_Scope(TempCommand.GetStatId());
                TempCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef());
            }
        }
    }

    为了更好理解入队渲染命令操作,举2个具体的例子:

    例1:在GameThread执行LoadMap切地图,在卸载掉Old World之后,会在TrimMemory()函数中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FlushCommand任务

    // Example 1: enqueue a render command named FlushCommand.
    ENQUEUE_RENDER_COMMAND(FlushCommand)(
    /* ---------------------------------------------- After expanding the ENQUEUE_RENDER_COMMAND macro ------------------------------------------------
    struct FlushCommandName
    {
        static const char* CStr() { return "FlushCommand"; }
        static const TCHAR* TStr() { return L"FlushCommand"; }
    };
    EnqueueUniqueRenderCommand<FlushCommandName>( */
        [](FRHICommandList& RHICmdList)
        {
            // Flush the immediate command list, flush RHI resources, then flush the immediate command list again.
            GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
            RHIFlushResources();
            GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
        });

    例2:在GameThread中执行控制台变量命令,会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个OnCVarChange1任务,以便将新的数值传递到RenderThread的逻辑中使用

    // Example 2: writes NewValue into Dest from a render command rather than at the call site.
    // The lambda captures a pointer to Dest and the new value by copy.
    // NOTE(review): Dest must outlive the enqueued command - confirm against the caller.
    virtual void OnCVarChange(int32& Dest, int32 NewValue)
    {
        int32* DestPtr = &Dest;
        ENQUEUE_RENDER_COMMAND(OnCVarChange1)(
        /* ---------------------------------------------- After expanding the ENQUEUE_RENDER_COMMAND macro ------------------------------------------------
        struct OnCVarChange1Name
        {
            static const char* CStr() { return "OnCVarChange1"; }
            static const TCHAR* TStr() { return L"OnCVarChange1"; }
        };
        EnqueueUniqueRenderCommand<OnCVarChange1Name>( */
            [DestPtr, NewValue](FRHICommandListImmediate& RHICmdList)
            {
                *DestPtr = NewValue;
            });
    }

    FRenderingThread承载了渲染线程的主要工作,它的部分接口和实现代码如下:

    // Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp
    
    // FRunnable that hosts the rendering thread's work: Init() records the thread id and handles
    // RHI thread ownership, Run() sets up the thread and enters RenderingThreadMain().
    class FRenderingThread : public FRunnable
    {
    private:
        bool bAcquiredThreadOwnership;    // set in Init() when RHIAcquireThreadOwnership() was called, i.e. when the RHI is not running in a separate thread
    
    public:
        FEvent* TaskGraphBoundSyncEvent; // sync event handed to RenderingThreadMain(), which triggers it once this thread is attached to the TaskGraph
    
        FRenderingThread()
        {
            bAcquiredThreadOwnership = false;
            // Fetch a sync event from the pool.
            TaskGraphBoundSyncEvent    = FPlatformProcess::GetSynchEventFromPool(true);
            RHIFlushResources();
        }
    
        // FRunnable interface.
        virtual bool Init(void) override
        {
            // Store the current thread id in the global GRenderThreadId so other code can refer to it.
            GRenderThreadId = FPlatformTLS::GetCurrentThreadId();
            
            // Acquire RHI thread ownership when the RHI does not run in a separate thread.
            if (!IsRunningRHIInSeparateThread())
            {
                bAcquiredThreadOwnership = true;
                RHIAcquireThreadOwnership();
            }
    
            return true; 
        }
        
        (......)
        
        virtual uint32 Run(void) override
        {
            // Set up TLS caches for this thread.
            FMemory::SetupTLSCachesOnCurrentThread();
            // Platform-specific rendering-thread setup.
            FPlatformProcess::SetupRenderThread();
    
            (......)
            
            {
                // Enter the rendering thread's main loop.
                RenderingThreadMain( TaskGraphBoundSyncEvent );
            }
            
            FMemory::ClearAndDisableTLSCachesOnCurrentThread();
            return 0;
        }
    };

    可见它在运行之后会进入渲染线程逻辑,这里再进入RenderingThreadMain代码一探究竟:

    // Body of the rendering thread: registers the render-thread slots, attaches the calling thread to the
    // TaskGraph, signals TaskGraphBoundSyncEvent, processes tasks until asked to return, then restores the slots.
    void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent )
    {
        LLM_SCOPE(ELLMTag::RenderingThreadMemory);
        
        // Register ActualRenderingThread and ActualRenderingThread_Local as the render thread and its local slot.
        ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread);
    
        ENamedThreads::SetRenderThread(RenderThread);
        ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local));
        
        // Attach the current thread to the TaskGraph's RenderThread slot.
        FTaskGraphInterface::Get().AttachToThread(RenderThread);
        FPlatformMisc::MemoryBarrier();
    
        // Trigger the sync event to notify the main thread that the rendering thread is attached to the TaskGraph and ready to receive tasks.
        if( TaskGraphBoundSyncEvent != NULL )
        {
            TaskGraphBoundSyncEvent->Trigger();
        }
    
        (......)
        
        // Rendering-thread lifetime: broadcast "created", process TaskGraph tasks until a return is requested,
        // then broadcast "about to be destroyed". The checks expect GIsThreadedRendering to be true on entry
        // and false once the task loop has returned.
        FCoreDelegates::PostRenderingThreadCreated.Broadcast();
        check(GIsThreadedRendering);
        FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread);
        FPlatformMisc::MemoryBarrier();
        check(!GIsThreadedRendering);
        FCoreDelegates::PreRenderingThreadDestroyed.Broadcast();
        
        (......)
        
        // Point the render-thread slots back at the game thread.
        ENamedThreads::SetRenderThread(ENamedThreads::GameThread);
        ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local);
        FPlatformMisc::MemoryBarrier();
    }

    不过这里还留有一个很大的疑问,那就是FRenderingThread只是获取当前线程作为渲染线程并附加到TaskGraph中,并没有创建线程。

    那么是哪里创建的渲染线程呢?继续追踪,结果发现是在StartRenderingThread()接口中创建了FRenderingThread实例,它的实现代码如下(节选):

    // Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp
    
    // Excerpt: turns on threaded rendering, creates the FRenderingThread runnable and its OS thread,
    // then issues a render-command fence and waits on it.
    void StartRenderingThread()
    {
        (......)
    
        // Turn on the threaded rendering flag.
        GIsThreadedRendering = true;
    
        // Create the FRenderingThread instance.
        GRenderingThreadRunnable = new FRenderingThread();
    
        // Create the actual rendering thread.
        GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags());
        
        (......)
    
        // Begin a render-command fence and wait on it.
        FRenderCommandFence Fence;
        Fence.BeginFence();
        Fence.Wait();
    
        (......)
    }

    如果继续追踪,会发现StartRenderingThread()是在FEngineLoop::PreInitPostStartupScreen中调用的。

    至此,渲染线程的创建、初始化以及主要接口的实现都剖析完了。

    RHI线程(RHIThread)

    RenderThread作为前端(frontend)产生的Command List是平台无关的,是抽象的图形API调用;

    而RHIThread作为后端(backend)会执行和转换渲染线程的Command List成为指定图形API的调用(称为Graphical Command),并提交到GPU执行。

    RHI线程的工作是转换渲染指令到指定图形API,创建、上传渲染资源到GPU。实现代码如下:

    // Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp
    
    // FRunnable for the RHI thread. Unlike FRenderingThread, it creates its own OS thread in Start().
    class FRHIThread : public FRunnable
    {
    public:
        FRunnableThread* Thread;    // the RHI thread this runnable executes on; nullptr until Start()
    
        // Must be constructed on the game thread (enforced by the check).
        FRHIThread()
            : Thread(nullptr)
        {
            check(IsInGameThread());
        }
        
        void Start()
        {
            // Create the RHI thread.
            // NOTE(review): the third argument (512 * 1024) is presumably the stack size in bytes - confirm against FRunnableThread::Create.
            Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(),
                FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags()
                );
            check(Thread);
        }
    
        virtual uint32 Run() override
        {
            LLM_SCOPE(ELLMTag::RHIMisc);
            
            // Set up TLS caches for this thread.
            FMemory::SetupTLSCachesOnCurrentThread();
            // Attach the thread running this FRHIThread to the TaskGraph as ENamedThreads::RHIThread.
            FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread);
            // Process RHI-thread tasks until a return is requested.
            FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread);
            // Clean up TLS caches.
            FMemory::ClearAndDisableTLSCachesOnCurrentThread();
            return 0;
        }
        
        // Singleton accessor.
        static FRHIThread& Get()
        {
            static FRHIThread Singleton; // function-local static: its initialization is thread-safe since C++11
            return Singleton;
        }
    };

    可见RHI线程不同于渲染线程,是直接在FRHIThread对象内创建实际的线程。而FRHIThread的创建也是在StartRenderingThread()中:

    // Excerpt of StartRenderingThread(): when an RHI thread is requested, starts FRHIThread (if it is not
    // already processing tasks), waits for the ownership task, and records the RHI thread globals.
    void StartRenderingThread()
    {
        (......)
    
        if (GUseRHIThread_InternalUseOnly)
        {
            FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);        
            if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread))
            {
                // Create the FRHIThread instance (on first use of the singleton) and start it.
                FRHIThread::Get().Start();
            }
            DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks);
            
            // Dispatch an FOwnershipOfRHIThreadTask and keep its completion event so the game thread can wait on it.
            FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread));
            QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread);
            // Wait (on GameThread_Local) until that task completes (per the article: the RHI thread acquires thread ownership, which is a no-op for most graphics APIs).
            FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local);
            // Record the RHI thread object, flag the RHI as running in a dedicated/separate thread, and store its thread id.
            GRHIThread_InternalUseOnly = FRHIThread::Get().Thread;
            check(GRHIThread_InternalUseOnly);
            GIsRunningRHIInDedicatedThread_InternalUseOnly = true;
            GIsRunningRHIInSeparateThread_InternalUseOnly = true;
            GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID();
            
            GRHICommandList.LatchBypass();
        }
        
        (......)
    }

    以Fortnite(堡垒之夜)移动端为例,在开启RHI线程之前,渲染线程急剧地上下波动,而加了RHI线程之后,波动平缓许多,和游戏线程基本保持一致,帧率也提升不少:

    GameThread、RenderThread、RHIThread之间的同步机制

    这3个线程处理的数据通常是不同帧的,譬如GameThread处理N帧数据,RenderThread和RHIThread处理N-1帧数据。

    但也存在例外,比如RenderThread和RHIThread运行很快,几乎不存在延迟,这种情况下,GameThread处理N帧,而RenderThread可能处理N或N-1帧,RHIThread也可能在转换N或N-1帧。

    但是,RenderThread不能落后游戏线程超过一帧,否则GameThread会卡住,直到RenderThread处理所有指令。

    游戏线程和渲染线程的同步

    游戏线程不可能领先于渲染线程超过一帧(最多快一帧),否则游戏线程会等待渲染线程处理完。它们的同步机制涉及两个关键的概念:

    // Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h
    
    // Render-command fence: lets the game thread wait until previously enqueued render commands have been processed.
    class RENDERCORE_API FRenderCommandFence
    {
    public:
        // Adds a fence to the render command queue. If bSyncToRHIAndGPU is true the fence also syncs with the RHI thread and GPU; otherwise it only waits for the rendering thread.
        void BeginFence(bool bSyncToRHIAndGPU = false); 
    
        // Waits for the fence to be reached. bProcessGameThreadTasks is forwarded to GameThreadWaitForTask().
        void Wait(bool bProcessGameThreadTasks = false) const;
    
        // Returns whether the fence has completed.
        bool IsFenceComplete() const;
    
    private:
        mutable FGraphEventRef CompletionEvent; // graph event signalled when the fence completes; a null reference counts as completed
        ENamedThreads::Type TriggerThreadIndex; // named thread expected to trigger CompletionEvent (rendering thread by default, RHI thread when syncing to RHI/GPU)
    };
    
    // Engine\Source\Runtime\Engine\Public\UnrealEngine.h
    // End-of-frame synchronisation between the game thread and the rendering thread, built on two fences.
    class FFrameEndSync
    {
        FRenderCommandFence Fence[2]; // pair of render fences
        int32 EventIndex; // index of the fence currently in use
    public:
        // Syncs the game thread with the rendering thread. bAllowOneFrameThreadLag: whether the rendering thread may lag by one frame.
        void Sync( bool bAllowOneFrameThreadLag )
        {
            Fence[EventIndex].BeginFence(true); // begin the current fence, requesting sync with the RHI thread and GPU (bSyncToRHIAndGPU = true)
    
            bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread);
            
            // If the game thread is not already processing TaskGraph tasks, process its queued tasks until idle.
            if (bEmptyGameThreadTasks)
            {
                FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
            }
    
            // When one frame of lag is allowed, flip to the other fence, so the Wait below is on the other
            // fence of the pair rather than the one begun above.
            if( bAllowOneFrameThreadLag )
            {
                EventIndex = (EventIndex + 1) % 2;
            }
    
            (......)
            
            // Wait on the selected fence.
            Fence[EventIndex].Wait(bEmptyGameThreadTasks);
        }
    };

    在FRenderCommandFence的BeginFence函数中

    当GameThread与RHI线程及GPU同步时,GameThread会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FSyncFrameCommand任务,以便将Command List同步投递到RHI线程

    当GameThread与RenderThread同步时,GameThread会创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行

    在FRenderCommandFence的Wait函数中,会检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行则调用GameThreadWaitForTask函数来阻塞等待(通过Event实现)

    // Inserts a fence into the render command stream and stores its completion event in CompletionEvent.
    // bSyncToRHIAndGPU requests a sync with the RHI thread and GPU; it is downgraded to a render-thread-only
    // sync when r.GTSyncType or r.VSync is 0. Does nothing when threaded rendering is off.
    void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU)
    {
        if (!GIsThreadedRendering)
        {
            return;
        }
        else
        {
            // Render thread is a default trigger for the CompletionEvent
            TriggerThreadIndex = ENamedThreads::ActualRenderingThread;
                    
            // On the game thread, reuse the bundled completion event when one exists.
            if (BundledCompletionEvent.GetReference() && IsInGameThread())
            {
                CompletionEvent = BundledCompletionEvent;
                return;
            }
    
            int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread();
            if (bSyncToRHIAndGPU)
            {
                // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing
                //@TODO: do this logic in the caller?
                static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // whether VSync is enabled
                check(CVarVsync != nullptr);
    
                if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // when r.GTSyncType is 0 or r.VSync is 0, the GameThread does not sync with the RHI thread and GPU
            {
                    bSyncToRHIAndGPU = false;
                }
            }
    
    
            if (bSyncToRHIAndGPU) // GameThread syncs with the RHI thread and GPU
            {
                if (IsRHIThreadRunning())
                {
                    // Change trigger thread to RHI
                    TriggerThreadIndex = ENamedThreads::RHIThread;
                }
                
                // Create a task graph event which we can pass to the render or RHI threads.
                CompletionEvent = FGraphEvent::CreateGraphEvent();
    
                FGraphEventRef InCompletionEvent = CompletionEvent;
                /* ---------------------------------------------- After expanding the ENQUEUE_RENDER_COMMAND macro ------------------------------------------------
                struct FSyncFrameCommandName
                {
                static const char* CStr() { return "FSyncFrameCommand"; }
                static const TCHAR* TStr() { return L"FSyncFrameCommand"; }
                };
                EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */
                ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)(
                    [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList)
                    {
                        if (IsRHIThreadRunning()) // an RHI thread is running
                        {
                            ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // record an FRHISyncFrameCommand carrying the completion event on the command list; the flush below dispatches it to the RHI thread
                            RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
                        }
                        else  // no RHI thread: the rendering thread executes the command directly
                        {
                            FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType);
                            Command.Execute(RHICmdList);
                        }
                    });
            }
            else  // GameThread syncs with the RenderThread only
            {
                // Sync Game Thread with Render Thread only
                DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"),
                STAT_FNullGraphTask_FenceRenderCommand,
                    STATGROUP_TaskGraphTasks);
    
                // Dispatch an empty FNullGraphTask targeted at the render thread; its completion event is the fence.
                CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(
                    GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread());
            }
        }
    }
    
    
    /**
     * Waits for pending fence commands to retire.
     * If the fence is not yet complete, stops the render-command fence bundler and blocks in
     * GameThreadWaitForTask() on CompletionEvent; bProcessGameThreadTasks is forwarded to that call.
     */
    void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const
    {
        if (!IsFenceComplete())
        {
            StopRenderCommandFenceBundler();
    
            GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks);
        }
    }
    
    // Returns true when the fence has completed. Always true without threaded rendering.
    // Must be called from the game thread or the async loading thread (enforced by the check).
    // Clears CompletionEvent once it is complete.
    bool FRenderCommandFence::IsFenceComplete() const
    {
        if (!GIsThreadedRendering)
        {
            return true;
        }
        check(IsInGameThread() || IsInAsyncLoadingThread());
        CheckRenderingThreadHealth();
        if (!CompletionEvent.GetReference() || CompletionEvent->IsComplete())
        {
            CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed
            return true;
        }
        return false;
    }

    FFrameEndSync的使用是在FEngineLoop::Tick中:

    // Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp
    
    // Excerpt of the per-frame engine tick showing the end-of-frame game/render thread sync.
    void FEngineLoop::Tick()
    {
        (......)
        
        // At the end of the engine loop's frame, sync the game thread with the rendering thread.
        {
            static FFrameEndSync FrameEndSync; // function-local static: its initialization is thread-safe since C++11
            static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag"));
            // Sync game and rendering threads; whether one frame of lag is allowed is controlled by the r.OneFrameThreadLag console variable (per the article, enabled by default).
            FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 );
        }
        
        (......)
    }

    FlushRenderingCommands

    在游戏线程中调用,会阻塞游戏线程,强行等待渲染线程中所有pending render command以及RHI线程中的指令执行完,相当于对渲染线程做一次完整的同步

    /**
     * Waits for the rendering thread to finish executing all pending rendering commands.  Should only be used from the game thread.
     */
    // Blocks until the rendering thread has executed all pending render commands.
    // bFlushDeferredDeletes selects the immediate-flush type that also flushes deferred deletes.
    // Returns immediately when the RHI is not initialized.
    void FlushRenderingCommands(bool bFlushDeferredDeletes)
    {
        if (!GIsRHIInitialized)
        {
            return;
        }
        FSuspendRenderingTickables SuspendRenderingTickables;
    
        // Need to flush GT because render commands from threads other than GT are sent to
        // the main queue of GT when RT is disabled
        if (!GIsThreadedRendering
            && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread)
            && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local))
        {
            FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
            FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local);
        }
    
        // Enqueue a render command that flushes the immediate command list (optionally including deferred deletes).
        ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)(
            [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList)
        {
            RHICmdList.ImmediateFlush(
                bFlushDeferredDeletes ?
                EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes :
                EImmediateFlushType::FlushRHIThreadFlushResources);
        });
    
        // Find the objects which may be cleaned up once the rendering thread command queue has been flushed.
        FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects();
    
        // Issue a fence command to the rendering thread and wait for it to complete.
        FRenderCommandFence Fence;
        Fence.BeginFence(); // default bSyncToRHIAndGPU = false: dispatches an empty FNullGraphTask targeted at the RenderThread
        Fence.Wait(); // blocks if the CompletionEvent handed to the RenderThread has not completed yet
    
        // Delete the objects which were enqueued for deferred cleanup before the command queue flush.
        delete PendingCleanupObjects;
    }

    渲染线程和RHI线程的同步

    RenderThread每次在调用RenderViewFamily_RenderThread的起始处,会阻塞等待所有RHI指令处理完成,然后才开始当前帧的渲染逻辑。

    FMobileSceneRender渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:

    FDeferredShadingSceneRender渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:

    阻塞时的stats栈(移动端)

    参考

    剖析虚幻渲染体系(02)- 多线程渲染

    UE4主线程与渲染线程同步

  • 相关阅读:
    汇编四(习题)
    汇编子程序模块化(near&far)
    win10关闭防火墙
    python中numpy中的shape()的使用
    文件的拷贝linux命令
    python中的os.path.dirname(__file__)
    ubuntu系统下安装及查看opencv版本
    用git命令行克隆项目及出现failed解决方案
    ERROR: Could not install packages due to an EnvironmentError: [Errno 13] Permission denied: '
    记录CenterNet代码编译成功运行
  • 原文地址:https://www.cnblogs.com/kekec/p/15464958.html
Copyright © 2020-2023  润新知