• UE4 stats性能埋点


    某个Stats文件所统计到的大类(Group Name)如下:

    某个Stats文件中,Group Name为Memory的分组所统计到的细项如下:

    统计项类型:

    为int或float数字类型   // 用于Stat HUD展示,如下图所示

    Stat GPU // 显示帧的GPU统计数据   注:android平台上没有输出

    为Memory类型  // 用于Stat HUD展示,如下图所示

    Stat Memory // 显示有关虚幻引擎中各个子系统使用多少内存的统计数据

    为hierarchy类别,可以嵌套子节点,包含CallCount、InclusiveTime、ExclusiveTime等字段   // 用于Stat HUD展示和Profiler工具CallStack展示  如下图所示

    Stat Component   // 显示组件列表及组件性能信息

    Profiler工具CallStack展示

    本文重点讲述如何用自定义hierarchy类别来埋点,并在Profiler工具的CallStack树中查看数据。

    定义分组

    DECLARE_STATS_GROUP(TEXT("AI"),STATGROUP_AI, STATCAT_Advanced);   // The 3 arguments are Description, GroupName and GroupCategory, in that order.

    // The macro above expands to the following struct:
    struct FStatGroup_STATGROUP_AI
    {
        // Group flags: enabled by default, compiled in, not sorted by name.
        enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = false };
    
        // Each accessor returns the matching macro argument as a string literal.
        static __forceinline const char* GetGroupName() { return "STATGROUP_AI"; }
        static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
        static __forceinline const TCHAR* GetDescription() { return L"AI"; }
        // The accessors below expose the enum flags as bool.
        static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
        static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
        static __forceinline bool GetSortByName() { return (bool)SortByName; }
    };;;

    DECLARE_STATS_GROUP_VERBOSE(TEXT("LoadTimeVerbose"), STATGROUP_LoadTimeVerbose, STATCAT_Advanced);  // Stats collection for this group is off by default.

    // The macro above expands to the following struct:
    struct FStatGroup_STATGROUP_LoadTimeVerbose
    {
        // Differs from DECLARE_STATS_GROUP only in DefaultEnable = false.
        enum { DefaultEnable = false, CompileTimeEnable = true, SortByName = false };
    
        // Each accessor returns the matching macro argument as a string literal.
        static __forceinline const char* GetGroupName() { return "STATGROUP_LoadTimeVerbose"; }
        static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
        static __forceinline const TCHAR* GetDescription() { return L"LoadTimeVerbose"; }
        // The accessors below expose the enum flags as bool.
        static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
        static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
        static __forceinline bool GetSortByName() { return (bool)SortByName; }
    };;;

    DECLARE_STATS_GROUP_SORTBYNAME(TEXT("Streaming Overview"),STATGROUP_StreamingOverview, STATCAT_Advanced); // The group is sorted by name, which costs somewhat more.

    // The macro above expands to the following struct:
    struct FStatGroup_STATGROUP_StreamingOverview
    {
        // Differs from DECLARE_STATS_GROUP only in SortByName = true.
        enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = true };
    
        // Each accessor returns the matching macro argument as a string literal.
        static __forceinline const char* GetGroupName() { return "STATGROUP_StreamingOverview"; }
        static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
        static __forceinline const TCHAR* GetDescription() { return L"Streaming Overview"; }
        // The accessors below expose the enum flags as bool.
        static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
        static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
        static __forceinline bool GetSortByName() { return (bool)SortByName; }
    };;;

    DECLARE_STATS_GROUP_MAYBE_COMPILED_OUT(TEXT("SlateVeryVerbose"), STATGROUP_SlateVeryVerbose, STATCAT_Advanced, WITH_VERY_VERBOSE_SLATE_STATS);  // Off by default. When the macro WITH_VERY_VERBOSE_SLATE_STATS is 0, the logic for this stat group is not compiled.

    // The macro above expands to the following struct:
    struct FStatGroup_STATGROUP_SlateVeryVerbose
    {
        // CompileTimeEnable carries the 4th macro argument (0 in this expansion).
        enum { DefaultEnable = false, CompileTimeEnable = 0, SortByName = false };
    
        // Each accessor returns the matching macro argument as a string literal.
        static __forceinline const char* GetGroupName() { return "STATGROUP_SlateVeryVerbose"; }
        static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
        static __forceinline const TCHAR* GetDescription() { return L"SlateVeryVerbose"; }
        // The accessors below expose the enum flags as bool.
        static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
        static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
        static __forceinline bool GetSortByName() { return (bool)SortByName; }
    };;;

    定义埋点方式1

    在cpp代码的全局区域,定义埋点结构体类型和static全局变量

    DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // The 3 arguments are Description, the stat name (used to form the stat struct type) and GroupName, in that order.

    // The macro expands to:
    struct FStat_STAT_Test1
    {
        // Ties this stat to the struct of the group it was declared in.
        typedef FStatGroup_STATGROUP_TestGroup TGroup;
        static __forceinline const char* GetStatName() { return "STAT_Test1"; }
        static __forceinline const TCHAR* GetDescription() { return L"Test1"; }
        // Stat metadata: an int64-typed cycle stat that is cleared every frame.
        static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
        static __forceinline bool IsClearEveryFrame() { return true; }
        static __forceinline bool IsCycleStat() { return true; }
    
        // Returns MCR_Invalid; presumably because this is not a memory stat (confirm against the engine source).
        static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
        {
            return FPlatformMemory::MCR_Invalid;
        }
    };;
    
    // Static stat variable that SCOPE_CYCLE_COUNTER / GET_STATID query through GetStatId().
    static struct FThreadSafeStaticStat<FStat_STAT_Test1> StatPtr_STAT_Test1;;

    在函数中插入埋点

    DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组

    DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // 定义Test1的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test2"), STAT_Test2, STATGROUP_TestGroup); // 定义Test2的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test3"), STAT_Test3, STATGROUP_TestGroup); // 定义Test3的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test4"), STAT_Test4, STATGROUP_TestGroup); // 定义Test4的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test5"), STAT_Test5, STATGROUP_TestGroup); // 定义Test5的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test6"), STAT_Test6, STATGROUP_TestGroup); // 定义Test6的埋点类型与static埋点变量,并放在TestGroup分组中 // LoopCall(1)在我的电脑耗时约为16ms #define LoopCall(n) { uint64 sum = 1; for (int32 i = 1; i < 10000000*n; i++) { sum *= i; } } void AMyTest1Character::StatTest() { SCOPE_CYCLE_COUNTER(STAT_Test1); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test1((StatPtr_STAT_Test1.GetStatId()));; FPlatformProcess::Sleep(0.002); // 统计到CPU Stall - Sleep
    // 条件埋点: 条件成立时,才会埋点成功
    int a = 100; CONDITIONAL_SCOPE_CYCLE_COUNTER(STAT_Test2, a > 50); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test2(a > 50 ? (StatPtr_STAT_Test2.GetStatId()) : TStatId());; LoopCall(1); SCOPE_CYCLE_COUNTER(STAT_Test3); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test3((StatPtr_STAT_Test3.GetStatId()));; FPlatformProcess::Sleep(0.005); // 统计到CPU Stall - Sleep { SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; } FPlatformProcess::Sleep(0.003); // 统计到CPU Stall - Sleep SCOPE_CYCLE_COUNTER(STAT_Test4); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test4((StatPtr_STAT_Test4.GetStatId()));; { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; LoopCall(2); } { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; FPlatformProcess::SleepNoStats(0.5); // 会被统计到当前作用域埋点的IncTime中 } SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; }

    定义埋点方式2

    相比方式1,该方式不需要提前定义埋点类型,比较方便

    DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC)展开为如下代码:

    // Stat struct generated by DECLARE_SCOPE_CYCLE_COUNTER for STAT_UnhashUnreachableObjects.
    struct FStat_STAT_UnhashUnreachableObjects
    {
        // Ties this stat to the STATGROUP_GC group struct.
        typedef FStatGroup_STATGROUP_GC TGroup;
        static __forceinline const char* GetStatName() { return "STAT_UnhashUnreachableObjects"; }
        static __forceinline const TCHAR* GetDescription() { return L"UnhashUnreachableObjects"; }
        // Stat metadata: an int64-typed cycle stat that is cleared every frame.
        static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
        static __forceinline bool IsClearEveryFrame() { return true; }
        static __forceinline bool IsCycleStat() { return true; }
    
        // Returns MCR_Invalid; presumably because this is not a memory stat (confirm against the engine source).
        static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
        {
            return FPlatformMemory::MCR_Invalid;
        }
    };;
    // Unlike DECLARE_CYCLE_STAT, the same macro also declares the static stat variable
    // and the scoped counter that is constructed from its stat id.
    static struct FThreadSafeStaticStat<FStat_STAT_UnhashUnreachableObjects> StatPtr_STAT_UnhashUnreachableObjects;
    FScopeCycleCounter CycleCount_STAT_UnhashUnreachableObjects((StatPtr_STAT_UnhashUnreachableObjects.GetStatId()));;

    在函数中,定义埋点结构体类型和static局部变量,并插入埋点

    // Excerpt showing DECLARE_SCOPE_CYCLE_COUNTER used inside a function; the rest of the body is elided.
    bool UnhashUnreachableObjects(bool bUseTimeLimit, float TimeLimit)
    {
        // Declares the stat struct, a function-local static stat variable and the scoped counter in one step.
        DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC);
        
        // ... ...
    }

    定义埋点方式3

    在DECLARE_SCOPE_CYCLE_COUNTER基础上封装,放到Quick分组下,更易于使用

    QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);  // i.e. DECLARE_SCOPE_CYCLE_COUNTER(TEXT("STAT_QuickTest1"),STAT_QuickTest1,STATGROUP_Quick)

                                                 // Stats defined with QUICK_SCOPE_CYCLE_COUNTER go into the STATGROUP_Quick group (group struct FStatGroup_STATGROUP_Quick).

    QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1)展开为:

    // Stat struct generated by QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1).
    struct FStat_STAT_QuickTest1
    {
        // Ties this stat to the STATGROUP_Quick group struct.
        typedef FStatGroup_STATGROUP_Quick TGroup;
        // The stat name doubles as the description for quick counters.
        static __forceinline const char* GetStatName() { return "STAT_QuickTest1"; }
        static __forceinline const TCHAR* GetDescription() { return L"STAT_QuickTest1"; }
        // Stat metadata: an int64-typed cycle stat that is cleared every frame.
        static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
        static __forceinline bool IsClearEveryFrame() { return true; }
        static __forceinline bool IsCycleStat() { return true; }
    
        // Returns MCR_Invalid; presumably because this is not a memory stat (confirm against the engine source).
        static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
        {
            return FPlatformMemory::MCR_Invalid;
        }
    };;
    // Static stat variable plus the scoped counter constructed from its stat id.
    static struct FThreadSafeStaticStat<FStat_STAT_QuickTest1> StatPtr_STAT_QuickTest1;
    FScopeCycleCounter CycleCount_STAT_QuickTest1((StatPtr_STAT_QuickTest1.GetStatId()));;

    在函数中,定义埋点结构体类型和static局部变量,并插入埋点

    // Sample: instrumenting a function with a quick counter; the rest of the body is elided.
    void AMyTest1Character::StatTest()
    {
        // No prior stat declaration is needed; the macro declares everything in place.
        QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);
    
        // ... ...
    }

    对UObject对象埋点

    DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // Defines the stats group named TestGroup.
    
    DECLARE_CYCLE_STAT(TEXT("ObjTest1"), STAT_ObjTest1, STATGROUP_TestGroup); // Defines the ObjTest1 stat type and its static stat variable, placed in the TestGroup group.
    DECLARE_CYCLE_STAT(TEXT("ObjTest2"), STAT_ObjTest2, STATGROUP_TestGroup); // Defines the ObjTest2 stat type and its static stat variable, placed in the TestGroup group.
    
    void AMyTest1Character::StatTest()
    {
        FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
        UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath);
        UMyBPObject* BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);
    
        FString TexturePath1 = TEXT("/Engine/EngineMaterials/DefaultDiffuse_TC_Masks");
        UTexture2D* TextureObj1 = LoadObject<UTexture2D>(nullptr, *TexturePath1);
    
        {
            FScopeCycleCounterUObject ObjScope(MyBPObjectClass);
    
            LoopCall(1);
    
            FPlatformProcess::Sleep(0.002);
    
            {
                FScopeCycleCounterUObject ObjScope2(MyBPObjectClass);
    
                LoopCall(1);
    
                {
    // 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest1.GetStatId())
    TStatId StatBPObj1 = GET_STATID(STAT_ObjTest1); FScopeCycleCounterUObject ObjScope3(BPObj1, StatBPObj1); FPlatformProcess::SleepNoStats(
    0.5); } {
    // 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest2.GetStatId()) FScopeCycleCounterUObject ObjScope4(
    this, GET_STATID(STAT_ObjTest2)); FPlatformProcess::Sleep(0.003); } } LoopCall(2); // 动态创建TStatId对象 TStatId StatObjTest3 = FDynamicStats::CreateStatId<FStatGroup_STATGROUP_TestGroup>(FString(TEXT("ObjTest3"))); FScopeCycleCounterUObject ObjScope3(MyBPObjectClass, StatObjTest3); } }

    即使为UObject指定了埋点类型,对UObject的统计也会放到STATGROUP_UObjects分组中,如下所示:

    Tickable对象的Tick耗时

    /** Tickable UObject used to show how a Tickable's Tick time is reported in the stats. */
    UCLASS()
    class UMyBPObject : public UObject, public FTickableGameObject
    {
        GENERATED_BODY()

    public:
        UMyBPObject();
        ~UMyBPObject();

        /** Stat id for this tickable, declared in the STATGROUP_Tickables group. */
        virtual TStatId GetStatId() const override
        {
            // To keep this object out of the stats, return TStatId() here instead.
            RETURN_QUICK_DECLARE_CYCLE_STAT(MyBPObject, STATGROUP_Tickables);
        }

        /** Tickable unless this object is a default subobject. */
        virtual bool IsTickable() const override
        {
            const bool bIsDefaultSubobject = IsDefaultSubobject();
            return !bIsDefaultSubobject;
        }

        /** Stalls via SleepNoStats on every frame whose counter is a multiple of 300. */
        virtual void Tick(float DeltaTime) override
        {
            const bool bStallThisFrame = (GFrameCounter % 300 == 0);
            if (!bStallThisFrame)
            {
                return;
            }

            FPlatformProcess::SleepNoStats(0.03);
        }
    };
    
    
    void AMyTest1Character::StatTest()
    {
        FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
        UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath); // MyBPObjectClass为UBlueprintGeneratedClass*类型
    
        // 创建UMyBPObject对象,并赋值给成员变量UMyBPObject* m_BPObj1
        m_BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);
    }

    Tickable对象一般会放在STATGROUP_Tickables分组中。以下为某个Stats文件收集到的Tickable对象名称:

    Task执行任务的耗时

    DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // Defines the stats group named TestGroup.

    /** Minimal task type for TGraphTask whose execution time is reported under STATGROUP_TestGroup. */
    class FMyTestTask
    {
    public:
        FMyTestTask() {}

        /** Name of this task. */
        static const TCHAR* GetTaskName() { return TEXT("FMyTestTask"); }

        /** Stat id for this task, declared in the TestGroup group. */
        FORCEINLINE static TStatId GetStatId()
        {
            RETURN_QUICK_DECLARE_CYCLE_STAT(FMyTestTask, STATGROUP_TestGroup);
        }

        /** Thread this task should run on: any thread. */
        static ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyThread; }

        /** Subsequents are tracked for this task. */
        static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

        /** Task body: burns CPU time with LoopCall(5). */
        void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
        {
            LoopCall(5);
        }
    };
    
    void AMyTest1Character::StatTest()
    {
        FGraphEventRef MyTestTaskEvent = TGraphTask<FMyTestTask>::CreateTask().ConstructAndDispatchWhenReady();
    // 当前线程挂起,等待Task任务执行完成 FTaskGraphInterface::Get().WaitUntilTaskCompletes(MyTestTaskEvent); // 统计到CPU Stall - Wait For Event }

    TaskGraph的任务一般会放在STATGROUP_TaskGraphTasks和STATGROUP_ThreadPoolAsyncTasks分组中。以下为某个Stats文件收集到的Task名称:

    其他统计说明

    CPU停转的统计:

    Stat系统自己开销的统计:

    总结

    ① 通过定义线程安全的埋点变量,在其构造函数(从变量定义的地方开始记录)与析构函数(结束记录)中计时,从而得到埋点变量在其生命周期范围内的耗时

    ② 在记录数据时,会带上Thread Id。因此,在Profiler工具中展示数据时,会先以线程作为大类进行分类

    ③ Profiler工具的CallStack树展示的是埋点变量之间的嵌套关系,与代码的函数调用CallStack没有关系

    参考

    https://docs.unrealengine.com/4.26/zh-CN/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

    https://docs.unrealengine.com/4.26/en-US/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

    UE高级性能剖析技术(2) -CPU帧率瓶颈和卡顿

  • 相关阅读:
    Oracle Drop表并未直接删除 drop table xx purge
    Notepad++使用
    Python使用MySQL数据库
    考驾照科目二科目三要点记录
    Linux中权限(r、w、x)对于目录与文件的意义
    linux之expr命令
    linux下scp
    数字货币和区块链联系
    网站
    关于linux 编程
  • 原文地址:https://www.cnblogs.com/kekec/p/14960139.html
Copyright © 2020-2023  润新知