• FMallocBinned2内存分配器


    FMallocBinned2是虚幻引擎实现的第二代装箱内存分配器,其重要的配置参数及成员变量如下:

    // Limits for the cache of freed OS allocations.
    // NOTE(review): judging by the names these are an entry-count cap and a total-byte cap; confirm against the cached OS page allocator.
    #define BINNED2_MAX_CACHED_OS_FREES (64)
    #if PLATFORM_64BITS
        #define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (64*1024*1024) // 64MB
    #else
        #define BINNED2_MAX_CACHED_OS_FREES_BYTE_LIMIT (16*1024*1024) // 16MB
    #endif
    
    #define BINNED2_LARGE_ALLOC                    65536        // Alignment of OS-allocated pointer - pool-allocated pointers will have a non-aligned pointer
    #define BINNED2_MINIMUM_ALIGNMENT_SHIFT        4            // Alignment of blocks, expressed as a shift
    #define BINNED2_MINIMUM_ALIGNMENT            16            // Alignment of blocks
    #define BINNED2_MAX_SMALL_POOL_SIZE            (32768-16)    // Maximum block size in GMallocBinned2SmallBlockSizes
    #define BINNED2_SMALL_POOL_COUNT            45            // Number of block-size bins; the constructor static_asserts that SmallBlockSizes has exactly this many entries
    
    
    // Compile-time defaults for the allocator tunables.
    #define DEFAULT_GMallocBinned2PerThreadCaches 1  // Non-zero enables the per-thread free-block caches
    #define DEFAULT_GMallocBinned2LockFreeCaches 0
    #define DEFAULT_GMallocBinned2BundleCount 64     // Per the article: maximum number of nodes in one bundle
    #define DEFAULT_GMallocBinned2AllocExtra 32      // Upper bound on the extra blocks pre-filled into the thread cache on the locked Malloc path
    #define BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle 8  // Number of bundle slots per bin in FGlobalRecycler
    
    // The build must state explicitly whether it favours low memory usage.
    #if !defined(AGGRESSIVE_MEMORY_SAVING)
        #error "AGGRESSIVE_MEMORY_SAVING must be defined"
    #endif
    // Per the article: byte cap for one bundle (node count * block size).
    #if AGGRESSIVE_MEMORY_SAVING
        #define DEFAULT_GMallocBinned2BundleSize 8192
    #else
        #define DEFAULT_GMallocBinned2BundleSize BINNED2_LARGE_ALLOC  // 64KB
    #endif
    
    
    // Runtime-tweaking switch.
    // Enabled: the tunables are int32 globals that can be changed while running.
    // Disabled (the default here): the same names collapse to the compile-time defaults.
    #define BINNED2_ALLOW_RUNTIME_TWEAKING 0
    #if BINNED2_ALLOW_RUNTIME_TWEAKING
        // Declarations only. An initializer on an `extern` variable turns the declaration into a
        // definition, so every translation unit that sees it would define the symbol.
        // NOTE(review): the defining translation unit is expected to initialize these from the
        // DEFAULT_* / BINNED2_MAX_* macros; confirm the definitions exist before enabling this branch.
        extern CORE_API int32 GMallocBinned2PerThreadCaches;
        extern CORE_API int32 GMallocBinned2BundleSize;
        extern CORE_API int32 GMallocBinned2BundleCount;
        extern CORE_API int32 GMallocBinned2MaxBundlesBeforeRecycle;
        extern CORE_API int32 GMallocBinned2AllocExtra;
    #else
        #define GMallocBinned2PerThreadCaches DEFAULT_GMallocBinned2PerThreadCaches  // 1
        #define GMallocBinned2BundleSize DEFAULT_GMallocBinned2BundleSize  // 64KB, or 8192 when AGGRESSIVE_MEMORY_SAVING is non-zero
        #define GMallocBinned2BundleCount DEFAULT_GMallocBinned2BundleCount  // 64
        #define GMallocBinned2MaxBundlesBeforeRecycle BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle  // 8
        #define GMallocBinned2AllocExtra DEFAULT_GMallocBinned2AllocExtra  // 32
    #endif
    
    // ... ... 
    
    // Block sizes are based around getting the maximum amount of allocations per pool, with as little alignment waste as possible.
    // Block sizes should be close to even divisors of the system page size, and well distributed.
    // They must be 16-byte aligned as well.
    // There are 45 bins. A request is served from the first bin whose block size is not smaller than the requested size.
    // Per the article: an FFreeBlock header describes a run of blocks and itself sits at the head of a block,
    // occupying sizeof(FFreeBlock) = 16 bytes; the largest bin is 32768-16 so that two such blocks fit in a 64KB page.
    static uint16 SmallBlockSizes[] =
    {
        16, 32, 48, 64, 80, 96, 112, 128,
        160, 192, 224, 256, 288, 320, 384, 448,
        512, 576, 640, 704, 768, 896, 1024 - 16, 1168,
        1360, 1632, 2048 - 16, 2336, 2720, 3264, 4096 - 16, 4368,
        4672, 5040, 5456, 5952, 6544 - 16, 7280, 8192 - 16, 9360,
        10912, 13104, 16384 - 16, 21840, 32768 - 16 
    };
    
    // ... ...
    
    // Second-generation binned allocator (final, derives from FMalloc).
    // Only the members discussed in the article are shown; elided parts are marked "// ... ...".
    class CORE_API FMallocBinned2 final : public FMalloc
    {
    
    private:
        // ... ...
        FPtrToPoolMapping PtrToPoolMapping;  // Parameters of the pointer-to-pool hash-bucket mapping
    
        // Pool tables for different pool sizes
        FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT]; // One table per block-size bin (BINNED2_SMALL_POOL_COUNT is 45); every pool in a table has the same block size
    
        PoolHashBucket* HashBuckets;  // Pool hash buckets used when the key is found
        PoolHashBucket* HashBucketFreeList; // Pool hash buckets used when the key is not found
        uint64 NumPoolsPerPage; // Number of FPoolInfo entries per page: PageSize / sizeof(FPoolInfo) = 65536 / 32 = 2048
        // ... ...
        
        FCriticalSection Mutex; // Critical section used with FScopeLock to serialize access to the shared pool state
        
        // ... ...
    
    public:
        // ... ...
        static uint16 SmallBlockSizesReversed[BINNED2_SMALL_POOL_COUNT]; // this is reversed to get the smallest elements on our main cache line; holds SmallBlockSizes in reverse order
        static FMallocBinned2* MallocBinned2; // The current Binned2 allocator instance
        static uint32 Binned2TlsSlot; // TLS slot for the per-thread caches; 0 means not allocated yet. One slot is shared by all threads
        static uint32 PageSize;  // Constants.BinnedPageSize, 64KB
        static uint32 OsAllocationGranularity;  // Constants.BinnedAllocationGranularity, 4096
        // Mapping of sizes to small table indices.
        // Indexed by (Size >> BINNED2_MINIMUM_ALIGNMENT_SHIFT); each entry is an index into SmallPoolTables.
        // 2048 entries holding 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44.
        // The table is pre-generated during initialization so that Malloc can go straight from the requested size to its pool table.
        static uint8 MemSizeToIndex[1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT)];
        // ... ...
    };

    FMallocBinned2内存分配器的初始化 // 在其构造函数中

    根据所在平台硬件和操作系统,来设置内存分配器的相关参数

    // Builds the single FMallocBinned2 instance:
    //  1. reads the platform memory constants and derives the page/pool parameters,
    //  2. validates the block-size table against those parameters,
    //  3. fills the per-bin pool tables and the size-to-bin lookup table,
    //  4. allocates and default-constructs the pool hash buckets,
    //  5. publishes the instance through MallocBinned2 / GFixedMallocLocationPtr.
    FMallocBinned2::FMallocBinned2()
        : HashBucketFreeList(nullptr)
    {
        static bool bOnce = false;
        check(!bOnce); // this is now a singleton-like thing and you cannot make multiple copies
        bOnce = true;
    
        // Configure the allocator from the platform's hardware/OS constants.
        FGenericPlatformMemoryConstants Constants = FPlatformMemory::GetConstants();
        PageSize = Constants.BinnedPageSize;
        // Fall back to the page size when the platform reports no allocation granularity.
        OsAllocationGranularity = Constants.BinnedAllocationGranularity ? Constants.BinnedAllocationGranularity : PageSize;
        NumPoolsPerPage = PageSize / sizeof(FPoolInfo);
        PtrToPoolMapping.Init(PageSize, NumPoolsPerPage, Constants.AddressLimit); // set up the pointer-to-pool hash mapping parameters
    
        // Sanity checks on the configuration.
        checkf(FMath::IsPowerOfTwo(PageSize), TEXT("OS page size must be a power of two"));
        checkf(FMath::IsPowerOfTwo(Constants.AddressLimit), TEXT("OS address limit must be a power of two"));
        checkf(Constants.AddressLimit > PageSize, TEXT("OS address limit must be greater than the page size")); // Check to catch 32 bit overflow in AddressLimit
        // The last table entry is the largest bin, so that is what the limit must match.
        checkf(SmallBlockSizes[BINNED2_SMALL_POOL_COUNT - 1] == BINNED2_MAX_SMALL_POOL_SIZE, TEXT("BINNED2_MAX_SMALL_POOL_SIZE must equal the largest block size"));
        checkf(PageSize % BINNED2_LARGE_ALLOC == 0, TEXT("OS page size must be a multiple of BINNED2_LARGE_ALLOC"));
        checkf(sizeof(FMallocBinned2::FFreeBlock) <= SmallBlockSizes[0], TEXT("Pool header must be able to fit into the smallest block"));
        static_assert(UE_ARRAY_COUNT(SmallBlockSizes) == BINNED2_SMALL_POOL_COUNT, "Small block size array size must match BINNED2_SMALL_POOL_COUNT");
        static_assert(UE_ARRAY_COUNT(SmallBlockSizes) <= 256, "Small block size array size must fit in a byte");
        static_assert(sizeof(FFreeBlock) <= BINNED2_MINIMUM_ALIGNMENT, "Free block struct must be small enough to fit into a block.");
    
        // Init pool tables: copy each bin's block size into its pool table (45 bins).
        for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
        {
            checkf(Index == 0 || SmallBlockSizes[Index - 1] < SmallBlockSizes[Index], TEXT("Small block sizes must be strictly increasing"));
            checkf(SmallBlockSizes[Index] <= PageSize, TEXT("Small block size must be small enough to fit into a page"));
            checkf(SmallBlockSizes[Index] % BINNED2_MINIMUM_ALIGNMENT == 0, TEXT("Small block size must be a multiple of BINNED2_MINIMUM_ALIGNMENT"));
    
            SmallPoolTables[Index].BlockSize = SmallBlockSizes[Index];
        }
    
        // Set up pool mappings: 2048 entries holding 0,0,1,2,3,4,5,6,7,8,8, ... ,44,44,44.
        // Entry i is the first bin whose block size is at least (i << BINNED2_MINIMUM_ALIGNMENT_SHIFT).
        uint8* IndexEntry = MemSizeToIndex;
        uint32  PoolIndex  = 0;
        for (uint32 Index = 0; Index != 1 + (BINNED2_MAX_SMALL_POOL_SIZE >> BINNED2_MINIMUM_ALIGNMENT_SHIFT); ++Index)
        {
            
            uint32 BlockSize = Index << BINNED2_MINIMUM_ALIGNMENT_SHIFT; // inverse of int32 Index = int32((Size >> BINNED2_MINIMUM_ALIGNMENT_SHIFT));
            // PoolIndex only ever moves forward because BlockSize grows with Index.
            while (SmallBlockSizes[PoolIndex] < BlockSize)
            {
                ++PoolIndex;
                check(PoolIndex != BINNED2_SMALL_POOL_COUNT);
            }
            check(PoolIndex < 256);
            *IndexEntry++ = uint8(PoolIndex);
        }
        
        
        // now reverse the pool sizes for cache coherency
        // SmallBlockSizesReversed becomes SmallBlockSizes in reverse order. It is filled once, here.
    
        for (uint32 Index = 0; Index != BINNED2_SMALL_POOL_COUNT; ++Index)
        {
            uint32 Partner = BINNED2_SMALL_POOL_COUNT - Index - 1;
            SmallBlockSizesReversed[Index] = SmallBlockSizes[Partner];
        }
    
        uint64 MaxHashBuckets = PtrToPoolMapping.GetMaxHashBuckets();
    
        {
            LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
            // Allocate the pool hash bucket array straight from the OS, rounded up to the allocation granularity.
            HashBuckets = (PoolHashBucket*)FPlatformMemory::BinnedAllocFromOS(Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity));
    #if BINNED2_ALLOCATOR_STATS
            Binned2HashMemory += Align(MaxHashBuckets * sizeof(PoolHashBucket), OsAllocationGranularity);
    #endif
        }
    
        DefaultConstructItems<PoolHashBucket>(HashBuckets, MaxHashBuckets); // default-construct every bucket in place
        MallocBinned2 = this;
        GFixedMallocLocationPtr = (FMalloc**)(&MallocBinned2);
    }

    具体数值如下:

    FPoolTable  // 同一Block大小内存池表

    /** Pool table: all pools of one block size. sizeof(FPoolTable) is reported as 24. */
    struct FPoolTable
    {
        FPoolList ActivePools;  // Pools that still have free blocks
        FPoolList ExhaustedPools; // Pools that are full (nothing left to allocate)
        uint32    BlockSize;  // Block size shared by every pool in this table
    
        // ... ...
    };

    FPoolList  // 内存池链表

    // Linked list of pools. sizeof(FPoolList) is reported as 8.
    struct FPoolList
    {
        // ... ...
    
    private:
        FPoolInfo* Front; // First pool in the list
    };

    FPoolInfo  // 内存池

    FPoolInfo中的所有Block为空闲时,才释放其占用的内存页

    // A memory pool. sizeof(FPoolInfo) is reported as 32.
    // Per the article, the page backing a pool is released only once every block in it is free.
    struct FMallocBinned2::FPoolInfo  
    {
        // ... ...
     // Count of allocated blocks; when it reaches 0 the whole pool is released together with the memory it manages
     public:    uint16      Taken;          // Number of allocated elements in this pool, when counts down to zero can free the entire pool    
     public:    ECanary        Canary;    // See ECanary
     // Number of bytes allocated
     private:    uint32      AllocSize;      // Number of bytes allocated
     // Binned: head of this pool's list of free blocks. Not binned: per the upstream comment, holds the OS allocation size in bytes instead
     public:    FFreeBlock* FirstFreeBlock; // Pointer to first free memory in this pool or the OS Allocation Size in bytes if this allocation is not binned
     // Next pool in the list
     public:    FPoolInfo*  Next;           // Pointer to next pool
     public:    FPoolInfo** PtrToPrevNext;  // Pointer to whichever pointer points to this pool
     
        // ... ...
    };

    FFreeBlock  // 内存块

    // Header describing a run of free blocks. sizeof(FFreeBlock) is reported as 16.
    struct FFreeBlock
    {
        // ... ...
        uint16 BlockSize;                // Size of the blocks that this list points to (the block size of the owning pool)
        uint8 PoolIndex;                // Index of this pool (bin index of the owning pool)
        uint8 Canary;                    // Constant value of 0xe3; fixed marker used to detect out-of-bounds writes, i.e. whether this header was corrupted
        uint32 NumFreeBlocks;          // Number of consecutive free blocks here, at least 1.
        void*  NextFreeBlock;          // Next free block in another pool. When a block is freed, an FFreeBlock is built in place on it and pushed onto the head of the pool's FirstFreeBlock list
    };

    PoolHashBucket  // 内存池哈希桶

    // Parameters of the pointer-to-pool hash-bucket mapping. sizeof(FPtrToPoolMapping) is reported as 32.
    struct FPtrToPoolMapping
    {
        // ... ...
    
    private:
        /** Shift to apply to a pointer to get the reference from the indirect tables */
        uint64 PtrToPoolPageBitShift;
    
        /** Shift required to get required hash table key. */
        uint64 HashKeyShift;
    
        /** Used to mask off the bits that have been used to lookup the indirect table */
        uint64 PoolMask;
    
        // PageSize dependent constants
        uint64 MaxHashBuckets;
    };
    
    /** Pool hash bucket: holds the pool list for a key hashed from a memory address. sizeof(PoolHashBucket) is reported as 32. */
    struct FMallocBinned2::PoolHashBucket
    {
        UPTRINT         BucketIndex; // Hash key: Key = Ptr >> Allocator.HashKeyShift (per the article, the address shifted right by 27 bits)
        FPoolInfo*      FirstPool; // Start of the block of pool entries (64KB, i.e. the value of the PageSize member)
        PoolHashBucket* Prev; // Previous pool hash bucket
        PoolHashBucket* Next; // Next pool hash bucket
        
        // ... ...
    };

    从内存池Pool中分配内存给Block

    struct FMallocBinned2::FPoolInfo
    {
        // ... ...

        // Hands out one block from this pool.
        // The pool must still have a free block; the allocated-block counter is bumped
        // before the block is carved from the first free run.
        void* AllocateRegularBlock()
        {
            check(HasFreeRegularBlock()); // caller guarantees a free block exists
            ++Taken;                      // one more block in use
            void* const NewBlock = FirstFreeBlock->AllocateRegularBlock(); // take it from the head free run
            ExhaustPoolIfNecessary();     // per the article: a pool left without free blocks moves to ExhaustedPools
            return NewBlock;
        }
        
        // ... ...
    };
    
    
    struct FFreeBlock
    {
        // ... ...

        // Takes one block out of the free run described by this header and returns its address.
        FORCEINLINE void* AllocateRegularBlock()
        {
            --NumFreeBlocks; // one free block fewer in this run
            uint8* const RunStart = (uint8*)this;
            if (!IsAligned(this, BINNED2_LARGE_ALLOC))
            {
                // Header is not on a 64KB boundary: blocks are handed out from the back of the run towards the front.
                return RunStart + (NumFreeBlocks)* BlockSize;
            }
            // Header sits on a 64KB boundary (BINNED2_LARGE_ALLOC): measure back from the end
            // of the 64KB region, so successive calls return increasing addresses.
            return RunStart + BINNED2_LARGE_ALLOC - (NumFreeBlocks + 1) * BlockSize;
        }
        
        // ... ...
    };

    TLS Cache机制

    与FMallocBinned内存分配器相比,FMallocBinned2最大的改进:

    引入了TLS(Thread Local Storage,线程局部存储:每个线程拥有自己的存储空间,以键值对形式保存线程独有的变量)缓存,来优化内存的分配速度

    各线程会记录被free的地址,把它们保存到一个列表中,当这个线程再有malloc请求来时,如果BlockSize匹配,则直接返回之前缓存的free地址

    这样就不需要再访问FPoolTable SmallPoolTables[BINNED2_SMALL_POOL_COUNT]了,因此也不用再加互斥锁了

    各个线程在启动时,通过调用FMemory::SetupTLSCachesOnCurrentThread()创建自己的TLS数据FPerThreadFreeBlockLists

    各个线程在创建FPerThreadFreeBlockLists后,都会把它添加到Binned2的RegisteredFreeBlockLists数组中记录。代码如下:

    // Creates the calling thread's free-block cache, allocating the shared TLS slot on first use.
    // Does nothing when per-thread caches are switched off.
    void FMallocBinned2::SetupTLSCachesOnCurrentThread()
    {
        const bool bCachesInUse = BINNED2_ALLOW_RUNTIME_TWEAKING || GMallocBinned2PerThreadCaches;
        if (bCachesInUse)
        {
            if (FMallocBinned2::Binned2TlsSlot == 0)
            {
                // Only reached while the slot is still unset; the same slot is then used by every thread.
                FMallocBinned2::Binned2TlsSlot = FPlatformTLS::AllocTlsSlot();
            }
            check(FMallocBinned2::Binned2TlsSlot);
            FPerThreadFreeBlockLists::SetTLS(); // each thread creates its own TLS data
        }
    }
    
    
    void FMallocBinned2::FPerThreadFreeBlockLists::SetTLS()
    {
        check(FMallocBinned2::Binned2TlsSlot);
        FPerThreadFreeBlockLists* ThreadSingleton = (FPerThreadFreeBlockLists*)FPlatformTLS::GetTlsValue(FMallocBinned2::Binned2TlsSlot);
        if (!ThreadSingleton)
        {
            LLM_PLATFORM_SCOPE(ELLMTag::FMalloc);
            ThreadSingleton = new (FPlatformMemory::BinnedAllocFromOS(Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity))) FPerThreadFreeBlockLists();
    #if BINNED2_ALLOCATOR_STATS
            Binned2TLSMemory += Align(sizeof(FPerThreadFreeBlockLists), FMallocBinned2::OsAllocationGranularity);
    #endif
            FPlatformTLS::SetTlsValue(FMallocBinned2::Binned2TlsSlot, ThreadSingleton);
            FMallocBinned2::Private::RegisterThreadFreeBlockLists(ThreadSingleton);
        }
    }
    
    // Returns the array that records every thread's FPerThreadFreeBlockLists.
    // The array is a function-local static, so it is created on first call.
    static TArray<FPerThreadFreeBlockLists*>& GetRegisteredFreeBlockLists()
    {
        static TArray<FPerThreadFreeBlockLists*> RegisteredFreeBlockLists;
        return RegisteredFreeBlockLists;
    }
    // Appends a thread's free-block lists to the registered array while holding the registration mutex.
    static void RegisterThreadFreeBlockLists( FPerThreadFreeBlockLists* FreeBlockLists )
    {
        FScopeLock Lock(&GetFreeBlockListsRegistrationMutex());
    #if BINNED2_ALLOCATOR_STATS_VALIDATION
        ++RecursionCounter;
    #endif
        TArray<FPerThreadFreeBlockLists*>& Registry = GetRegisteredFreeBlockLists();
        Registry.Add(FreeBlockLists);
    #if BINNED2_ALLOCATOR_STATS_VALIDATION
        --RecursionCounter;
    #endif
    }

    各线程调用FMemory::SetupTLSCachesOnCurrentThread()的情况:

    每个线程都会有一份FPerThreadFreeBlockLists副本,其中FreeLists[]数组也通过BlockSize产生,有45个元素

    每个元素类型为FFreeBlockList,包含FBundle PartialBundle链表(未装满的Bundle)和FBundle FullBundle链表(已装满的Bundle)

    为了让Binned2不过多占用内存空间,给每个FBundle限定了元素数量(不能超过64)及总容量(其中FBundleNode的Count*BlockSize不能大于64KB)

    // Per-thread cache of freed blocks, stored in the Binned2 TLS slot.
    struct FPerThreadFreeBlockLists
    {
        // ... ...
    private:
        FFreeBlockList FreeLists[BINNED2_SMALL_POOL_COUNT]; // One list per block-size bin (BINNED2_SMALL_POOL_COUNT is 45)
    };
    
    // Cached free blocks of one bin. sizeof(FFreeBlockList) is reported as 32.
    struct FFreeBlockList
    {
        // ... ...
    private:
        FBundle PartialBundle; // Bundle that is not full yet
        FBundle FullBundle;    // Bundle that is already full
    };
    
    // A list of FBundleNode. sizeof(FBundle) is reported as 16.
    struct FBundle
    {
        // ... ...
        FBundleNode* Head;  // Head of the node list
        uint32       Count; // NOTE(review): presumably the number of nodes in the list; confirm in the elided members
    };
    
    // One cached free block. sizeof(FBundleNode) is reported as 16.
    // The node is overlaid on the freed block's own memory (the freed pointer is cast to FBundleNode*).
    struct FBundleNode
    {
        FBundleNode* NextNodeInCurrentBundle; // Next node within the same bundle
        // The two members below share storage.
        union
        {
            FBundleNode* NextBundle; // Link to the next bundle in a chain of bundles
            int32 Count;
        };
    };

    注:FBundleNode*指向的是Block内存块区域,把Ptr指针转换成FBundleNode*后,其size为16字节,对其修改不会影响到其他内存空间 

    FGlobalRecycler  // 用于缓存FBundle FullBundle链表

    // Global cache of full bundles, one group of slots per bin.
    // sizeof(FGlobalRecycler) is reported as 64*45 = 2880.
    struct FGlobalRecycler
    {
    
        // ... ...
    
    private:
        // sizeof(FPaddedBundlePointer) is reported as 8*8 = 64
        struct FPaddedBundlePointer
        {
            FBundleNode* FreeBundles[BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle]; // BINNED2_MAX_GMallocBinned2MaxBundlesBeforeRecycle is 8
    
            // ... ...
        };
        
        // ... ...
        
        // One cache-line-aligned group of slots per bin (BINNED2_SMALL_POOL_COUNT is 45).
        MS_ALIGN(PLATFORM_CACHE_LINE_SIZE) FPaddedBundlePointer Bundles[BINNED2_SMALL_POOL_COUNT] GCC_ALIGN(PLATFORM_CACHE_LINE_SIZE);
    };
    
    // Caches FullBundle lists; each bin has 8 slots.
    // Per the article: PushBundle succeeds when a slot is free and fails otherwise; PopBundle fails and returns null when every slot is empty.
    static FGlobalRecycler GGlobalRecycler;

    Malloc申请内存时TLS Cache的流程细节如下:

    Free释放内存时TLS Cache的流程细节如下:

    Free掉Ptr指针的内存占用  

    // Frees Ptr.
    // Binned pointers go to the calling thread's cache when one exists; whatever has to be
    // returned to the pools is released under Mutex. OS allocations are looked up, validated
    // and handed to the cached OS page allocator under Mutex. A null Ptr is a no-op.
    void FMallocBinned2::FreeExternal(void* Ptr)
    {
        if (!IsOSAllocation(Ptr)) // is this a block allocated directly from the OS?
        {
            // Binned path: the block belongs to a pool
            check(Ptr); // null is 64k aligned so we should not be here
            FFreeBlock* BasePtr = GetPoolHeaderFromPointer(Ptr); // map the pointer to its pool header (FFreeBlock*)
            BasePtr->CanaryTest();
            uint32 BlockSize = BasePtr->BlockSize;
            uint32 PoolIndex = BasePtr->PoolIndex;
    
            FBundleNode* BundlesToRecycle = nullptr;
            FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
            if (Lists)
            {
                // If this bin's FullBundle.Head in the thread cache is not empty, it is offered to GGlobalRecycler for caching.
                // When GGlobalRecycler has no free slot for this bin, BundlesToRecycle receives that FullBundle.Head.
                // When GGlobalRecycler has a free slot, the bundle is stored there and null is returned.
                BundlesToRecycle = Lists->RecycleFullBundle(BasePtr->PoolIndex); 
                bool bPushed = Lists->Free(Ptr, PoolIndex, BlockSize); // push onto the head of this bin's PartialBundle list in the thread cache
                check(bPushed);
    #if BINNED2_ALLOCATOR_STATS
                Lists->AllocatedMemory -= BlockSize;
    #endif
            }
            else
            {
                // No thread cache: treat the freed block itself as a one-node bundle.
                BundlesToRecycle = (FBundleNode*)Ptr;
                BundlesToRecycle->NextNodeInCurrentBundle = nullptr;
            }
            if (BundlesToRecycle) // something must go back to the pools
            {
                BundlesToRecycle->NextBundle = nullptr;
                FScopeLock Lock(&Mutex);
                Private::FreeBundles(*this, BundlesToRecycle, BlockSize, PoolIndex); // return the blocks held by BundlesToRecycle to their pools
    #if BINNED2_ALLOCATOR_STATS
                if (!Lists)
                {
                    // lists track their own stat track them instead in the global stat if we don't have lists
                    AllocatedSmallPoolMemory -= ((int64)(BlockSize));
                }
    #endif
            }
        }
        else if (Ptr)
        {
            // Non-binned path: allocated directly from the OS
            FScopeLock Lock(&Mutex);
            FPoolInfo* Pool = Private::FindPoolInfo(*this, Ptr);
            if (!Pool)
            {
                UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized block %p"), Ptr);
            }
            UPTRINT PoolOsBytes = Pool->GetOsAllocatedBytes();
            SIZE_T PoolOSRequestedBytes = Pool->GetOSRequestedBytes();
    
    #if BINNED2_ALLOCATOR_STATS
            AllocatedLargePoolMemory -= ((int64)PoolOSRequestedBytes);
            AllocatedLargePoolMemoryWAlignment -= ((int64)PoolOsBytes);
    #endif
    
            checkf(PoolOSRequestedBytes <= PoolOsBytes, TEXT("FMallocBinned2::FreeExternal %d %d"), int32(PoolOSRequestedBytes), int32(PoolOsBytes));
            Pool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
            // Free an OS allocation.
            CachedOSPageAllocator.Free(Ptr, PoolOsBytes);
        }
    }
    
    
    // Returns every block held in a chain of bundles to its owning pool.
    //   Allocator        - allocator whose pool tables and OS page cache are updated
    //   BundlesToRecycle - first bundle; bundles are chained through NextBundle, nodes through NextNodeInCurrentBundle
    //   InBlockSize      - block size written into each rebuilt free-block header
    //   InPoolIndex      - bin index, used to pick the pool table and written into each header
    // FreeExternal calls this while holding the allocator mutex.
    static void FreeBundles(FMallocBinned2& Allocator, FBundleNode* BundlesToRecycle, uint32 InBlockSize, uint32 InPoolIndex)
    {
        FPoolTable& Table = Allocator.SmallPoolTables[InPoolIndex];
    
        // Release the memory referenced by every node of the BundlesToRecycle chain
        FBundleNode* Bundle = BundlesToRecycle;
        while (Bundle)
        {
            // Read the link first: the node's memory is overwritten below.
            FBundleNode* NextBundle = Bundle->NextBundle;
    
            FBundleNode* Node = Bundle;
            do
            {
                FBundleNode* NextNode = Node->NextNodeInCurrentBundle;
                FPoolInfo*   NodePool = FindPoolInfo(Allocator, Node);
                if (!NodePool)
                {
                    UE_LOG(LogMemory, Fatal, TEXT("FMallocBinned2 Attempt to free an unrecognized small block %p"), Node);
                }
                NodePool->CheckCanary(FPoolInfo::ECanary::FirstFreeBlockIsPtr);
    
                // If this pool was exhausted, move to available list.
                if (!NodePool->FirstFreeBlock) // the pool is currently on the ExhaustedPools list
                {
                    Table.ActivePools.LinkToFront(NodePool); // move the pool from ExhaustedPools to ActivePools
                }
                else
                {
                    check(NodePool->FirstFreeBlock->Canary == 0 || NodePool->FirstFreeBlock->IsCanaryOk());
                }
    
                // Free a pooled allocation: build a fresh FFreeBlock at the node's address with NumFreeBlocks = 1
                // and push it onto the head of NodePool->FirstFreeBlock.
                FFreeBlock* Free = (FFreeBlock*)Node;
                Free->NumFreeBlocks = 1;
                Free->NextFreeBlock = NodePool->FirstFreeBlock;
                Free->BlockSize     = InBlockSize;
                Free->Canary = FFreeBlock::CANARY_VALUE;
                Free->PoolIndex = InPoolIndex;
                NodePool->FirstFreeBlock   = Free;
    
                // Free this pool.
                check(NodePool->Taken >= 1);
                if (--NodePool->Taken == 0) // every block of the pool is now free
                {
                    NodePool->SetCanary(FPoolInfo::ECanary::Unassigned, true, false);
                    FFreeBlock* BasePtrOfNode = GetPoolHeaderFromPointer(Node);
    
                    // Free the OS memory.
                    NodePool->Unlink(); // detach the pool from its list
                    Allocator.CachedOSPageAllocator.Free(BasePtrOfNode, Allocator.PageSize); // give the pool's whole page back
    #if BINNED2_ALLOCATOR_STATS
                    AllocatedOSSmallPoolMemory -= ((int64)Allocator.PageSize);
    #endif
                }
    
                Node = NextNode; // continue with the next node of this bundle
            } while (Node);
    
            Bundle = NextBundle;
        }
    }

    Malloc分配内存

    // Routes an allocation request to the small-block pools or to the large-allocation path.
    // Small path:  Size <= BINNED2_MAX_SMALL_POOL_SIZE and Alignment <= BINNED2_MINIMUM_ALIGNMENT; served from the memory pools.
    // Large path:  everything else; allocated directly from the OS and recorded in HashBuckets.
    FORCEINLINE void* MallocSelect(SIZE_T Size, uint32 Alignment)
    {
        return UseSmallAlloc(Size, Alignment)
            ? MallocExternalSmall(Size, Alignment)
            : MallocExternalLarge(Size, Alignment);
    }
    
    
    // Allocates one small block for Size bytes.
    // Fast path: take a block from the calling thread's cache, without locking.
    // Slow path: under Mutex, take a block from the bin's first active pool (creating a pool if
    // none is active) and, when a thread cache exists, pre-fill it with extra blocks.
    // Alignment is not used in the body shown here.
    void* FMallocBinned2::MallocExternalSmall(SIZE_T Size, uint32 Alignment)
    {
        uint32 PoolIndex = BoundSizeToPoolIndex(Size); // index into SmallPoolTables for this Size
        
        // Prefer a block from the TLS cache
        FPerThreadFreeBlockLists* Lists = GMallocBinned2PerThreadCaches ? FPerThreadFreeBlockLists::Get() : nullptr;
        if (Lists)
        {
            // If this bin's PartialBundle.Head in the thread cache is empty, pop a bundle for this bin from GGlobalRecycler and install it as PartialBundle.Head;
            // the call then reports whether PartialBundle.Head is non-empty.
            if (Lists->ObtainRecycledPartial(PoolIndex)) 
            {
                if (void* Result = Lists->Malloc(PoolIndex)) // pop one FBundleNode* from the head of this bin's PartialBundle list
                {
    #if BINNED2_ALLOCATOR_STATS
                    uint32 BlockSize = PoolIndexToBlockSize(PoolIndex);
                    Lists->AllocatedMemory += BlockSize;
    #endif
                    return Result; // the TLS cache had a block for this PoolIndex: hand it out directly
                }
            }
        }
    
        FScopeLock Lock(&Mutex); // take the mutex; released automatically when the scope ends
    
        // Allocate from small object pool.
        FPoolTable& Table = SmallPoolTables[PoolIndex]; // pool table for this PoolIndex
    
        FPoolInfo* Pool;
        if (!Table.ActivePools.IsEmpty()) // the table has at least one active pool
        {
            Pool = &Table.ActivePools.GetFrontPool();  // use the first pool
        }
        else
        {
            Pool = &Table.ActivePools.PushNewPoolToFront(*this, Table.BlockSize, PoolIndex); // create a new pool
        }
    
        void* Result = Pool->AllocateRegularBlock(); // take one block from the pool. Note: the call itself checks for exhaustion and, when no free block is left, adds the pool to the table's ExhaustedPools
    #if BINNED2_ALLOCATOR_STATS
        AllocatedSmallPoolMemory += PoolIndexToBlockSize(PoolIndex);
    #endif // BINNED2_ALLOCATOR_STATS
        if (GMallocBinned2AllocExtra) // GMallocBinned2AllocExtra is 32; this part is a TLS cache optimization
        {
            if (Lists)
            {
                // prefill the free list with some allocations so we are less likely to hit this slow path with the mutex 
                for (int32 Index = 0; Index < GMallocBinned2AllocExtra && Pool->HasFreeRegularBlock(); Index++)
                {
                    if (!Lists->Free(Result, PoolIndex, Table.BlockSize)) // on success the current Result is pushed onto the head of this bin's PartialBundle list
                    {
                        break;
                    }
                    Result = Pool->AllocateRegularBlock(); // take another block from the pool
                }
            }
        }
        if (!Pool->HasFreeRegularBlock()) // does the pool still have a free block?
        {
            Table.ExhaustedPools.LinkToFront(Pool); // no: move it to the ExhaustedPools list
        }
    
        return Result;
    }

    Android(小米10)DumpPlatformAndAllocatorStats统计信息:

    [2021.05.27-15.59.49:152][ 66]LogMemory: Platform Memory Stats for Android
    [2021.05.27-15.59.49:152][ 66]LogMemory: Process Physical Memory: 1207.08 MB used, 1254.11 MB peak
    [2021.05.27-15.59.49:152][ 66]LogMemory: Process Virtual Memory: 8984.62 MB used, 9077.56 MB peak
    [2021.05.27-15.59.49:152][ 66]LogMemory: Physical Memory: 5445.78 MB used,  2177.80 MB free, 7623.57 MB total
    [2021.05.27-15.59.49:153][ 66]LogMemory: Virtual Memory: 608.03 MB used,  1439.97 MB free, 2048.00 MB total
    [2021.05.27-15.59.49:153][ 66]LogMemory: PageSize: 4096, BinnedPageSize: 65536, BinnedAllocationGranularity: 4096, AddressLimit: 8589934592
    [2021.05.27-15.59.49:154][ 66]FMallocBinned2 Mem report
    [2021.05.27-15.59.49:154][ 66]Constants.BinnedPageSize = 65536
    [2021.05.27-15.59.49:154][ 66]Constants.BinnedAllocationGranularity = 4096
    [2021.05.27-15.59.49:154][ 66]Small Pool Allocations: 388.752121mb  (including block size padding)
    [2021.05.27-15.59.49:155][ 66]Small Pool OS Allocated: 419.000000mb
    [2021.05.27-15.59.49:155][ 66]Large Pool Requested Allocations: 204.530167mb
    [2021.05.27-15.59.49:155][ 66]Large Pool OS Allocated: 205.332031mb
    [2021.05.27-15.59.49:155][ 66]Requested Allocations: 204.530167mb
    [2021.05.27-15.59.49:155][ 66]OS Allocated: 205.332031mb
    [2021.05.27-15.59.49:155][ 66]PoolInfo: 1.687500mb
    [2021.05.27-15.59.49:155][ 66]Hash: 0.003906mb
    [2021.05.27-15.59.49:156][ 66]TLS: 0.066406mb
    [2021.05.27-15.59.49:156][ 66]Total allocated from OS: 626.089844mb
    [2021.05.27-15.59.49:156][ 66]Cached free OS pages: 3.894531mb

    PC下DumpPlatformAndAllocatorStats统计信息:

    [2021.06.04-06.12.34:488][748]LogMemory: Platform Memory Stats for Windows
    [2021.06.04-06.12.34:488][748]LogMemory: Process Physical Memory: 704.69 MB used, 775.71 MB peak
    [2021.06.04-06.12.34:488][748]LogMemory: Process Virtual Memory: 784.52 MB used, 888.80 MB peak
    [2021.06.04-06.12.34:488][748]LogMemory: Physical Memory: 24035.47 MB used,  8565.63 MB free, 32601.11 MB total
    [2021.06.04-06.12.34:488][748]LogMemory: Virtual Memory: 134206408.00 MB used,  11316.60 MB free, 134217728.00 MB total
    [2021.06.04-06.12.34:489][748]FMallocBinned2 Mem report
    [2021.06.04-06.12.34:489][748]Constants.BinnedPageSize = 65536
    [2021.06.04-06.12.34:489][748]Constants.BinnedAllocationGranularity = 4096
    [2021.06.04-06.12.34:489][748]Small Pool Allocations: 130.058121mb  (including block size padding)
    [2021.06.04-06.12.34:489][748]Small Pool OS Allocated: 157.312500mb
    [2021.06.04-06.12.34:489][748]Large Pool Requested Allocations: 141.529739mb
    [2021.06.04-06.12.34:489][748]Large Pool OS Allocated: 141.667969mb
    [2021.06.04-06.12.34:489][748]Requested Allocations: 141.529739mb
    [2021.06.04-06.12.34:489][748]OS Allocated: 141.667969mb
    [2021.06.04-06.12.34:489][748]PoolInfo: 0.500000mb
    [2021.06.04-06.12.34:489][748]Hash: 0.007813mb
    [2021.06.04-06.12.34:489][748]TLS: 0.128906mb
    [2021.06.04-06.12.34:489][748]Total allocated from OS: 299.617188mb
    [2021.06.04-06.12.34:490][748]Cached free OS pages: 34.992188mb

    参考

    UE4 MallocBinned2分配器

    People Mountain People Sea(服务器篇)

  • 相关阅读:
    编码导致 html和aspx 样式差异,变形
    Recommand of the Day:Names in English
    NSBundle常用方法及解释
    在K8S 中部署 Spring Boot 应用,爽!
    如果抛开 Spring,如何自己实现 AOP?面试必问。。。
    为什么阿里强制 boolean 类型变量不能使用 is 开头?
    CTO 说禁用 Lombok,看我怼死他。。
    面试官:什么是 YAML?和 Spring Boot 有什么关系?
    面试官:线程池多余的线程是如何回收的?
    JetBrains 发布下一代 IDE,无比轻量,几秒就能启动干活,IDEA 可以扔了。。
  • 原文地址:https://www.cnblogs.com/kekec/p/14675228.html
Copyright © 2020-2023  润新知