• 各种内存分配器的对比测试


    最近两天测试了下tcmalloc,性能的确牛B.

    所以修改了下固定对象分配器,模仿tcmalloc利用tls做thread cache.

    下面是在我机器上对自己写的各个内存分配器与tcmalloc的对比测试结果(每行输出为一轮测试的耗时):

    fix_obj_pool finish:326
    fix_obj_pool finish:165
    fix_obj_pool finish:168
    fix_obj_pool finish:164
    fix_obj_pool finish:174
    fix_obj_pool finish:164
    fix_obj_pool finish:174
    fix_obj_pool finish:185
    fix_obj_pool finish:173
    fix_obj_pool finish:168
    gen_allocator finish:567
    gen_allocator finish:264
    gen_allocator finish:261
    gen_allocator finish:260
    gen_allocator finish:260
    gen_allocator finish:261
    gen_allocator finish:260
    gen_allocator finish:261
    gen_allocator finish:260
    gen_allocator finish:263
    block_obj_allocator finish:342
    block_obj_allocator finish:257
    block_obj_allocator finish:258
    block_obj_allocator finish:257
    block_obj_allocator finish:258
    block_obj_allocator finish:257
    block_obj_allocator finish:258
    block_obj_allocator finish:259
    block_obj_allocator finish:263
    block_obj_allocator finish:262
    tcmalloc finish:279
    tcmalloc finish:266
    tcmalloc finish:265
    tcmalloc finish:267
    tcmalloc finish:266
    tcmalloc finish:266
    tcmalloc finish:265
    tcmalloc finish:264
    tcmalloc finish:266
    tcmalloc finish:267
    test1 finish------------
    fix_obj_pool finish:606
    fix_obj_pool finish:471
    fix_obj_pool finish:469
    fix_obj_pool finish:473
    fix_obj_pool finish:468
    fix_obj_pool finish:468
    fix_obj_pool finish:470
    fix_obj_pool finish:474
    fix_obj_pool finish:475
    fix_obj_pool finish:467
    gen_allocator finish:928
    gen_allocator finish:647
    gen_allocator finish:677
    gen_allocator finish:643
    gen_allocator finish:645
    gen_allocator finish:644
    gen_allocator finish:643
    gen_allocator finish:644
    gen_allocator finish:643
    gen_allocator finish:644
    block_obj_allocator finish:586
    block_obj_allocator finish:500
    block_obj_allocator finish:502
    block_obj_allocator finish:500
    block_obj_allocator finish:502
    block_obj_allocator finish:501
    block_obj_allocator finish:501
    block_obj_allocator finish:501
    block_obj_allocator finish:501
    block_obj_allocator finish:501
    tcmalloc finish:551
    tcmalloc finish:549
    tcmalloc finish:549
    tcmalloc finish:549
    tcmalloc finish:551
    tcmalloc finish:549
    tcmalloc finish:548
    tcmalloc finish:551
    tcmalloc finish:549
    tcmalloc finish:550
    test2 finish------------
    fix_obj_pool finish:464
    fix_obj_pool finish:466
    fix_obj_pool finish:464
    fix_obj_pool finish:465
    fix_obj_pool finish:465
    fix_obj_pool finish:466
    fix_obj_pool finish:465
    fix_obj_pool finish:464
    fix_obj_pool finish:467
    fix_obj_pool finish:465
    gen_allocator finish:674
    gen_allocator finish:661
    gen_allocator finish:667
    gen_allocator finish:656
    gen_allocator finish:657
    gen_allocator finish:658
    gen_allocator finish:658
    gen_allocator finish:660
    gen_allocator finish:657
    gen_allocator finish:660
    block_obj_allocator finish:479
    block_obj_allocator finish:479
    block_obj_allocator finish:477
    block_obj_allocator finish:477
    block_obj_allocator finish:478
    block_obj_allocator finish:480
    block_obj_allocator finish:478
    block_obj_allocator finish:481
    block_obj_allocator finish:477
    block_obj_allocator finish:478
    tcmalloc finish:562
    tcmalloc finish:565
    tcmalloc finish:563
    tcmalloc finish:562
    tcmalloc finish:562
    tcmalloc finish:563
    tcmalloc finish:566
    tcmalloc finish:565
    tcmalloc finish:562
    tcmalloc finish:562
    test3 finish------------

    三个测试分别是

    1)分配 1000万个16字节的对象

    2)分配1000万,再释放1000万

    3)分配10万,释放10万,执行1000万/10万次

    从输出可以看出fix_obj_pool 的第一个测试是最快的,因为它的分配处理最简单,但如果把释放也计入统计,优势就几乎没有了。

    还有一个手段可以优化fix_obj_pool,就是释放时不将对象放回到可用列表中,只是增加一个计数,当整个内存块中的对象都被释放时

    才将内存放回到可用列表中去。而gen_allocator效果是最差的,可以直接丢弃了。

    从测试结果可以看出,tcmalloc已经可以满足大多数的需求,基本无必要自己写通用内存分配器。当然对象池还是可以考虑的。

    代码如下:

    block_obj_allocator.h

    #ifndef _BLOCK_OBJ_ALLOCATOR
    #define _BLOCK_OBJ_ALLOCATOR
    
    
    /* Opaque handle to a block object allocator; the struct body lives in block_obj_allocator.c. */
    typedef struct block_obj_allocator *block_obj_allocator_t;
    
    /* Creates a new allocator instance and returns its handle. */
    block_obj_allocator_t create_block_obj_allocator();
    
    /*
     * Prints the free-list statistics of the calling thread's cache for the
     * size class that serves requests of the given size (second argument).
     */
    void print_info(block_obj_allocator_t,int);
    
    #endif

    block_obj_allocator.c

    #include "block_obj_allocator.h"
    #include <assert.h>
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "link_list.h"
    #include "spinlock.h"
    #include "clib/include/allocator.h"
    
    /*
     * One block of memory carved into equally sized objects, plus the chain of
     * objects from it (or returned to it) that are currently free.
     */
    struct free_list{
        list_node next;        /* list linkage; kept first because the code casts struct free_list* to list_node* */
        uint32_t  size;        /* number of objects currently chained between head and tail */
        uint32_t  init_size;   /* number of objects the block was carved into at creation */
        list_node *head;       /* first free object, NULL when the chain is empty */
        list_node *tail;       /* last free object, NULL when the chain is empty */
        void *mem;             /* backing block obtained from calloc, released in release_freelist */
    };
    
    /* Per-thread allocator state for a single size class. */
    struct thread_allocator
    {
        list_node next;                            /* NOTE(review): not referenced in the visible code - confirm whether it is still needed */
        block_obj_allocator_t central_allocator;   /* owning allocator, used to fetch and return free_lists */
        struct link_list *_free_list;              /* free_lists that objects are currently handed out from */
        struct link_list *_recover;                /* drained free_lists that are being refilled by deallocations */
        uint32_t   free_size;                      /* total number of free objects held across both lists */
        uint16_t   array_idx;                      /* size-class index; objects are 1<<array_idx bytes */
        uint32_t   collect_factor;                 /* once free_size reaches this, a fully refilled free_list goes back to central */
    };
    
    /* All per-thread allocator state of one thread: one thread_allocator per size class. */
    struct thread_cache
    {
        list_node next;                          /* linkage used when the cache is pushed onto block_obj_allocator.thread_caches */
        struct thread_allocator _allocator[17];  /* indexed by size class 0..16 */
    };
    
    /* Central allocator shared by all threads. */
    struct block_obj_allocator
    {
        /*
         * Macro defined outside this file.
         * NOTE(review): presumably embeds the base struct allocator as member
         * `super_class` (create_block_obj_allocator assigns ba->super_class.*)
         * and has to stay first because handles are cast to struct allocator* -
         * confirm against allocator.h.
         */
        IMPLEMEMT(allocator);
        pthread_key_t t_key;                 /* thread-specific key holding each thread's struct thread_cache* */
        spinlock_t _free_list_mtx[17];       /* one lock per size class, taken around _free_list[i] accesses */
        struct link_list *_free_list[17];    /* central pool of free_lists, one list per size class */
        spinlock_t mtx;                      /* taken while pushing onto thread_caches */
        struct link_list *thread_caches;     /* every thread_cache ever created, walked on destroy */
    };
    
    
    static void *free_list_get(struct free_list *f)
    {
        void *ptr = (void*)f->head;
        f->head = f->head->next;
        if(!f->head)
            f->tail = NULL;
        --f->size;    
        return ptr;
    }
    
    /*
     * Appends the object at ptr to the end of the free list and bumps the
     * object count. The first bytes of the object are reused as its list_node.
     */
    static void free_list_put(struct free_list *f,void *ptr)
    {
        list_node *node = (list_node*)ptr;
        node->next = NULL;
        if(f->tail == NULL)
            f->head = node;            /* list was empty: node is also the head */
        else
            f->tail->next = node;
        f->tail = node;
        f->size++;
    }
    
    /*
     * Size in bytes of the memory block behind every free_list (1 MiB).
     * Parenthesized so that the macro expands as a single value inside larger
     * expressions such as x / DEFAULT_BLOCK_SIZE.
     */
    #define DEFAULT_BLOCK_SIZE (1024*1024)
    
    /*
     * Allocates one zeroed DEFAULT_BLOCK_SIZE block, carves it into consecutive
     * objects of `size` bytes and chains all of them into a new free_list.
     *
     * size: object size in bytes; callers in this file pass 1<<array_idx.
     * Returns the new free_list with size == init_size == number of objects.
     * Allocation failure is only caught by assert.
     */
    static struct free_list *creat_new_freelist(uint32_t size)
    {
        uint32_t init_size = DEFAULT_BLOCK_SIZE/size;
        struct free_list *f = calloc(1,sizeof(*f));
        assert(f);
        f->mem = calloc(1,DEFAULT_BLOCK_SIZE);
        assert(f->mem);
        /* f->size starts at 0 (calloc) and free_list_put counts every object in,
           so it equals init_size once the loop is done */
        uint32_t i = 0;
        for( ; i < init_size; ++i)
            free_list_put(f,((uint8_t*)f->mem)+(i*size));
        f->init_size = init_size;
        return f;
    
    }
    
    /*
     * Hands out a free_list for size class array_idx: one popped from the
     * central pool (under that class's lock) if available, otherwise a freshly
     * created one.
     */
    static struct free_list *central_get_freelist(block_obj_allocator_t central,uint16_t array_idx)
    {
        struct free_list *pooled;

        spin_lock(central->_free_list_mtx[array_idx],4000);
        pooled = (struct free_list*)link_list_pop(central->_free_list[array_idx]);
        spin_unlock(central->_free_list_mtx[array_idx]);

        if(pooled)
            return pooled;
        /* central pool is empty for this class: build a new block */
        return creat_new_freelist(1<<array_idx);
    }
    
    /*
     * Returns free_list f to the central pool of size class array_idx,
     * holding that class's lock while the pool is modified.
     */
    static void give_back_to_central(block_obj_allocator_t central,uint16_t array_idx,struct free_list *f)
    {
        spin_lock(central->_free_list_mtx[array_idx],4000);
        {
            LINK_LIST_PUSH_BACK(central->_free_list[array_idx],f);
        }
        spin_unlock(central->_free_list_mtx[array_idx]);
    }
    
    
    /*
     * Takes one object out of the thread allocator.
     *
     * When no free object is left a whole free_list is fetched from the central
     * allocator. Otherwise the head of _free_list is used, falling back to the
     * first list in _recover when _free_list is empty. A free_list that becomes
     * empty is moved from _free_list to _recover.
     */
    void *thread_allocator_alloc(struct thread_allocator *a)
    {
        struct free_list *src;
        void *obj;

        if(a->free_size == 0)
        {
            /* thread cache has run dry: fetch a free_list from central */
            src = central_get_freelist(a->central_allocator,a->array_idx);
            assert(src);
            LINK_LIST_PUSH_BACK(a->_free_list,src);
            a->free_size += src->size;
        }
        else
        {
            src = (struct free_list*)link_list_head(a->_free_list);
            if(src == NULL)
            {
                /* all remaining free objects sit in a list that is being refilled */
                src = (struct free_list*)link_list_pop(a->_recover);
                LINK_LIST_PUSH_BACK(a->_free_list,src);
            }
        }

        obj = free_list_get(src);
        assert(obj);
        a->free_size -= 1;

        if(src->size == 0)
        {
            /* drained: park the list in _recover until deallocations refill it */
            link_list_pop(a->_free_list);
            link_list_push_back(a->_recover,(list_node*)src);
        }
        return obj;
    }
    
    /*
     * Gives one object back to the thread allocator.
     *
     * The object goes into the first list of _recover when there is one. Once
     * that list is back at its initial object count it leaves _recover and is
     * either returned to the central allocator (free_size >= collect_factor)
     * or appended to _free_list. Without any list in _recover the object is
     * put into the head of _free_list.
     */
    void thread_allocator_dealloc(struct thread_allocator *a,void *ptr)
    {
        struct free_list *target = (struct free_list*)link_list_head(a->_recover);

        if(!target)
        {
            target = (struct free_list*)link_list_head(a->_free_list);
            assert(target);
            free_list_put(target,ptr);
            ++a->free_size;
            return;
        }

        free_list_put(target,ptr);
        ++a->free_size;
        if(target->size != target->init_size)
            return;

        /* the recovering list is full again: hand it to central or reuse it locally */
        link_list_pop(a->_recover);
        if(a->free_size >= a->collect_factor)
        {
            /* give the list back to the central allocator */
            give_back_to_central(a->central_allocator,a->array_idx,target);
            a->free_size -= target->size;
        }
        else
        {
            link_list_push_back(a->_free_list,(list_node*)target);
        }
    }
    
    /*
     * Prints the total number of free objects of the thread allocator followed
     * by the object count of every free_list in _free_list and in _recover.
     * The counters are unsigned, so they are printed with %u.
     */
    void thread_allocator_info(struct thread_allocator *a)
    {
        struct free_list *f;

        printf("free_size:%u\n",(unsigned)a->free_size);

        f = (struct free_list*)link_list_head(a->_free_list);
        for( ; f; f = (struct free_list*)((list_node*)f)->next)
            printf("f size%u\n",(unsigned)f->size);

        f = (struct free_list*)link_list_head(a->_recover);
        for( ; f; f = (struct free_list*)((list_node*)f)->next)
            printf("f recover size%u\n",(unsigned)f->size);
    }
    
    
    extern uint8_t GetK(uint32_t size);
    
    /*
     * Builds the thread cache of one thread: a thread_allocator per size class,
     * each with empty _free_list/_recover lists and a collect_factor worth two
     * blocks of objects. The new cache is registered in ba->thread_caches under
     * ba->mtx so that it can be released when the allocator is destroyed.
     * Allocation failure is caught by assert, as in creat_new_freelist.
     */
    static struct thread_cache* thread_cache_create(block_obj_allocator_t ba)
    {
        struct thread_cache *tc = calloc(1,sizeof(*tc));
        assert(tc);
        int32_t i = 0;
        for( ; i < 17; ++i)
        {
            tc->_allocator[i].central_allocator = ba;
            tc->_allocator[i]._free_list = LINK_LIST_CREATE();
            tc->_allocator[i]._recover = LINK_LIST_CREATE();
            tc->_allocator[i].array_idx = i;
            tc->_allocator[i].collect_factor = ((DEFAULT_BLOCK_SIZE)*2)/(1<<i);
        }
        spin_lock(ba->mtx,4000);
        LINK_LIST_PUSH_BACK(ba->thread_caches,tc);
        spin_unlock(ba->mtx);
        return tc; 
    }
    
    /*
     * Frees every free_list linked in flist together with its backing memory
     * block. The link_list container itself is not destroyed here.
     */
    static void release_freelist(struct link_list *flist)
    {
        list_node *cur;
        list_node *following;

        for(cur = link_list_head(flist); cur != NULL; cur = following)
        {
            struct free_list *fl = (struct free_list*)cur;
            following = cur->next;    /* read the link before the node is freed */
            free(fl->mem);
            free(fl);
        }
    }
    
    static void destroy_thread_cache(struct thread_cache *tc)
    {
        int32_t i = 0;
        for(; i < 17; ++i)
        {
            release_freelist(tc->_allocator[i]._free_list);
            release_freelist(tc->_allocator[i]._recover);
            LINK_LIST_DESTROY(&(tc->_allocator[i]._free_list));
            LINK_LIST_DESTROY(&(tc->_allocator[i]._recover));
        }
        free(tc);
    }
    
    /*
     * Allocates `size` bytes from the thread cache.
     *
     * The request is enlarged by an int32_t header, mapped to a size class with
     * GetK, and served by that class's thread_allocator. The class index is
     * stored in the header so that thread_cache_dealloc can find the allocator
     * again; the returned pointer is the address right after the header.
     * NOTE(review): GetK is defined elsewhere; presumably it yields the power of
     * two that covers its argument - confirm. A class outside the 17 available
     * ones trips the assert instead of indexing past the array.
     */
    static void* thread_cache_alloc(struct thread_cache *tc,uint32_t size)
    {
        size += sizeof(int32_t);
        uint8_t k = GetK(size);
        assert(k < 17);    /* _allocator has 17 entries */
        int32_t *ptr = (int32_t*)thread_allocator_alloc(&(tc->_allocator[k]));
        *ptr = k;
        return (void*)(ptr + 1);
    }
    
    /*
     * Returns an object obtained from thread_cache_alloc. The size-class index
     * is read from the int32_t header in front of ptr and the whole slot
     * (header included) goes back to that class's thread_allocator.
     */
    static void  thread_cache_dealloc(struct thread_cache *tc,void *ptr)
    {
        int32_t *header = (int32_t*)ptr - 1;
        uint8_t size_class = *header;
        struct thread_allocator *owner = &(tc->_allocator[size_class]);
        thread_allocator_dealloc(owner,header);
    }
    
    /*
     * Prints the statistics of the thread_allocator that serves requests of
     * `size` bytes (the int32_t header is added before the class lookup, as in
     * thread_cache_alloc).
     */
    static void thread_cache_info(struct thread_cache *tc,uint32_t size)
    {
        uint32_t padded = size;
        padded += sizeof(int32_t);
        uint8_t size_class = GetK(padded);
        thread_allocator_info(&(tc->_allocator[size_class]));
    }
    
    /*
     * Alloc entry point installed in the allocator interface: allocates `size`
     * bytes from the calling thread's cache, creating and registering that
     * cache under the allocator's pthread key on the thread's first call.
     */
    static void* block_obj_al_alloc(struct allocator *a, int32_t size)
    {
        block_obj_allocator_t self = (block_obj_allocator_t)a;
        struct thread_cache *cache = (struct thread_cache*)pthread_getspecific(self->t_key);
        if(cache == NULL)
        {
            cache = thread_cache_create(self);
            pthread_setspecific(self->t_key,(void*)cache);
        }
        return thread_cache_alloc(cache,size);
    }
    
    /*
     * DeAlloc entry point installed in the allocator interface: returns ptr to
     * the calling thread's cache. The thread must already own a cache (asserted).
     */
    static void  block_obj_al_dealloc(struct allocator*a, void *ptr)
    {
        block_obj_allocator_t self = (block_obj_allocator_t)a;
        struct thread_cache *cache = (struct thread_cache*)pthread_getspecific(self->t_key);
        assert(cache);
        thread_cache_dealloc(cache,ptr);
    }
    
    /*
     * Destroy entry point installed in the allocator interface. Tears down, in
     * order: every registered thread cache, the central free_lists of all size
     * classes, the per-class locks, the thread_caches lock, the pthread key and
     * the allocator itself. *a is set to NULL on return.
     */
    static void destroy_block_obj_al(struct allocator **a)
    {
        block_obj_allocator_t self = (block_obj_allocator_t)*a;
        int32_t idx;

        /* destroy all thread caches */
        list_node *node = link_list_head(self->thread_caches);
        while(node != NULL)
        {
            struct thread_cache *cache = (struct thread_cache *)node;
            node = node->next;    /* advance before the cache is freed */
            destroy_thread_cache(cache);
        }
        LINK_LIST_DESTROY(&self->thread_caches);

        /* destroy all central free_lists */
        for(idx = 0; idx < 17; ++idx)
        {
            release_freelist(self->_free_list[idx]);
            LINK_LIST_DESTROY(&self->_free_list[idx]);
        }

        for(idx = 0; idx < 17; ++idx)
        {
            spin_destroy(&(self->_free_list_mtx[idx]));
        }

        spin_destroy(&(self->mtx));
        pthread_key_delete(self->t_key);
        free(self);
        *a = NULL;
    }
    
    block_obj_allocator_t create_block_obj_allocator()
    {
        block_obj_allocator_t ba = (block_obj_allocator_t)calloc(1,sizeof(*ba));
        ba->mtx = spin_create();
        ba->thread_caches = LINK_LIST_CREATE();
        int32_t i = 0;
        for( ; i < 17; ++i)
        {
            ba->_free_list[i] = LINK_LIST_CREATE();
            ba->_free_list_mtx[i] = spin_create();
        }
        pthread_key_create(&ba->t_key,0);
        ba->super_class.Alloc = block_obj_al_alloc;
        ba->super_class.DeAlloc = block_obj_al_dealloc;
        ba->super_class.Destroy = destroy_block_obj_al;
        return ba;
    }
    
    /*
     * Prints the calling thread's free-list statistics for the size class that
     * serves requests of `size` bytes. Does nothing when the calling thread has
     * no thread cache yet, i.e. it has not allocated from ba.
     */
    void print_info(block_obj_allocator_t ba,int size)
    {
        struct thread_cache *tc = (struct thread_cache*)pthread_getspecific(ba->t_key);
        if(!tc)
            return;    /* no cache for this thread: nothing to report */
        thread_cache_info(tc,size);
    }

    test.c

    #include "log.h"
    #include <stdio.h>
    #include "atomic.h"
    #include "wpacket.h"
    #include "packet_allocator.h"
    #include <stdlib.h>
    #include "SysTime.h"
    #include <string.h>
    #include "block_obj_allocator.h"
    #include "clib/include/mem_allocator.h"
    #include "clib/include/fix_obj_pool.h"
    uint32_t GetSize_of_pow2(uint32_t size);
    uint8_t GetK(uint32_t size);
    
    
    
    
    
    /*
     * Benchmark 1: for each allocator, time the allocation of OBJ_COUNT
     * 16-byte objects; the objects are freed after the time has been printed,
     * so freeing is not part of the measurement. Repeated ROUNDS times per
     * allocator.
     *
     * tmp must have room for OBJ_COUNT pointers (main allocates 10000000).
     * The elapsed time is cast to unsigned and printed with %u because the
     * tick arithmetic is unsigned.
     * NOTE(review): the last section calls plain malloc/free and is labelled
     * "tcmalloc" - presumably the binary is linked against tcmalloc; confirm.
     */
    void test1(char **tmp)
    {
        enum { ROUNDS = 10, OBJ_COUNT = 10000000 };

        {
            allocator_t a =  create_pool(16,65536*10,1);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                printf("fix_obj_pool finish:%u\n",(unsigned)(GetSystemMs()-tick));
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
            }
            DESTROY(&a);
        }
        {
            allocator_t a =  gen_allocator_create(65536);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                printf("gen_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
            }
            DESTROY(&a);
        }
        {
            allocator_t a = (allocator_t)create_block_obj_allocator();
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                printf("block_obj_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
            }
            DESTROY(&a);
        }
        {
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = malloc(16);
                printf("tcmalloc finish:%u\n",(unsigned)(GetSystemMs()-tick));
                for(int i = 0; i < OBJ_COUNT; ++i)
                    free(tmp[i]);
            }
        }
    }
    
    /*
     * Benchmark 2: for each allocator, time the allocation of OBJ_COUNT
     * 16-byte objects followed by freeing all of them; both phases are inside
     * the measured interval. Repeated ROUNDS times per allocator.
     *
     * tmp must have room for OBJ_COUNT pointers (main allocates 10000000).
     * The elapsed time is cast to unsigned and printed with %u because the
     * tick arithmetic is unsigned.
     * NOTE(review): the last section calls plain malloc/free and is labelled
     * "tcmalloc" - presumably the binary is linked against tcmalloc; confirm.
     */
    void test2(char **tmp)
    {
        enum { ROUNDS = 10, OBJ_COUNT = 10000000 };

        {
            allocator_t a =  create_pool(16,65536*10,1);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
                printf("fix_obj_pool finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            allocator_t a =  gen_allocator_create(65536);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
                printf("gen_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            allocator_t a = (allocator_t)create_block_obj_allocator();
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = ALLOC(a,16);
                for(int i = 0; i < OBJ_COUNT; ++i)
                    FREE(a,tmp[i]);
                printf("block_obj_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                    tmp[i] = malloc(16);
                for(int i = 0; i < OBJ_COUNT; ++i)
                    free(tmp[i]);
                printf("tcmalloc finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
        }
    }
    
    /*
     * Benchmark 3: for each allocator, allocate OBJ_COUNT 16-byte objects in
     * total, freeing the most recent BATCH objects every time BATCH of them
     * have been allocated. The whole alloc/free sequence is timed and repeated
     * ROUNDS times per allocator.
     *
     * tmp must have room for OBJ_COUNT pointers (main allocates 10000000).
     * The elapsed time is cast to unsigned and printed with %u because the
     * tick arithmetic is unsigned.
     * NOTE(review): the last section calls plain malloc/free and is labelled
     * "tcmalloc" - presumably the binary is linked against tcmalloc; confirm.
     */
    void test3(char **tmp)
    {
        enum { ROUNDS = 10, OBJ_COUNT = 10000000, BATCH = 100000 };

        {
            allocator_t a =  create_pool(16,65536*10,1);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                {
                    tmp[i] = ALLOC(a,16);
                    if((i+1)%BATCH == 0)
                    {
                        /* a batch is complete: release exactly those objects */
                        for(int k = (i+1)-BATCH; k < i+1; ++k)
                            FREE(a,tmp[k]);
                    }
                }
                printf("fix_obj_pool finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            allocator_t a =  gen_allocator_create(65536);
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                {
                    tmp[i] = ALLOC(a,16);
                    if((i+1)%BATCH == 0)
                    {
                        for(int k = (i+1)-BATCH; k < i+1; ++k)
                            FREE(a,tmp[k]);
                    }
                }
                printf("gen_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            allocator_t a = (allocator_t)create_block_obj_allocator();
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                {
                    tmp[i] = ALLOC(a,16);
                    if((i+1)%BATCH == 0)
                    {
                        for(int k = (i+1)-BATCH; k < i+1; ++k)
                            FREE(a,tmp[k]);
                    }
                }
                printf("block_obj_allocator finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
            DESTROY(&a);
        }
        {
            for(int j = 0; j < ROUNDS; ++j)
            {
                uint32_t tick = GetSystemMs();
                for(int i = 0; i < OBJ_COUNT; ++i)
                {
                    tmp[i] = malloc(16);
                    if((i+1)%BATCH == 0)
                    {
                        for(int k = (i+1)-BATCH; k < i+1; ++k)
                            free(tmp[k]);
                    }
                }
                printf("tcmalloc finish:%u\n",(unsigned)(GetSystemMs()-tick));
            }
        }
    }
    
    
    
    
    
    
    /*
     * Runs the three benchmarks in order, sharing one table of 10000000
     * pointers between them (the object count used by test1..test3).
     * Returns 0 on success, 1 when the table cannot be allocated.
     */
    int main(void)
    {
        char **tmp = calloc(10000000,sizeof(*tmp));
        if(!tmp)
        {
            fprintf(stderr,"failed to allocate the pointer table\n");
            return 1;
        }
        test1(tmp);
        printf("test1 finish------------\n");
        test2(tmp);
        printf("test2 finish------------\n");
        test3(tmp);
        printf("test3 finish------------\n");
        free(tmp);
        return 0;
    }

    项目地址:

    https://github.com/sniperHW/kendylib

  • 相关阅读:
    C#编码标准--编码习惯
    课程九,课堂测试
    JAVAweb 分级测试
    第八周 课堂报告
    javaweb界面
    12月9日,第一次自查报告
    课程管理系统后台JAVA代码
    《程序员修炼之道+从小工到专家》读后有感
    12月9日 自查后续
    课程管理系统JAVAweb前端代码
  • 原文地址:https://www.cnblogs.com/sniperHW/p/2606227.html
Copyright © 2020-2023  润新知