STL源码笔记1 —— allocators

简述

allocators是STL中很重要的一个幕后英雄的角色，STL中的容器在使用过程中需要不断的放元素进去和取元素出来，而在此过程中，如何更高效的申请和释放内存是十分影响STL容器的性能的。

operator new() 和 malloc()

首先内存的分配动作，一层层调用下去，最终到了CRT的层面上都是调用malloc()来分配，而malloc再根据所在的操作系统，调用不同的操作系统api才能真正的拿到内存。下面是vs2015和gcc 2.95的源码，里面的 operator new 最终调用 malloc 进行内存分配：

//VS2015中，Microsoft Visual Studio 14.0VCcrtsrcvcruntime
ew_debug.cpp中的源码
//debug模式的_malloc_dbg
void* __CRTDECL operator new(
    size_t const size,
    int const    block_use,
    char const*  file_name,
    int const    line_number
    )
{
    for (;;)
    {
        if (void* const block = _malloc_dbg(size, block_use, file_name, line_number))
        {
            return block;
        }

        if (_callnewh(size) == 0)
        {
            if (size == SIZE_MAX)
            {
                __scrt_throw_std_bad_array_new_length();
            }
            else
            {
                __scrt_throw_std_bad_alloc();
            }
        }
    }
}

//VS2015中，Microsoft Visual Studio 14.0VCcrtsrclinkopts
othrownew.cpp中的源码
//不抛异常的new 直接调用malloc
void* operator new(size_t size)
{
    for (;;)
    {
        if (void* const block = malloc(size))
            return block;

        if (_callnewh(size) == 0)
            return nullptr;

        // The new handler was successful; try to allocate again...
    }
}


//在gcc 2.95里面 gcc-2.95.1gcccp
ew1.cc
void * operator new (size_t sz, const std::nothrow_t&) throw()
{
  void *p;

  /* malloc (0) is unpredictable; avoid it.  */
  if (sz == 0)
    sz = 1;
  p = (void *) malloc (sz);
  while (p == 0)
    {
      new_handler handler = __new_handler;
      if (! handler)
	return 0;
      try
	{
	  handler ();
	}
      catch (bad_alloc &)
	{
	  return 0;
	}

      p = (void *) malloc (sz);
    }

  return p;
}

void * operator new (size_t sz) throw (std::bad_alloc)
{
  void *p;

  /* malloc (0) is unpredictable; avoid it.  */
  if (sz == 0)
    sz = 1;
  p = (void *) malloc (sz);
  while (p == 0)
    {
      new_handler handler = __new_handler;
      if (! handler)
	throw bad_alloc ();
      handler ();
      p = (void *) malloc (sz);
    }

  return p;
}

然而malloc分配的内存如果在debug模式下，会有许多额外的信息（包括大小、前后块指针、使用情况等信息），而即使是在release模式下，也至少会有标识大小的字节被占用。那么每次申请的内存就会有额外的开销，如果申请的空间很小，额外的开销占比就会很大。因此，产生了一种使用内存管理，减少这种开销的想法，这也是STL的allocators分配器最核心的功能。

VS2015 中的allocator

在VS中，几个容器的设计是这样的：

//VS中的vector
template<class _Ty,
	class _Alloc = allocator<_Ty> >
	class vector
		: public _Vector_alloc<_Vec_base_types<_Ty, _Alloc> >
	{	// varying size array of values
        ...
    };

//VS中的list
template<class _Ty,
	class _Alloc = allocator<_Ty> >
	class list
		: public _List_buy<_Ty, _Alloc>
	{	// bidirectional linked list
        ...
    };

可以看到在VS的容器里，默认使用的是allocator这个class，那么再去 Microsoft Visual Studio 14.0VCincludexmemory0 观察allocator的实现：

template<class _Ty>
	class allocator
	{	// generic allocator for objects of class _Ty
public:
    ...
    
	_DECLSPEC_ALLOCATOR pointer allocate(size_type _Count)
		{	// allocate array of _Count elements
		return (static_cast<pointer>(_Allocate(_Count, sizeof (_Ty))));
		}

	_DECLSPEC_ALLOCATOR pointer allocate(size_type _Count, const void *)
		{	// allocate array of _Count elements, ignore hint
		return (allocate(_Count));
		}
    };

allocate调用了_Allocate，再去看_Allocate的实现：

	_DECLSPEC_ALLOCATOR void *_Allocate(size_t _Count, size_t _Sz,
		bool _Try_aligned_allocation = true)
	{	// allocate storage for _Count elements of size _Sz
	void *_Ptr = 0;

	if (_Count == 0)
		return (_Ptr);

	// check overflow of multiply
	if ((size_t)(-1) / _Sz < _Count)
		_Xbad_alloc();	// report no memory
	const size_t _User_size = _Count * _Sz;

 #if defined(_M_IX86) || defined(_M_X64)
	if (_Try_aligned_allocation
		&& _BIG_ALLOCATION_THRESHOLD <= _User_size)
		{	// allocate large block
		static_assert(sizeof (void *) < _BIG_ALLOCATION_ALIGNMENT,
			"Big allocations should at least match vector register size");
		const size_t _Block_size = _NON_USER_SIZE + _User_size;
		if (_Block_size <= _User_size)
			_Xbad_alloc();	// report no memory
		const uintptr_t _Ptr_container =
			reinterpret_cast<uintptr_t>(::operator new(_Block_size));
		_SCL_SECURE_ALWAYS_VALIDATE(_Ptr_container != 0);
		_Ptr = reinterpret_cast<void *>((_Ptr_container + _NON_USER_SIZE)
			& ~(_BIG_ALLOCATION_ALIGNMENT - 1));
		static_cast<uintptr_t *>(_Ptr)[-1] = _Ptr_container;

 #ifdef _DEBUG
		static_cast<uintptr_t *>(_Ptr)[-2] = _BIG_ALLOCATION_SENTINEL;
 #endif /* _DEBUG */
		}
	else
 #endif /* defined(_M_IX86) || defined(_M_X64) */

		{	// allocate normal block
		_Ptr = ::operator new(_User_size);
		_SCL_SECURE_ALWAYS_VALIDATE(_Ptr != 0);
		}
	return (_Ptr);
	}

里面就是调用_Xbad_alloc()或者 ::operator new 来实现内存的分配，所以实际上VS并没有对STL的allocator做特别的优化。
同样的，通过查看 deallocate() 对应的源代码，也看出来VS在释放的时候也只是一个对 operator delete 的封装而已。因此，可以认为VS在这一方面并没有做特殊设计。

GCC2.9 中的allocator

同样，我们也去看GCC中容器的实现：

//gcc-2.95.1libstdc++stlstl_vector.h
template <class _Tp, class _Alloc = __STL_DEFAULT_ALLOCATOR(_Tp) >
class vector : protected _Vector_base<_Tp, _Alloc> 
{
    ···
};

//gcc-2.95.1libstdc++stlstl_list.h
template <class _Tp, class _Alloc = __STL_DEFAULT_ALLOCATOR(_Tp) >
class list : protected _List_base<_Tp, _Alloc> {
    ...
};

//在stl_config.h中
# ifndef __STL_DEFAULT_ALLOCATOR
#   ifdef __STL_USE_STD_ALLOCATORS
#     define __STL_DEFAULT_ALLOCATOR(T) allocator<T>
#   else
#     define __STL_DEFAULT_ALLOCATOR(T) alloc
#   endif
# endif

可以看到，宏 __STL_DEFAULT_ALLOCATOR，如果没有特别说明的情况下，是使用alloc的，那么我们来看看alloc类的实现：

//默认情况下 __NODE_ALLOCATOR_THREADS 为false，所以是单线程的
# ifdef _NOTHREADS
#   define __NODE_ALLOCATOR_LOCK
#   define __NODE_ALLOCATOR_UNLOCK
#   define __NODE_ALLOCATOR_THREADS false
#   define __VOLATILE
# endif
typedef __default_alloc_template<__NODE_ALLOCATOR_THREADS, 0> alloc;

template <bool threads, int inst>
class __default_alloc_template {
    ...
private:
    enum {_ALIGN = 8};
    enum {_MAX_BYTES = 128};
    enum {_NFREELISTS = _MAX_BYTES/_ALIGN};

    union _Obj {
        union _Obj* _M_free_list_link;
        char _M_client_data[1];    /* The client sees this.        */
    };

    static _Obj* __VOLATILE _S_free_list[_NFREELISTS];
    // Chunk allocation state.
    static char* _S_start_free;
    static char* _S_end_free;
    static size_t _S_heap_size;

    ...
};

借用侯捷老师在课程中用的图，其实在gcc的alloc中，设计了一个16条链表的数组 _S_free_list ，每一条链表负责某个大小的特定区块的分配，分别负责从8个字节到128个字节的区块。当容器需要分配内存的时候，都是从这个分配器中去申请，然后大小向上调整到8的倍数（例如120字节会调整到128字节），然后到分配器里再去对应的链表中搜索是否有空闲的区块（例如128字节需要到 __S_free_list[15] 里面搜索），如果该链表中没有挂着空闲内存，才会通过malloc向系统申请一大块内存再切割。

avatar

同样，再往下看，看看该类中的 allocate 和 deallocate的实现：

template <bool threads, int inst>
class __default_alloc_template {
    ...

      static  size_t _S_freelist_index(size_t __bytes) {
        return (((__bytes) + _ALIGN-1)/_ALIGN - 1);
      }

    public:
      /* __n must be > 0      */
      static void* allocate(size_t __n)
      {
        _Obj* __VOLATILE* __my_free_list;
        _Obj* __RESTRICT __result;

        if (__n > (size_t) _MAX_BYTES) {
            return(malloc_alloc::allocate(__n));
        }
        __my_free_list = _S_free_list + _S_freelist_index(__n);

        __result = *__my_free_list;
        if (__result == 0) {
            void* __r = _S_refill(_S_round_up(__n));
            return __r;
        }
        *__my_free_list = __result -> _M_free_list_link;
        return (__result);
      };

      /* __p may not be 0 */
      static void deallocate(void* __p, size_t __n)
      {
        _Obj* __q = (_Obj*)__p;
        _Obj* __VOLATILE* __my_free_list;

        if (__n > (size_t) _MAX_BYTES) {
            malloc_alloc::deallocate(__p, __n);
            return;
        }
        __my_free_list = _S_free_list + _S_freelist_index(__n);

        __q -> _M_free_list_link = *__my_free_list;
        *__my_free_list = __q;
        // lock is released here
      }

  ...
};

_S_freelist_index(__n) 这个函数可以计算出需要的空间会落在 _S_free_list 这个数组的哪个index上，再从中取出一块空余内存分配。而如果没有空余内存，则会调用 _S_refill(_S_round_up(__n)) 该函数重新分配一段内存，源代码如下：

template <bool __threads, int __inst>
void*
__default_alloc_template<__threads, __inst>::_S_refill(size_t __n)
{
    int __nobjs = 20;
    char* __chunk = _S_chunk_alloc(__n, __nobjs);
    _Obj* __VOLATILE* __my_free_list;
    _Obj* __result;
    _Obj* __current_obj;
    _Obj* __next_obj;
    int __i;

    if (1 == __nobjs) return(__chunk);
    __my_free_list = _S_free_list + _S_freelist_index(__n);

    /* Build free list in chunk */
      __result = (_Obj*)__chunk;
      *__my_free_list = __next_obj = (_Obj*)(__chunk + __n);
      for (__i = 1; ; __i++) {
        __current_obj = __next_obj;
        __next_obj = (_Obj*)((char*)__next_obj + __n);
        if (__nobjs - 1 == __i) {
            __current_obj -> _M_free_list_link = 0;
            break;
        } else {
            __current_obj -> _M_free_list_link = __next_obj;
        }
      }
    return(__result);
}

其中，真正申请空间的函数就是 _S_chunk_alloc(__n, __nobjs) ，其中 __nobjs = 20，阅读下面的代码可以看到如果重新分配一块内存，会申请一块20倍大小再多一点的内存：

size_t __total_bytes = __size * __nobjs;
size_t __bytes_to_get = 2 * __total_bytes + _S_round_up(_S_heap_size >> 4);

然后再从该空间中分出一块作为当前分配，具体源码如下：

template <bool __threads, int __inst>
char*
__default_alloc_template<__threads, __inst>::_S_chunk_alloc(size_t __size,
                                                            int& __nobjs)
{
    char* __result;
    size_t __total_bytes = __size * __nobjs;
    size_t __bytes_left = _S_end_free - _S_start_free;

    if (__bytes_left >= __total_bytes) {
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else if (__bytes_left >= __size) {
        __nobjs = (int)(__bytes_left/__size);
        __total_bytes = __size * __nobjs;
        __result = _S_start_free;
        _S_start_free += __total_bytes;
        return(__result);
    } else {
        size_t __bytes_to_get =
	  2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
        // Try to make use of the left-over piece.
        if (__bytes_left > 0) {
            _Obj* __VOLATILE* __my_free_list =
                        _S_free_list + _S_freelist_index(__bytes_left);

            ((_Obj*)_S_start_free) -> _M_free_list_link = *__my_free_list;
            *__my_free_list = (_Obj*)_S_start_free;
        }
        _S_start_free = (char*)malloc(__bytes_to_get);
        if (0 == _S_start_free) {
            size_t __i;
            _Obj* __VOLATILE* __my_free_list;
	    _Obj* __p;
            // Try to make do with what we have.  That can't
            // hurt.  We do not try smaller requests, since that tends
            // to result in disaster on multi-process machines.
            for (__i = __size; __i <= _MAX_BYTES; __i += _ALIGN) {
                __my_free_list = _S_free_list + _S_freelist_index(__i);
                __p = *__my_free_list;
                if (0 != __p) {
                    *__my_free_list = __p -> _M_free_list_link;
                    _S_start_free = (char*)__p;
                    _S_end_free = _S_start_free + __i;
                    return(_S_chunk_alloc(__size, __nobjs));
                    // Any leftover piece will eventually make it to the
                    // right free list.
                }
            }
	    _S_end_free = 0;	// In case of exception.
            _S_start_free = (char*)malloc_alloc::allocate(__bytes_to_get);
            // This should either throw an
            // exception or remedy the situation.  Thus we assume it
            // succeeded.
        }
        _S_heap_size += __bytes_to_get;
        _S_end_free = _S_start_free + __bytes_to_get;
        return(_S_chunk_alloc(__size, __nobjs));
    }
}

相关阅读:
springmvc
POJ 3683 Priest John's Busiest Day
POJ 3678 Katu Puzzle
HDU 1815 Building roads
CDOJ UESTC 1220 The Battle of Guandu
HDU 3715 Go Deeper
HDU 3622 Bomb Game
POJ 3207 Ikki's Story IV
POJ 3648 Wedding
HDU 1814 Peaceful Commission
原文地址：https://www.cnblogs.com/zhqherm/p/12156450.html