    pagemap_(64位系统使用三层radix-tree TCMalloc_PageMap3,32位使用两层 TCMalloc_PageMap2)。




    之前说 pagemap_是PageId ----> Span的映射,再show一下这个图


    pagemap_cache_是PageId -----> SizeClass的映射,用的数据结构是一个压缩的哈希表PackedCache,实现上就是用了一个数组,然后用key做hash插入到对应的entry,但是内部却有不少细节:PackedCache有三个位数参数kHashbits,kValuebits,kKeybits。数组的长度是1 << kHashbits,hash函数直接是用key对长度取模(key & ((1 << kHashbits) - 1));kValuebits和kKeybits代表value和key分别占多少位;有一个问题,如果kKeybits大于了kHashbits,那就有可能多个key映射到同一个entry,为了区分开,每一个entry存放的值由key的高位部分(低kHashbits位已经隐含在数组下标里)和所有value位拼接而成: |  kKeybits - kHashbits   |  kValuebits  | 。


    PageId -----> SizeClass的映射能用到压缩的哈希表,也是因为SizeClass是一个比较小的值,如果纯用作Value的话比如Value可能是16位或者64位,会浪费一些数位,剩余的数位也可以利用起来做key。



    const PageID p = reinterpret_cast<uintptr_t >(ptr) >> kPageShift;


    size_t cl = Static:: pageheap()->GetSizeClassIfCached (p);


    span = Static ::pageheap()-> GetDescriptor(p );

    cl = span ->sizeclass;


    heap->Deallocate (ptr, cl);


    函数ThreadCache:: Deallocate 的实现:

    inline void ThreadCache:: Deallocate(void * ptr, size_t cl ) {
    FreeList* list = &list_ [cl];
    size_ += Static::sizemap ()->ByteSizeForClass( cl);
    ssize_t size_headroom = max_size_ - size_ - 1;
    // This catches back-to-back frees of allocs in the same size
    // class. A more comprehensive (and expensive) test would be to walk
    // the entire freelist. But this might be enough to find some bugs.
    ASSERT( ptr != list ->Next());
    list-> Push(ptr );
    ssize_t list_headroom =
    static_cast<ssize_t >(list-> max_length()) - list ->length();
    // There are two relatively uncommon things that require further work.
    // In the common case we're done, and in that case we need a single branch
    // because of the bitwise-or trick that follows.
    if (( list_headroom | size_headroom ) < 0) {
    if (list_headroom < 0) {
    ListTooLong(list , cl);
    if (size_ >= max_size_) Scavenge();


    void ThreadCache ::ListTooLong( FreeList* list , size_t cl) {
    const int batch_size = Static:: sizemap()->num_objects_to_move (cl);
    ReleaseToCentralCache( list, cl , batch_size);
    // If the list is too long, we need to transfer some number of
    // objects to the central cache.  Ideally, we would transfer
    // num_objects_to_move, so the code below tries to make max_length
    // converge on num_objects_to_move.
    if ( list->max_length () < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length (list-> max_length() + 1);
      } else if (list ->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages (list-> length_overages() + 1);
    if (list ->length_overages() > kMaxOverages) {
    ASSERT(list ->max_length() > batch_size);
    list->set_max_length (list-> max_length() - batch_size );
    list->set_length_overages (0);

    当长度超过上限的时候,移回部分空闲对象到Central Cache中去,ReleaseToCentralCache实现不贴了,无非就是从线程FreeList弹出指定个内存对象插入到对应CentralFreeList中去。

    在Central Cache中释放


    void CentralFreeList ::ReleaseListToSpans( void* start ) {
    while ( start) {
    void *next = SLL_Next( start);
    ReleaseToSpans(start );
    start = next ;

    就是一个一个内存对象调用ReleaseToSpans 释放,ReleaseToSpans 如下:

    void CentralFreeList ::ReleaseToSpans( void* object ) {
    Span* span = MapObjectToSpan (object);
    ASSERT( span != NULL );
    ASSERT( span->refcount > 0);
    // If span is empty, move it to non-empty list
    if ( span->objects == NULL) {
    tcmalloc::DLL_Remove (span);
    tcmalloc::DLL_Prepend (&nonempty_, span);
    Event(span , 'N', 0);
    // The following check is expensive, so it is disabled by default
    if ( false) {
    // Check that object does not occur in list
    int got = 0;
    for (void * p = span->objects ; p != NULL; p = *((void**) p)) {
    ASSERT(p != object);
    ASSERT(got + span-> refcount ==
               ( span->length <<kPageShift) /
    Static::sizemap ()->ByteSizeForClass( span->sizeclass ));
    span-> refcount--;
    if ( span->refcount == 0) {
    Event(span , '#', 0);
    counter_ -= ((span ->length<< kPageShift) /
    Static::sizemap ()->ByteSizeForClass( span->sizeclass ));
    tcmalloc::DLL_Remove (span);
        -- num_spans_;
    // Release central list lock while operating on pageheap
    lock_.Unlock ();
    SpinLockHolder h(Static ::pageheap_lock());
    Static::pageheap ()->Delete( span);
    lock_.Lock ();
      } else {
        *( reinterpret_cast<void **>(object)) = span->objects ;
    span->objects = object;


    1.判断这个Span所标识的对象是不是之前已经全部分配出去了(span->objects == NULL),若是就要把它从CentralFreeList的empty_ Spans List中挪到nonempty_ Spans List中,因为马上要把返还的内存对象挂回这个Span了



    与在PageHeap中分配内存的New对应,释放内存是Delete。Delete主要是取消Span与某个SizeClass的关联,并把这个Span的状态从正在使用(IN_USE)改标记为ON_NORMAL_FREELIST,表示即将要放入normal list中,之后就是调用MergeIntoFreeList,即和邻近的空闲内存合并后放入空闲链表中。MergeIntoFreeList的实现如下:

    void PageHeap ::MergeIntoFreeList( Span* span ) {
    ASSERT( span->location != Span:: IN_USE);
    // Coalesce -- we guarantee that "p" != 0, so no bounds checking
    // necessary.  We do not bother resetting the stale pagemap
    // entries for the pieces we are merging together because we only
    // care about the pagemap entries for the boundaries.
    // Note that only similar spans are merged together.  For example,
    // we do not coalesce "returned" spans with "normal" spans.
    const PageID p = span-> start;
    const Length n = span-> length;
    Span* prev = GetDescriptor (p-1);
    if ( prev != NULL && prev-> location == span ->location) {
    // Merge preceding span into this span
    ASSERT(prev ->start + prev->length == p);
    const Length len = prev->length ;
    RemoveFromFreeList(prev );
    DeleteSpan(prev );
    span->start -= len;
    span->length += len;
    pagemap_.set (span-> start, span );
    Event(span , 'L', len);
    Span* next = GetDescriptor (p+ n);
    if ( next != NULL && next-> location == span ->location) {
    // Merge next span into this span
    ASSERT(next ->start == p+n );
    const Length len = next->length ;
    RemoveFromFreeList(next );
    DeleteSpan(next );
    span->length += len;
    pagemap_.set (span-> start + span ->length - 1, span);
    Event(span , 'R', len);
    PrependToFreeList( span);

    MergeIntoFreeList就是取目标Span标识的内存邻近的页对应的Span出来,判断如果能和目标Span合并就合并之,之后才插入到normal free list中去。


    void PageHeap ::IncrementalScavenge( Length n ) {
    // Fast path; not yet time to release memory
    scavenge_counter_ -= n;
    if ( scavenge_counter_ >= 0) return ;  // Not yet time to scavenge
    const double rate = FLAGS_tcmalloc_release_rate;
    if ( rate <= 1e-6) {
    // Tiny release rate means that releasing is disabled.
    scavenge_counter_ = kDefaultReleaseDelay ;
    Length released_pages = ReleaseAtLeastNPages (1);
    if ( released_pages == 0) {
    // Nothing to scavenge, delay for a while.
    scavenge_counter_ = kDefaultReleaseDelay ;
      } else {
    // Compute how long to wait until we return memory.
    // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages
    // after releasing one page.
    const double mult = 1000.0 / rate;
    double wait = mult * static_cast<double >(released_pages);
    if (wait > kMaxReleaseDelay) {
    // Avoid overflow and bound to reasonable range.
    wait = kMaxReleaseDelay ;
    scavenge_counter_ = static_cast <int64_t>( wait);

    又是ReleaseAtLeastNPages 调用,在TCMalloc源码分析(三)中有详细分析这个调用,记住Windows上内存是不还给系统的,细节不再复述了。


    之前说到ListTooLong返还内存给Central Cache后,调整了max_length,主要是怕链表后面的空闲内存一直在本地线程中,自己不用也不释放给其他线程用。


    void ThreadCache ::Scavenge() {
    // If the low-water mark for the free list is L, it means we would
    // not have had to allocate anything from the central cache even if
    // we had reduced the free list size by L.  We aim to get closer to
    // that situation by dropping L/2 nodes from the free list.  This
    // may not release much memory, but if so we will call scavenge again
    // pretty soon and the low-water marks will be high on that call.
    //int64 start = CycleClock::Now();
    for ( int cl = 0; cl < kNumClasses; cl ++) {
    FreeList* list = &list_[ cl];
    const int lowmark = list->lowwatermark ();
    if (lowmark > 0) {
    const int drop = ( lowmark > 1) ? lowmark /2 : 1;
    ReleaseToCentralCache(list , cl, drop);
    // Shrink the max length if it isn't used.  Only shrink down to
    // batch_size -- if the thread was active enough to get the max_length
    // above batch_size, it will likely be that active again.  If
    // max_length shinks below batch_size, the thread will have to
    // go through the slow-start behavior again.  The slow-start is useful
    // mainly for threads that stay relatively idle for their entire
    // lifetime.
    const int batch_size = Static::sizemap ()->num_objects_to_move( cl);
    if (list ->max_length() > batch_size) {
    list->set_max_length (
    max<int >(list-> max_length() - batch_size , batch_size));
    list->clear_lowwatermark ();

    整个过程就是遍历所有FreeList进行逐一释放,每一个FreeList有一个lowwatermark L,代表自上次回收以来FreeList长度的最低值,也就是这段时间里一直没有被用到的object个数;每次回收时释放 L/2个object(L为1时释放1个),然后清零重新统计。下次回收时L又表示自从上次回收后一直没有用过的内存,那就把它还给Central Cache吧。这就是用历史记录预测未来内存使用情况的策略。


    void ThreadCache ::IncreaseCacheLimitLocked() {
    if ( unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount ;
    max_size_ += kStealAmount ;
    // Don't hold pageheap_lock too long.  Try to steal from 10 other
    // threads before giving up.  The i < 10 condition also prevents an
    // infinite loop in case none of the existing thread heaps are
    // suitable places to steal from.
    for ( int i = 0; i < 10;
           ++ i, next_memory_steal_ = next_memory_steal_-> next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
    ASSERT(thread_heaps_ != NULL);
    next_memory_steal_ = thread_heaps_ ;
    if (next_memory_steal_ == this ||
    next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount ;
    next_memory_steal_ = next_memory_steal_ ->next_;

    kStealAmount是在ThreadCache被强制Scavenge后,max_size_应该从unclaimed_cache_space_或者其他线程偷取的字节数,这样就可以使得下次 Scavenge被延迟避免频繁Scavenge。这个过程其实是在表达这样的意思:这次是我花时间把自己的内存返还给Central Cache了,下次轮到其他线程去做了。因为这个过程是在多个线程之间调整他们所能够拥有的内存上限,所以当然要用到锁了。



