• Dynamic memory in Go


    The design of Go's dynamic memory allocation and release comes from tcmalloc.
    Main data structures (a brief example follows the list):
    MHeap: the malloc heap, which manages pages
    MCentral: a free list of small objects of one size class, shared by all threads
    MCache: a per-P cache of free lists of small objects, accessed without locking
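 
    The size-class-based layering is indirectly visible through the exported runtime.MemStats
    API: its BySize counters roughly correspond to the small-object size classes served by the
    central free lists, while anything larger is handled as a "large" allocation. This is only a
    hedged observability sketch using exported APIs, not a dump of the internal structures.

    // Prints the small-object size classes as exposed through runtime.MemStats.BySize;
    // objects larger than the last listed class take the large-object path.
    package main

    import (
        "fmt"
        "runtime"
    )

    func main() {
        var ms runtime.MemStats
        runtime.ReadMemStats(&ms)
        for _, b := range ms.BySize {
            if b.Mallocs == 0 && b.Frees == 0 {
                continue // skip size classes this program never used
            }
            fmt.Printf("size class %5d bytes: %d mallocs, %d frees\n",
                b.Size, b.Mallocs, b.Frees)
        }
    }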

    Allocating small objects

    1. Look up the free list of the matching size class in the MCache. If that free list is
       non-empty, take an object from it directly; this path requires no locking at all.
    2. If the MCache free list is empty, fetch a batch of free objects from the MCentral.
    3. If the MCentral free list is also empty, request some pages from the MHeap and add that
       page memory to the corresponding MCentral free list.
    4. If the MHeap does not have enough cached pages, request pages from the operating system
       (at least 1 MB at a time). A simplified sketch of this fallback chain follows the list.
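 
    The following is a minimal, hypothetical sketch of that fallback order (MCache -> MCentral ->
    MHeap -> OS). The types and functions (mcacheSketch, mcentralSketch, mheapSketch, alloc,
    refill, grow) are invented for illustration and do not match the real runtime definitions.

    // A deliberately simplified model of the small-object allocation path.
    package allocsketch

    const numSizeClasses = 67 // illustrative only; the real table is defined by the runtime

    type object struct{ next *object }

    type mcacheSketch struct{ free [numSizeClasses]*object } // per-P free lists
    type mcentralSketch struct{ free []*object }             // shared list for one size class
    type mheapSketch struct{ pages int }                     // pages cached from the OS

    // alloc hands out one object of the given size class, refilling caches on demand.
    func (c *mcacheSketch) alloc(sizeclass int, cen *mcentralSketch, h *mheapSketch) *object {
        if v := c.free[sizeclass]; v != nil {
            c.free[sizeclass] = v.next // 1. fast path: local free list, no locking
            return v
        }
        c.refill(sizeclass, cen, h) // 2. MCache list empty: pull a batch from MCentral
        v := c.free[sizeclass]
        c.free[sizeclass] = v.next
        return v
    }

    func (c *mcacheSketch) refill(sizeclass int, cen *mcentralSketch, h *mheapSketch) {
        if len(cen.free) == 0 {
            cen.grow(h) // 3. MCentral empty: carve objects out of MHeap pages
        }
        for i := 0; i < 4 && len(cen.free) > 0; i++ { // move a small batch into the local cache
            o := cen.free[len(cen.free)-1]
            cen.free = cen.free[:len(cen.free)-1]
            o.next = c.free[sizeclass]
            c.free[sizeclass] = o
        }
    }

    func (cen *mcentralSketch) grow(h *mheapSketch) {
        if h.pages == 0 {
            h.pages += 256 // 4. MHeap empty: ask the OS for at least 1 MB of pages
        }
        h.pages--
        // split the page into objects of this size class (details elided)
        for i := 0; i < 8; i++ {
            cen.free = append(cen.free, new(object))
        }
    }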

    Allocating large objects

    Large objects (larger than 32 KB) are allocated directly from the MHeap.

    Requesting dynamic memory

    // Allocate an object of size bytes.
    // Small objects are allocated from the per-P cache's free lists.
    // Large objects (> 32 kB) are allocated straight from the heap.
    func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
        if size == 0 {
        	return unsafe.Pointer(&zerobase)
        }
        size0 := size
     
        if flags&flagNoScan == 0 && typ == nil {
        	gothrow("malloc missing type")
        }
     
        // This function must be atomic wrt GC, but for performance reasons
        // we don't acquirem/releasem on fast path. The code below does not have
        // split stack checks, so it can't be preempted by GC.
        // Functions like roundup/add are inlined. And onM/racemalloc are nosplit.
        // If debugMalloc = true, these assumptions are checked below.
        if debugMalloc {
        	mp := acquirem()
        	if mp.mallocing != 0 {
        		gothrow("malloc deadlock")
        	}
        	mp.mallocing = 1
        	if mp.curg != nil {
        		mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
        	}
        }
     
        c := gomcache()
        var s *mspan
        var x unsafe.Pointer
        if size <= maxSmallSize {
        	if flags&flagNoScan != 0 && size < maxTinySize {
        		// Tiny allocator.
        		//
        		// Tiny allocator combines several tiny allocation requests
        		// into a single memory block. The resulting memory block
        		// is freed when all subobjects are unreachable. The subobjects
        		// must be FlagNoScan (don't have pointers), this ensures that
        		// the amount of potentially wasted memory is bounded.
        		//
        		// Size of the memory block used for combining (maxTinySize) is tunable.
        		// Current setting is 16 bytes, which relates to 2x worst case memory
        		// wastage (when all but one subobjects are unreachable).
        		// 8 bytes would result in no wastage at all, but provides less
        		// opportunities for combining.
        		// 32 bytes provides more opportunities for combining,
        		// but can lead to 4x worst case wastage.
        		// The best case winning is 8x regardless of block size.
        		//
        		// Objects obtained from tiny allocator must not be freed explicitly.
        		// So when an object will be freed explicitly, we ensure that
        		// its size >= maxTinySize.
        		//
        		// SetFinalizer has a special case for objects potentially coming
        		// from tiny allocator, it such case it allows to set finalizers
        		// for an inner byte of a memory block.
        		//
        		// The main targets of tiny allocator are small strings and
        		// standalone escaping variables. On a json benchmark
        		// the allocator reduces number of allocations by ~12% and
        		// reduces heap size by ~20%.
        		tinysize := uintptr(c.tinysize)
        		if size <= tinysize {
        			tiny := unsafe.Pointer(c.tiny)
        			// Align tiny pointer for required (conservative) alignment.
        			if size&7 == 0 {
        				tiny = roundup(tiny, 8)
        			} else if size&3 == 0 {
        				tiny = roundup(tiny, 4)
        			} else if size&1 == 0 {
        				tiny = roundup(tiny, 2)
        			}
        			size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny)))
        			if size1 <= tinysize {
        				// The object fits into existing tiny block.
        				x = tiny
        				c.tiny = (*byte)(add(x, size))
        				c.tinysize -= uintptr(size1)
        				c.local_tinyallocs++
        				if debugMalloc {
        					mp := acquirem()
        					if mp.mallocing == 0 {
        						gothrow("bad malloc")
        					}
        					mp.mallocing = 0
        					if mp.curg != nil {
        						mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
        					}
        					// Note: one releasem for the acquirem just above.
        					// The other for the acquirem at start of malloc.
        					releasem(mp)
        					releasem(mp)
        				}
        				return x
        			}
        		}
        		// Allocate a new maxTinySize block.
        		s = c.alloc[tinySizeClass]
        		v := s.freelist
        		if v == nil {
        			mp := acquirem()
        			mp.scalararg[0] = tinySizeClass
        			onM(mcacheRefill_m)
        			releasem(mp)
        			s = c.alloc[tinySizeClass]
        			v = s.freelist
        		}
        		s.freelist = v.next
        		s.ref++
        		//TODO: prefetch v.next
        		x = unsafe.Pointer(v)
        		(*[2]uint64)(x)[0] = 0
        		(*[2]uint64)(x)[1] = 0
        		// See if we need to replace the existing tiny block with the new one
        		// based on amount of remaining free space.
        		if maxTinySize-size > tinysize {
        			c.tiny = (*byte)(add(x, size))
        			c.tinysize = uintptr(maxTinySize - size)
        		}
        		size = maxTinySize
        	} else {
        		var sizeclass int8
        		if size <= 1024-8 {
        			sizeclass = size_to_class8[(size+7)>>3]
        		} else {
        			sizeclass = size_to_class128[(size-1024+127)>>7]
        		}
        		size = uintptr(class_to_size[sizeclass])
        		s = c.alloc[sizeclass]
        		v := s.freelist
        		if v == nil {
        			mp := acquirem()
        			mp.scalararg[0] = uintptr(sizeclass)
        			onM(mcacheRefill_m)
        			releasem(mp)
        			s = c.alloc[sizeclass]
        			v = s.freelist
        		}
        		s.freelist = v.next
        		s.ref++
        		//TODO: prefetch
        		x = unsafe.Pointer(v)
        		if flags&flagNoZero == 0 {
        			v.next = nil
        			if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
        				memclr(unsafe.Pointer(v), size)
        			}
        		}
        	}
        	c.local_cachealloc += intptr(size)
        } else {
        	mp := acquirem()
        	mp.scalararg[0] = uintptr(size)
        	mp.scalararg[1] = uintptr(flags)
        	onM(largeAlloc_m)
        	s = (*mspan)(mp.ptrarg[0])
        	mp.ptrarg[0] = nil
        	releasem(mp)
        	x = unsafe.Pointer(uintptr(s.start << pageShift))
        	size = uintptr(s.elemsize)
        }
     
        if flags&flagNoScan != 0 {
        	// All objects are pre-marked as noscan.
        	goto marked
        }
     
        // If allocating a defer+arg block, now that we've picked a malloc size
        // large enough to hold everything, cut the "asked for" size down to
        // just the defer header, so that the GC bitmap will record the arg block
        // as containing nothing at all (as if it were unused space at the end of
        // a malloc block caused by size rounding).
        // The defer arg areas are scanned as part of scanstack.
        if typ == deferType {
        	size0 = unsafe.Sizeof(_defer{})
        }
     
        // From here till marked label marking the object as allocated
        // and storing type info in the GC bitmap.
        {
        	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
        	off := (uintptr(x) - arena_start) / ptrSize
        	xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
        	shift := (off % wordsPerBitmapByte) * gcBits
        	if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
        		println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
        		gothrow("bad bits in markallocated")
        	}
     
        	var ti, te uintptr
        	var ptrmask *uint8
        	if size == ptrSize {
        		// It's one word and it has pointers, it must be a pointer.
        		*xbits |= (bitsPointer << 2) << shift
        		goto marked
        	}
        	if typ.kind&kindGCProg != 0 {
        		nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
        		masksize := nptr
        		if masksize%2 != 0 {
        			masksize *= 2 // repeated
        		}
        		masksize = masksize * pointersPerByte / 8 // 4 bits per word
        		masksize++                                // unroll flag in the beginning
        		if masksize > maxGCMask && typ.gc[1] != 0 {
        			// If the mask is too large, unroll the program directly
        			// into the GC bitmap. It's 7 times slower than copying
        			// from the pre-unrolled mask, but saves 1/16 of type size
        			// memory for the mask.
        			mp := acquirem()
        			mp.ptrarg[0] = x
        			mp.ptrarg[1] = unsafe.Pointer(typ)
        			mp.scalararg[0] = uintptr(size)
        			mp.scalararg[1] = uintptr(size0)
        			onM(unrollgcproginplace_m)
        			releasem(mp)
        			goto marked
        		}
        		ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
        		// Check whether the program is already unrolled.
        		if uintptr(atomicloadp(unsafe.Pointer(ptrmask)))&0xff == 0 {
        			mp := acquirem()
        			mp.ptrarg[0] = unsafe.Pointer(typ)
        			onM(unrollgcprog_m)
        			releasem(mp)
        		}
        		ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
        	} else {
        		ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
        	}
        	if size == 2*ptrSize {
        		*xbits = *ptrmask | bitBoundary
        		goto marked
        	}
        	te = uintptr(typ.size) / ptrSize
        	// If the type occupies odd number of words, its mask is repeated.
        	if te%2 == 0 {
        		te /= 2
        	}
        	// Copy pointer bitmask into the bitmap.
        	for i := uintptr(0); i < size0; i += 2 * ptrSize {
        		v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
        		ti++
        		if ti == te {
        			ti = 0
        		}
        		if i == 0 {
        			v |= bitBoundary
        		}
        		if i+ptrSize == size0 {
        			v &^= uint8(bitPtrMask << 4)
        		}
     
        		*xbits = v
        		xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
        	}
        	if size0%(2*ptrSize) == 0 && size0 < size {
        		// Mark the word after last object's word as bitsDead.
        		*xbits = bitsDead << 2
        	}
        }
    marked:
        if raceenabled {
        	racemalloc(x, size)
        }
     
        if debugMalloc {
        	mp := acquirem()
        	if mp.mallocing == 0 {
        		gothrow("bad malloc")
        	}
        	mp.mallocing = 0
        	if mp.curg != nil {
        		mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
        	}
        	// Note: one releasem for the acquirem just above.
        	// The other for the acquirem at start of malloc.
        	releasem(mp)
        	releasem(mp)
        }
     
        if debug.allocfreetrace != 0 {
        	tracealloc(x, size, typ)
        }
     
        if rate := MemProfileRate; rate > 0 {
        	if size < uintptr(rate) && int32(size) < c.next_sample {
        		c.next_sample -= int32(size)
        	} else {
        		mp := acquirem()
        		profilealloc(mp, x, size)
        		releasem(mp)
        	}
        }
     
        if memstats.heap_alloc >= memstats.next_gc {
        	gogc(0)
        }
     
        return x
    }
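 
    User code never calls mallocgc directly; the compiler lowers new, make, and escaping composite
    literals into runtime allocation calls that eventually reach this function. The small program
    below is only an illustration using exported APIs (runtime.ReadMemStats); the node type is
    invented for the example, and the exact counter values will vary from run to run.

    // Observes small heap allocations flowing through the runtime allocator.
    package main

    import (
        "fmt"
        "runtime"
    )

    type node struct {
        payload [24]byte // a small object, served by the small-object path
        next    *node
    }

    var keep *node // package-level sink so the allocations escape to the heap

    func main() {
        var before, after runtime.MemStats
        runtime.ReadMemStats(&before)

        for i := 0; i < 100000; i++ {
            keep = &node{next: keep} // lowered by the compiler into a runtime allocation call
        }

        runtime.ReadMemStats(&after)
        fmt.Printf("mallocs:    +%d\n", after.Mallocs-before.Mallocs)
        fmt.Printf("heap bytes: +%d\n", after.HeapAlloc-before.HeapAlloc)
    }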
    

    Freeing dynamic memory

    Go has no equivalent of C's free function; dynamic memory is released by the garbage collector,
    and each release returns not a single object but n objects belonging to one span.

    // Free n objects from a span s back into the central free list c.
    // Called during sweep.
    // Returns true if the span was returned to heap.  Sets sweepgen to
    // the latest generation.
    // If preserve=true, don't return the span to heap nor relink in MCentral lists;
    // caller takes care of it.
    bool
    runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
    {
        bool wasempty;
     
        if(s->incache)
        	runtime·throw("freespan into cached span");
     
        // Add the objects back to s's free list.
        wasempty = s->freelist == nil;
        end->next = s->freelist;
        s->freelist = start;
        s->ref -= n;
     
        if(preserve) {
        	// preserve is set only when called from MCentral_CacheSpan above,
        	// the span must be in the empty list.
        	if(s->next == nil)
        		runtime·throw("can't preserve unlinked span");
        	runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
        	return false;
        }
     
        runtime·lock(&c->lock);
     
        // Move to nonempty if necessary.
        if(wasempty) {
        	runtime·MSpanList_Remove(s);
        	runtime·MSpanList_Insert(&c->nonempty, s);
        }
     
        // delay updating sweepgen until here.  This is the signal that
        // the span may be used in an MCache, so it must come after the
        // linked list operations above (actually, just after the
        // lock of c above.)
        runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
     
        if(s->ref != 0) {
        	runtime·unlock(&c->lock);
        	return false;
        }
     
        // s is completely freed, return it to the heap.
        runtime·MSpanList_Remove(s);
        s->needzero = 1;
        s->freelist = nil;
        runtime·unlock(&c->lock);
        runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
        runtime·MHeap_Free(&runtime·mheap, s, 0);
        return true;
    }
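 
    Correspondingly, the only way for user code to release heap memory is to drop all references
    and let the collector sweep the spans, as MCentral_FreeSpan does above. The snippet below is a
    hedged illustration using only exported APIs; the exact figures depend on GC pacing and on how
    eagerly swept memory is accounted for.

    // Demonstrates that freeing in Go is driven by the garbage collector:
    // dropping the last reference and forcing a collection lowers HeapAlloc.
    package main

    import (
        "fmt"
        "runtime"
    )

    func heapAlloc() uint64 {
        var ms runtime.MemStats
        runtime.ReadMemStats(&ms)
        return ms.HeapAlloc
    }

    func main() {
        data := make([][]byte, 0, 100000)
        for i := 0; i < 100000; i++ {
            data = append(data, make([]byte, 512)) // ~50 MB of small objects
        }
        fmt.Printf("after allocating: %d bytes live\n", heapAlloc())

        data = nil   // drop the only reference to all of the objects
        runtime.GC() // the sweep returns them to the free lists, span by span
        fmt.Printf("after GC:         %d bytes live\n", heapAlloc())
    }
 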
  • Source: https://www.cnblogs.com/richmonkey/p/4509656.html