• go语言的GC


    mark&sweep, 2分钟保证至少一次GC过程,如果分配的总内存超过上次分配的总内存一定比例(默认100%)后进行一次GC
    进行mark的过程中,会停止一切G的运行,mark的过程是多任务并发的
    sweep的过程是分散的

    mark过程

    整个程序内存块包括 .data, .bss, 每个G的stack, SpecialFinalizer
    每段内存都有其相应的bitmap,用来表示每个word(8字节)的标志位,每个word需要4bit的标志位
    mark的过程就是递归遍历内存块bitmap的过程
    word标志位有如下3种类型:

    1. 标量

    2. 指针

    3. 连续两个word表示一个iface或eface

      // scanblock scans a block of n bytes starting at pointer b for references
      // to other objects, scanning any it finds recursively until there are no
      // unscanned objects left. Instead of using an explicit recursion, it keeps
      // a work list in the Workbuf* structures and loops in the main function
      // body. Keeping an explicit work list is easier on the stack allocator and
      // more efficient.
      // NOTE: lines of "......" mark code elided in this excerpt.
      static void
      scanblock(byte *b, uintptr n, byte *ptrmask)
      {
      byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
      uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
      Workbuf *wbuf;
      Iface *iface;
      Eface *eface;
      Type *typ;
      MSpan *s;
      pageID k;
      bool keepworking;

       // Cache memory arena parameters in local vars.
       arena_start = runtime·mheap.arena_start;
       arena_used = runtime·mheap.arena_used;
      
       wbuf = getempty(nil);
       nobj = wbuf->nobj;
       wp = &wbuf->obj[nobj];
       keepworking = b == nil;
       scanbufpos = 0;
       for(i = 0; i < nelem(scanbuf); i++)
       	scanbuf[i] = nil;
      
       ptrbitp = nil;
      
       // ptrmask can have 2 possible values:
       // 1. nil - obtain pointer mask from GC bitmap.
       // 2. pointer to a compact mask (for stacks and data).
       if(b != nil)
       	goto scanobj;
       for(;;) {
       	if(nobj == 0) {
       		// Out of work in workbuf.
       		// First, see if there is any work in scanbuf.
       		for(i = 0; i < nelem(scanbuf); i++) {
       			b = scanbuf[scanbufpos];
       			scanbuf[scanbufpos++] = nil;
       			scanbufpos %= nelem(scanbuf);
       			if(b != nil) {
       				n = arena_used - b; // scan until bitBoundary or BitsDead
       				ptrmask = nil; // use GC bitmap for pointer info
       				goto scanobj;
       			}
       		}
       		if(!keepworking) {
       			putempty(wbuf);
       			return;
       		}
       		// Refill workbuf from global queue.
       		wbuf = getfull(wbuf);
       		if(wbuf == nil)
       			return;
       		nobj = wbuf->nobj;
       		wp = &wbuf->obj[nobj];
       	}
      
       	// If another proc wants a pointer, give it some.
       	if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
       		wbuf->nobj = nobj;
       		wbuf = handoff(wbuf);
       		nobj = wbuf->nobj;
       		wp = &wbuf->obj[nobj];
       	}
      
       	// Pop the next object to scan off the work buffer.
       	wp--;
       	nobj--;
       	b = *wp;
       	n = arena_used - b; // scan until next bitBoundary or BitsDead
       	ptrmask = nil; // use GC bitmap for pointer info
      
       scanobj:
       	if(DebugPtrs)
       		runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
       	// Find bits of the beginning of the object.
       	// The GC bitmap is laid out immediately below arena_start,
       	// growing downward (hence the subtraction from arena_start).
       	if(ptrmask == nil) {
       		off = (uintptr*)b - (uintptr*)arena_start;
       		ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
       	}
       	for(i = 0; i < n; i += PtrSize) {
       		obj = nil;
       		// Find bits for this word.
               ......
      
       		if(bits <= BitsScalar) // BitsScalar || BitsDead
       			continue;
       		if(bits == BitsPointer) {
       			obj = *(byte**)(b+i);
       			obj0 = obj;
       			goto markobj;
       		}
      
       		// With those three out of the way, must be multi-word.
       		if(Debug && bits != BitsMultiWord)
       			runtime·throw("unexpected garbage collection bits");
       		// Find the next pair of bits.
       		if(ptrmask == nil) {
       			bits = *ptrbitp;
       			j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
       			ptrbitp -= j;
       			bits >>= gcBits*j;
       			bits = (bits>>2)&BitsMask;
       		} else
       			bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
      
       		if(Debug && bits != BitsIface && bits != BitsEface)
       			runtime·throw("unexpected garbage collection bits");
      
       		// An iface/eface occupies two words; extract the data pointer
       		// unless the type has no pointers or is stored directly.
       		if(bits == BitsIface) {
       			iface = (Iface*)(b+i);
       			if(iface->tab != nil) {
       				typ = iface->tab->type;
       				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
       					obj = iface->data;
       			}
       		} else {
       			eface = (Eface*)(b+i);
       			typ = eface->type;
       			if(typ != nil) {
       				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
       					obj = eface->data;
       			}
       		}
      
       		i += PtrSize;
      
       		obj0 = obj;
       	markobj:
       		// At this point we have extracted the next potential pointer.
       		// Check if it points into heap.
       		if(obj == nil)
       			continue;
       		if(obj < arena_start || obj >= arena_used) {
       			if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
       				s = nil;
       				goto badobj;
       			}
       			continue;
       		}
       		// Mark the object.
       		obj = (byte*)((uintptr)obj & ~(PtrSize-1));
       		off = (uintptr*)obj - (uintptr*)arena_start;
       		bitp = arena_start - off/wordsPerBitmapByte - 1;
       		shift = (off % wordsPerBitmapByte) * gcBits;
       		xbits = *bitp;
       		bits = (xbits >> shift) & bitMask;
       		if((bits&bitBoundary) == 0) {
       			// Not a beginning of a block, consult span table to find the block beginning.
                   ......
                   
       			obj = p;
       			goto markobj;
       		}
       		if(DebugPtrs)
       			runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
      
       		if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
       			// Running garbage collection again because
       			// we want to find the path from a root to a bad pointer.
       			// Found possible next step; extend or finish path.
       			for(j=0; j<nbadblock; j++)
       				if(badblock[j] == (uintptr)b)
       					goto AlreadyBad;
       			runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
       			if(ptrmask != nil)
       				runtime·throw("bad pointer");
       			if(nbadblock >= nelem(badblock))
       				runtime·throw("badblock trace too long");
       			badblock[nbadblock++] = (uintptr)b;
       		AlreadyBad:;
       		}
      
       		// Now we have bits, bitp, and shift correct for
       		// obj pointing at the base of the object.
       		// Only care about not marked objects.
       		if((bits&bitMarked) != 0)
       			continue;
       		// If obj size is greater than 8, then each byte of GC bitmap
       		// contains info for at most one object. In such case we use
       		// non-atomic byte store to mark the object. This can lead
       		// to double enqueue of the object for scanning, but scanning
       		// is an idempotent operation, so it is OK. This cannot lead
       		// to bitmap corruption because the single marked bit is the
       		// only thing that can change in the byte.
       		// For 8-byte objects we use non-atomic store, if the other
       		// quadruple is already marked. Otherwise we resort to CAS
       		// loop for marking.
       		if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
       			runtime·work.nproc == 1)
       			*bitp = xbits | (bitMarked<<shift);
       		else
       			runtime·atomicor8(bitp, bitMarked<<shift);
      
       		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
       			continue;  // noscan object
      
       		// Queue the obj for scanning.
       		PREFETCH(obj);
       		p = scanbuf[scanbufpos];
       		scanbuf[scanbufpos++] = obj;
       		scanbufpos %= nelem(scanbuf);
       		if(p == nil)
       			continue;
      
       		// If workbuf is full, obtain an empty one.
       		if(nobj >= nelem(wbuf->obj)) {
       			wbuf->nobj = nobj;
       			wbuf = getempty(wbuf);
       			nobj = wbuf->nobj;
       			wp = &wbuf->obj[nobj];
       		}
       		*wp = p;
       		wp++;
       		nobj++;
       	}
           ............
       }
      

      }

      // markroot scans a single GC root, selected by index i: the .data
      // segment, the .bss segment, finalizer records, MSpan specials,
      // mcache flushing, or (for i >= RootCount) one goroutine's stack.
      // Intended to run under a ParFor so roots are marked in parallel.
      // NOTE: "......" marks code elided in this excerpt.
      static void
      markroot(ParFor *desc, uint32 i)
      {
      FinBlock *fb;
      MSpan *s;
      uint32 spanidx, sg;
      G *gp;
      void *p;
      uint32 status;
      bool restart;

       USED(&desc);
       // Note: if you add a case here, please also update heapdump.c:dumproots.
       switch(i) {
       case RootData:
       	// .data segment: use the precomputed compact pointer mask.
       	scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
       	break;
      
       case RootBss:
       	// .bss segment: likewise scanned with its compact pointer mask.
       	scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
       	break;
      
       case RootFinalizers:
       	// Every queued finalizer record keeps its object reachable.
       	for(fb=runtime·allfin; fb; fb=fb->alllink)
       		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
       	break;
      
       case RootSpans:
       	// mark MSpan.specials
           ......
       	break;
      
       case RootFlushCaches:
       	flushallmcaches();
       	break;
      
       default:
       	// the rest is scanning goroutine stacks
       	if(i - RootCount >= runtime·allglen)
       		runtime·throw("markroot: bad index");
       	gp = runtime·allg[i - RootCount];
       	// remember when we've first observed the G blocked
       	// needed only to output in traceback
       	status = runtime·readgstatus(gp);
       	if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
       		gp->waitsince = runtime·work.tstart;
       	// Shrink a stack if not much of it is being used.
       	runtime·shrinkstack(gp);
       	// A dead G has no stack work; mark it done up front.
       	if(runtime·readgstatus(gp) == Gdead) 
       		gp->gcworkdone = true;
       	else 
       		gp->gcworkdone = false; 
       	// Stop the goroutine (if it was running), scan its stack,
       	// then restart it only if stopg actually stopped it.
       	restart = runtime·stopg(gp);
       	scanstack(gp);
       	if(restart)
       		runtime·restartg(gp);
       	break;
       }
      

      }

    sweep过程

    sweep扫描span里面每一个对象是否marked,将未marked的对象放入span的freelist中
    如果span中的所有对象都进入了freelist,那么会将span的内存释放到heap中。

    // sweeps one span
    // returns number of pages returned to heap, or -1 if there is nothing to sweep
    uintptr
    runtime·sweepone(void)
    {
        MSpan *s;
        uint32 idx, sg;
        uintptr npages;
 
        // increment locks to ensure that the goroutine is not preempted
        // in the middle of sweep thus leaving the span in an inconsistent state for next GC
        g->m->locks++;
        sg = runtime·mheap.sweepgen;
        for(;;) {
        	// Atomically claim the next span index; each index is handed
        	// out to exactly one sweeper.
        	idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
        	if(idx >= runtime·work.nspan) {
        		// All spans have been handed out: sweeping is done.
        		runtime·mheap.sweepdone = true;
        		g->m->locks--;
        		return -1;
        	}
        	s = runtime·work.spans[idx];
        	if(s->state != MSpanInUse) {
        		// Span not in use: nothing to sweep, just stamp it current.
        		s->sweepgen = sg;
        		continue;
        	}
        	// Only sweep spans left over from the previous cycle (sg-2).
        	// The CAS to sg-1 marks the span as being swept and gives this
        	// goroutine exclusive ownership; if it fails, someone else
        	// already claimed it.
        	if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
        		continue;
        	npages = s->npages;
        	// MSpan_Sweep returns whether the span was released to the heap;
        	// if not, no pages were actually returned.
        	if(!runtime·MSpan_Sweep(s, false))
        		npages = 0;
        	g->m->locks--;
        	return npages;
        }
    }
     
    // Sweep frees or collects finalizers for blocks not marked in the mark phase.
    // It clears the mark bits in preparation for the next GC round.
    // Returns true if the span was returned to heap.
    // If preserve=true, don't return it to heap nor relink in MCentral lists;
    // caller takes care of it.
    bool
    runtime·MSpan_Sweep(MSpan *s, bool preserve)
    {
        int32 cl, n, npages, nfree;
        uintptr size, off, step;
        uint32 sweepgen;
        byte *p, *bitp, shift, xbits, bits;
        MCache *c;
        byte *arena_start;
        MLink head, *end, *link;
        Special *special, **specialp, *y;
        bool res, sweepgenset;
 
        // It's critical that we enter this function with preemption disabled,
        // GC must not start while we are in the middle of this function.
        if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
        	runtime·throw("MSpan_Sweep: m is not locked");
        sweepgen = runtime·mheap.sweepgen;
        if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
        	runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
        		s->state, s->sweepgen, sweepgen);
        	runtime·throw("MSpan_Sweep: bad span state");
        }
        arena_start = runtime·mheap.arena_start;
        cl = s->sizeclass;
        size = s->elemsize;
        if(cl == 0) {
        	// Large span: a single object.
        	n = 1;
        } else {
        	// Chunk full of small blocks.
        	npages = runtime·class_to_allocnpages[cl];
        	n = (npages << PageShift) / size;
        }
        res = false;
        nfree = 0;
        end = &head;
        c = g->m->mcache;
        sweepgenset = false;
 
        // Mark any free objects in this span so we don't collect them.
        for(link = s->freelist; link != nil; link = link->next) {
        	off = (uintptr*)link - (uintptr*)arena_start;
        	bitp = arena_start - off/wordsPerBitmapByte - 1;
        	shift = (off % wordsPerBitmapByte) * gcBits;
        	*bitp |= bitMarked<<shift;
        }
 
        // Unlink & free special records for any objects we're about to free.
        specialp = &s->specials;
        special = *specialp;
        while(special != nil) {
        	// A finalizer can be set for an inner byte of an object, find object beginning.
        	p = (byte*)(s->start << PageShift) + special->offset/size*size;
        	off = (uintptr*)p - (uintptr*)arena_start;
        	bitp = arena_start - off/wordsPerBitmapByte - 1;
        	shift = (off % wordsPerBitmapByte) * gcBits;
        	bits = (*bitp>>shift) & bitMask;
        	if((bits&bitMarked) == 0) {
        		// Find the exact byte for which the special was setup
        		// (as opposed to object beginning).
        		p = (byte*)(s->start << PageShift) + special->offset;
        		// about to free object: splice out special record
        		y = special;
        		special = special->next;
        		*specialp = special;
        		if(!runtime·freespecial(y, p, size, false)) {
        			// stop freeing of object if it has a finalizer
        			*bitp |= bitMarked << shift;
        		}
        	} else {
        		// object is still live: keep special record
        		specialp = &special->next;
        		special = *specialp;
        	}
        }
 
        // Sweep through n objects of given size starting at p.
        // This thread owns the span now, so it can manipulate
        // the block bitmap without atomic operations.
        p = (byte*)(s->start << PageShift);
        // Find bits for the beginning of the span.
        off = (uintptr*)p - (uintptr*)arena_start;
        bitp = arena_start - off/wordsPerBitmapByte - 1;
        shift = 0;
        step = size/(PtrSize*wordsPerBitmapByte);
        // Rewind to the previous quadruple as we move to the next
        // in the beginning of the loop.
        bitp += step;
        if(step == 0) {
        	// 8-byte objects.
        	bitp++;
        	shift = gcBits;
        }
        for(; n > 0; n--, p += size) {
        	bitp -= step;
        	if(step == 0) {
        		if(shift != 0)
        			bitp--;
        		shift = gcBits - shift;
        	}
 
        	xbits = *bitp;
        	bits = (xbits>>shift) & bitMask;
 
        	// Allocated and marked object, reset bits to allocated.
        	if((bits&bitMarked) != 0) {
        		*bitp &= ~(bitMarked<<shift);
        		continue;
        	}
        	// At this point we know that we are looking at garbage object
        	// that needs to be collected.
        	if(runtime·debug.allocfreetrace)
        		runtime·tracefree(p, size);
        	// Reset to allocated+noscan.
        	*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
        	if(cl == 0) {
        		// Free large span.
        		if(preserve)
        			runtime·throw("can't preserve large span");
        		runtime·unmarkspan(p, s->npages<<PageShift);
        		s->needzero = 1;
        		// important to set sweepgen before returning it to heap
        		runtime·atomicstore(&s->sweepgen, sweepgen);
        		sweepgenset = true;
        		// NOTE(rsc,dvyukov): The original implementation of efence
        		// in CL 22060046 used SysFree instead of SysFault, so that
        		// the operating system would eventually give the memory
        		// back to us again, so that an efence program could run
        		// longer without running out of memory. Unfortunately,
        		// calling SysFree here without any kind of adjustment of the
        		// heap data structures means that when the memory does
        		// come back to us, we have the wrong metadata for it, either in
        		// the MSpan structures or in the garbage collection bitmap.
        		// Using SysFault here means that the program will run out of
        		// memory fairly quickly in efence mode, but at least it won't
        		// have mysterious crashes due to confused memory reuse.
        		// It should be possible to switch back to SysFree if we also
        		// implement and then call some kind of MHeap_DeleteSpan.
        		if(runtime·debug.efence) {
        			s->limit = nil;	// prevent mlookup from finding this span
        			runtime·SysFault(p, size);
        		} else
        			runtime·MHeap_Free(&runtime·mheap, s, 1);
        		c->local_nlargefree++;
        		c->local_largefree += size;
        		// Credit the freed bytes against the next GC trigger.
        		runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
        		res = true;
        	} else {
        		// Free small object.
        		if(size > 2*sizeof(uintptr))
        			((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
        		else if(size > sizeof(uintptr))
        			((uintptr*)p)[1] = 0;
 
        		// Append the freed object to the local free list.
        		end->next = (MLink*)p;
        		end = (MLink*)p;
        		nfree++;
        	}
        }
 
        // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
        // because of the potential for a concurrent free/SetFinalizer.
        // But we need to set it before we make the span available for allocation
        // (return it to heap or mcentral), because allocation code assumes that a
        // span is already swept if available for allocation.
 
        if(!sweepgenset && nfree == 0) {
        	// The span must be in our exclusive ownership until we update sweepgen,
        	// check for potential races.
        	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
        		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
        			s->state, s->sweepgen, sweepgen);
        		runtime·throw("MSpan_Sweep: bad span state after sweep");
        	}
        	runtime·atomicstore(&s->sweepgen, sweepgen);
        }
        if(nfree > 0) {
        	c->local_nsmallfree[cl] += nfree;
        	c->local_cachealloc -= nfree * size;
        	// Credit the freed bytes against the next GC trigger.
        	runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
        	res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
        	// MCentral_FreeSpan updates sweepgen
        }
        return res;
    }
  • 相关阅读:
    HDFS架构原理
    Hadoop集群搭建
    解决8080端口号占用问题
    基于SSM的Maven项目(Redis和Mysql)配置文件整合
    maven 集成SSM项目配置文件模版
    初识Spring笔记
    初识Mybatis一些总结
    将对数据库的增删改查封装为方法
    10分钟安装Elasticsearch
    ThreadLocal详解
  • 原文地址:https://www.cnblogs.com/richmonkey/p/4509659.html
Copyright © 2020-2023  润新知