When C and C++ first appeared, they left memory management entirely in the hands of the developer. That approach is the most flexible, but it is also the most error-prone and demands a great deal from the programmer. Later high-level languages such as Java, JavaScript, C#, and Go handle memory allocation and reclamation inside the language runtime itself, lowering the barrier to entry and freeing up productivity. For anyone who wants to understand what happens underneath, however, that convenience adds a layer to dig through. This article lays out my understanding of memory allocation; a follow-up article will cover Go's garbage collection mechanism.
#include <stddef.h>   // size_t, NULL
#include <unistd.h>   // sbrk

void *malloc(size_t size)
{
	void *block;

	// Grow the program break by `size` bytes; the old break becomes the new block.
	block = sbrk(size);
	if (block == (void *) -1)
		return NULL;
	return block;
}
#include <pthread.h>  // pthread_mutex_t
#include <stddef.h>   // size_t, NULL
#include <unistd.h>   // sbrk

typedef char ALIGN[16]; // padding so the header is 16-byte aligned

union header {
	struct {
		size_t size;         // size of the block
		unsigned is_free;    // whether the block is free
		union header *next;  // address of the next block
	} s;
	ALIGN stub;
};
typedef union header header_t;
header_t *head, *tail; // head and tail of the linked list of blocks
pthread_mutex_t global_malloc_lock;
header_t *get_free_block(size_t size);

void *malloc(size_t size)
{
	size_t total_size;
	void *block;
	header_t *header;

	if (!size) // a size of 0 returns NULL right away
		return NULL;
	pthread_mutex_lock(&global_malloc_lock); // take the global lock
	header = get_free_block(size); // first look for a large-enough block in the free list
	if (header) {
		// Found one: reuse it instead of asking the OS for memory every time.
		header->s.is_free = 0; // mark the region as in use
		pthread_mutex_unlock(&global_malloc_lock); // release the lock
		// The header is completely hidden from the caller; the memory the caller
		// actually needs starts right after the header.
		return (void*)(header + 1);
	}
	// Nothing reusable, so request a block from the OS. The header stores our
	// metadata, so the actual request is the metadata size plus the size the
	// caller asked for.
	total_size = sizeof(header_t) + size;
	block = sbrk(total_size);
	if (block == (void*) -1) {
		// Request failed: release the lock and return NULL.
		pthread_mutex_unlock(&global_malloc_lock);
		return NULL;
	}
	// Request succeeded: fill in the metadata.
	header = block;
	header->s.size = size;
	header->s.is_free = 0;
	header->s.next = NULL;
	// Link the new block into the list.
	if (!head)
		head = header;
	if (tail)
		tail->s.next = header;
	tail = header;
	// Release the lock and hand the usable memory to the caller.
	pthread_mutex_unlock(&global_malloc_lock);
	return (void*)(header + 1);
}

// Walk the existing blocks in the list and return one that is free and large
// enough to hold the requested size, if any. The list is scanned from the head
// every time; performance and fragmentation are ignored here.
header_t *get_free_block(size_t size)
{
	header_t *curr = head;
	while (curr) {
		if (curr->s.is_free && curr->s.size >= size)
			return curr;
		curr = curr->s.next;
	}
	return NULL;
}
void free(void *block)
{
	header_t *header, *tmp;
	void *programbreak;

	if (!block)
		return;
	pthread_mutex_lock(&global_malloc_lock); // take the global lock
	// Treat block as a header_t pointer and step back one unit: that is the
	// start of this block's metadata.
	header = (header_t*)block - 1;
	programbreak = sbrk(0); // current position of the program break (brk pointer)
	if ((char*)block + header->s.size == programbreak) {
		// The block ends exactly at the program break (it is the tail block),
		// so it can be handed back to the OS.
		if (head == tail) {
			// Only one block: empty the list.
			head = tail = NULL;
		} else {
			// More than one block: find the block before tail and make it the new tail.
			tmp = head;
			while (tmp) {
				if (tmp->s.next == tail) {
					tmp->s.next = NULL;
					tail = tmp;
				}
				tmp = tmp->s.next;
			}
		}
		// Shrink the program break, returning the memory to the OS.
		sbrk(0 - sizeof(header_t) - header->s.size);
		pthread_mutex_unlock(&global_malloc_lock); // release the lock
		return;
	}
	// Not the last block: just mark it free so it can be reused later.
	header->s.is_free = 1;
	pthread_mutex_unlock(&global_malloc_lock);
}
// path: /usr/local/go/src/runtime/sizeclasses.go
const _NumSizeClasses = 67

var class_to_size = [_NumSizeClasses]uint16{
	0, 8, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224,
	240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768,
	896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200,
	3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240,
	10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576,
	27264, 28672, 32768,
}
// path: /usr/local/go/src/runtime/sizeclasses.go
const _NumSizeClasses = 67

var class_to_allocnpages = [_NumSizeClasses]uint8{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 3, 2, 3, 1, 3, 2, 3, 4,
	5, 6, 1, 7, 6, 5, 4, 3, 5, 7, 2, 9, 7, 5, 8, 3, 10, 7, 4,
}
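To see how a requested size maps onto these two tables, here is a minimal sketch that scans class_to_size for the first class large enough for the request and prints the rounded-up object size and the span's page count. It assumes the class_to_size and class_to_allocnpages tables above (and _NumSizeClasses) are copied into the same file; the runtime itself avoids the linear scan by using the precomputed size_to_class8 and size_to_class128 index tables.

package main

import "fmt"

// sizeToClass returns the smallest size class whose object size can hold n bytes.
// A linear scan keeps the sketch short; the runtime uses lookup tables instead.
func sizeToClass(n uintptr) int {
	for c := 1; c < _NumSizeClasses; c++ {
		if uintptr(class_to_size[c]) >= n {
			return c
		}
	}
	return -1 // larger than 32768 bytes: allocated directly from the heap as a large object
}

func main() {
	for _, n := range []uintptr{7, 24, 100, 5000, 40000} {
		c := sizeToClass(n)
		if c < 0 {
			fmt.Printf("%5d bytes -> large object, no size class\n", n)
			continue
		}
		fmt.Printf("%5d bytes -> class %2d, object size %5d, span pages %d\n",
			n, c, class_to_size[c], class_to_allocnpages[c])
	}
}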
// path: /usr/local/go/src/runtime/mheap.go
type mspan struct {
	// next pointer, used to link spans into a list
	next *mspan
	// prev pointer, used to link spans into a list
	prev *mspan
	// start address, i.e. the address of the pages this span manages
	startAddr uintptr
	// number of pages managed by this span
	npages uintptr
	// number of blocks available for allocation in this span
	nelems uintptr
	// helps track how far allocation has progressed within this span
	freeindex uintptr
	// allocation bitmap; each bit records whether a block has been allocated
	allocBits *gcBits
	// cached complement of allocBits, used to quickly find unused blocks
	allocCache uint64
	// number of blocks already allocated
	allocCount uint16
	// class ID in the size class table (related to the Size Class)
	spanclass spanClass
	// object size for this class, i.e. the block size
	elemsize uintptr
	// used by the GC to mark which blocks have been freed
	gcmarkBits *gcBits
}
// path: /usr/local/go/src/runtime/mcache.go
type mcache struct {
	// the mcache keeps two cached spans for every size class
	alloc [numSpanClasses]*mspan

	// the next three fields are used only for tiny-object allocation
	tiny             uintptr
	tinyoffset       uintptr
	local_tinyallocs uintptr
}

const numSpanClasses = _NumSizeClasses << 1
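numSpanClasses is twice _NumSizeClasses because every size class is split into a scan variant (objects that may contain pointers) and a noscan variant (objects that do not), and both get a slot in alloc. Below is a minimal sketch of that encoding, modeled on the runtime's makeSpanClass, which keeps the size class in the upper bits and the noscan flag in the lowest bit.

package main

import "fmt"

type spanClass uint8

// makeSpanClass packs a size class and a noscan flag into one span class:
// the size class sits in the upper bits, the noscan flag in the lowest bit.
func makeSpanClass(sizeclass uint8, noscan bool) spanClass {
	sc := spanClass(sizeclass << 1)
	if noscan {
		sc |= 1
	}
	return sc
}

func (sc spanClass) sizeclass() int8 { return int8(sc >> 1) }
func (sc spanClass) noscan() bool    { return sc&1 != 0 }

func main() {
	// Size class 3 (32-byte objects in the table above) gets two entries in
	// mcache.alloc: one span for pointer-containing objects, one for pointer-free ones.
	for _, noscan := range []bool{false, true} {
		sc := makeSpanClass(3, noscan)
		fmt.Printf("spanClass=%d sizeclass=%d noscan=%v\n", sc, sc.sizeclass(), sc.noscan())
	}
}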
// init initializes pp, which may be a freshly allocated p or a
// previously destroyed p, and transitions it to status _Pgcstop.
func (pp *p) init(id int32) {
	pp.id = id
	//////// ......... /////////
	if pp.mcache == nil {
		if id == 0 {
			if mcache0 == nil {
				throw("missing mcache?")
			}
			// Use the bootstrap mcache0. Only one P will get
			// mcache0: the one with ID 0.
			pp.mcache = mcache0
		} else {
			pp.mcache = allocmcache()
		}
	}
	..........
}
// dummy mspan that contains no free objects.
var emptymspan mspan

func allocmcache() *mcache {
	var c *mcache
	// Create the mcache on the system stack via mheap's cache allocator.
	systemstack(func() {
		lock(&mheap_.lock) // mheap is shared by all goroutines and must be accessed under the lock
		c = (*mcache)(mheap_.cachealloc.alloc())
		c.flushGen = mheap_.sweepgen
		unlock(&mheap_.lock)
	})
	// Point every entry of the alloc array at the empty span.
	for i := range c.alloc {
		c.alloc[i] = &emptymspan
	}
	c.nextSample = nextSample()
	return c
}
// refill acquires a new span of span class spc for c. This span will
// have at least one free object. The current span in c must be full.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
func (c *mcache) refill(spc spanClass) {
	// Return the current cached span to the central lists.
	s := c.alloc[spc]
	...............
	if s != &emptymspan {
		// Mark this span as no longer cached.
		if s.sweepgen != mheap_.sweepgen+3 {
			throw("bad sweepgen in refill")
		}
		mheap_.central[spc].mcentral.uncacheSpan(s)
	}

	// Get a new cached span from the central lists.
	s = mheap_.central[spc].mcentral.cacheSpan()
	................
	...............
	c.alloc[spc] = s
}
type mcentral struct {
	spanclass spanClass
	partial   [2]spanSet // list of spans with a free object
	full      [2]spanSet // list of spans with no free objects
}
type spanSet struct {
	spineLock mutex
	spine     unsafe.Pointer // *[N]*spanSetBlock, accessed atomically
	spineLen  uintptr        // Spine array length, accessed atomically
	spineCap  uintptr        // Spine array cap, accessed under lock

	index headTailIndex
}
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
	off = alignUp(off, 8)
} else if sys.PtrSize == 4 && size == 12 {
	// Conservatively align 12-byte objects to 8 bytes on 32-bit
	// systems so that objects whose first field is a 64-bit
	// value is aligned to 8 bytes and does not cause a fault on
	// atomic access. See issue 37262.
	// TODO(mknyszek): Remove this workaround if/when issue 36606
	// is resolved.
	off = alignUp(off, 8)
} else if size&3 == 0 {
	off = alignUp(off, 4)
} else if size&1 == 0 {
	off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
	// The object fits into existing tiny block.
	x = unsafe.Pointer(c.tiny + off)
	c.tinyoffset = off + size
	c.tinyAllocs++
	mp.mallocing = 0
	releasem(mp)
	return x
}
// Allocate a new maxTinySize block.
span = c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
	v, span, shouldhelpgc = c.nextFree(tinySpanClass)
}
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
	// Note: disabled when race detector is on, see comment near end of this function.
	c.tiny = uintptr(x)
	c.tinyoffset = size
}
size = maxTinySize
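Taken together, these three snippets mean that small, pointer-free objects are packed into a shared 16-byte block: tinyoffset is aligned, bumped by the object size, and a new block is fetched only when the next object no longer fits. Below is a stand-alone simulation of that packing logic under the same alignment rules; the tinyBlock type and the sample sizes are invented for illustration.

package main

import "fmt"

const maxTinySize = 16

// alignUp rounds n up to a multiple of a (a power of two), mirroring the
// runtime helper of the same name.
func alignUp(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

// tinyBlock is a stand-in for the mcache's tiny/tinyoffset pair.
type tinyBlock struct {
	used uintptr // corresponds to c.tinyoffset
}

// place reports the offset an allocation of `size` bytes would get inside the
// current 16-byte block, or false if a fresh block would be needed.
func (b *tinyBlock) place(size uintptr) (uintptr, bool) {
	off := b.used
	switch {
	case size&7 == 0:
		off = alignUp(off, 8)
	case size&3 == 0:
		off = alignUp(off, 4)
	case size&1 == 0:
		off = alignUp(off, 2)
	}
	if off+size > maxTinySize {
		return 0, false // does not fit; the runtime would grab a new tiny block
	}
	b.used = off + size
	return off, true
}

func main() {
	var b tinyBlock
	for _, size := range []uintptr{1, 2, 7, 4, 8} {
		if off, ok := b.place(size); ok {
			fmt.Printf("size %2d placed at offset %2d (tinyoffset now %2d)\n", size, off, b.used)
		} else {
			fmt.Printf("size %2d needs a new 16-byte block\n", size)
		}
	}
}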
var sizeclass uint8
if size <= smallSizeMax-8 {
	sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
	sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, noscan)
span = c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
	v, span, shouldhelpgc = c.nextFree(spc)
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
	memclrNoHeapPointers(unsafe.Pointer(v), size)
}
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
	theBit := sys.Ctz64(s.allocCache) // Is there a free object in the allocCache?
	if theBit < 64 {
		result := s.freeindex + uintptr(theBit)
		if result < s.nelems {
			freeidx := result + 1
			if freeidx%64 == 0 && freeidx != s.nelems {
				return 0
			}
			s.allocCache >>= uint(theBit + 1)
			s.freeindex = freeidx
			s.allocCount++
			return gclinkptr(result*s.elemsize + s.base())
		}
	}
	return 0
}
// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}
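Both nextFreeFast and nextFreeIndex rely on the same trick: allocCache is a 64-bit window over the span's allocation bitmap in which a set bit means the slot is still free, so a count-trailing-zeros instruction jumps straight to the next free slot. The sketch below demonstrates the idea with the standard library's bits.TrailingZeros64; the bitmap value, element size, and base address are made up for the example.

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// allocCache: bit i set means slot (freeindex + i) in the span is still free.
	// Here the low three slots are already allocated, so their bits are cleared.
	var allocCache uint64 = 0xFFFFFFFFFFFFFFF8
	freeindex := uintptr(0)
	elemsize := uintptr(32) // pretend this span holds 32-byte objects
	base := uintptr(0x1000) // pretend span base address

	for i := 0; i < 3; i++ {
		theBit := bits.TrailingZeros64(allocCache) // index of the next free slot in the window
		if theBit == 64 {
			fmt.Println("window exhausted: the runtime would refill allocCache from allocBits")
			break
		}
		slot := freeindex + uintptr(theBit)
		fmt.Printf("allocate slot %d at address %#x\n", slot, base+slot*elemsize)
		// Consume the slot: shift the window past it and advance freeindex,
		// mirroring what nextFreeFast does.
		allocCache >>= uint(theBit + 1)
		freeindex = slot + 1
	}
}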
func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) {
	s = c.alloc[spc]
	shouldhelpgc = false
	freeIndex := s.nextFreeIndex() // find the position of the next allocatable element
	if freeIndex == s.nelems {
		// The current span has no free space: refill hands it to mcentral's full set
		// and pulls a span with free space from mcentral's partial set into the mcache.
		// The span is full.
		if uintptr(s.allocCount) != s.nelems {
			println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
			throw("s.allocCount != s.nelems && freeIndex == s.nelems")
		}
		c.refill(spc)
		shouldhelpgc = true
		s = c.alloc[spc]

		freeIndex = s.nextFreeIndex() // recompute freeIndex in the newly acquired span
	}

	if freeIndex >= s.nelems {
		throw("freeIndex is not valid")
	}

	// Free slot address = span base address + slot index * element size.
	v = gclinkptr(freeIndex*s.elemsize + s.base())
	s.allocCount++
	if uintptr(s.allocCount) > s.nelems {
		println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
		throw("s.allocCount > s.nelems")
	}
	return
}
func (c *mcache) refill(spc spanClass) {
	// Return the current cached span to the central lists.
	s := c.alloc[spc]
	...............
	if s != &emptymspan {
		// Mark this span as no longer cached.
		if s.sweepgen != mheap_.sweepgen+3 {
			throw("bad sweepgen in refill")
		}
		mheap_.central[spc].mcentral.uncacheSpan(s)
	}

	// Get a new cached span from the central lists.
	s = mheap_.central[spc].mcentral.cacheSpan()
	................
	...............
	c.alloc[spc] = s
}
// Allocate a span to use in an mcache.
func (c *mcentral) cacheSpan() *mspan {
	// Deduct credit for this span allocation and sweep if necessary.
	spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
	deductSweepCredit(spanBytes, 0)

	traceDone := false
	if trace.enabled {
		traceGCSweepStart()
	}
	spanBudget := 100

	var s *mspan
	sl := newSweepLocker()
	sg := sl.sweepGen

	// Try partial swept spans first.
	if s = c.partialSwept(sg).pop(); s != nil {
		goto havespan
	}

	// Now try partial unswept spans.
	for ; spanBudget >= 0; spanBudget-- {
		s = c.partialUnswept(sg).pop()
		if s == nil {
			break
		}
		if s, ok := sl.tryAcquire(s); ok {
			// We got ownership of the span, so let's sweep it and use it.
			s.sweep(true)
			sl.dispose()
			goto havespan
		}
	}

	// Now try full unswept spans, sweeping them and putting them into the
	// right list if we fail to get a span.
	for ; spanBudget >= 0; spanBudget-- {
		s = c.fullUnswept(sg).pop()
		if s == nil {
			break
		}
		if s, ok := sl.tryAcquire(s); ok {
			// We got ownership of the span, so let's sweep it.
			s.sweep(true)
			// Check if there's any free space.
			freeIndex := s.nextFreeIndex()
			if freeIndex != s.nelems {
				s.freeindex = freeIndex
				sl.dispose()
				goto havespan
			}
			// Add it to the swept list, because sweeping didn't give us any free space.
			c.fullSwept(sg).push(s.mspan)
		}
		// See comment for partial unswept spans.
	}
	sl.dispose()
	if trace.enabled {
		traceGCSweepDone()
		traceDone = true
	}

	// We failed to get a span from the mcentral so get one from mheap.
	s = c.grow()
	if s == nil {
		return nil
	}

	// At this point s is a span that should have free slots.
havespan:
	if trace.enabled && !traceDone {
		traceGCSweepDone()
	}
	n := int(s.nelems) - int(s.allocCount)
	if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {
		throw("span has no free objects")
	}
	freeByteBase := s.freeindex &^ (64 - 1)
	whichByte := freeByteBase / 8
	// Init alloc bits cache.
	s.refillAllocCache(whichByte)

	// Adjust the allocCache so that s.freeindex corresponds to the low bit in
	// s.allocCache.
	s.allocCache >>= s.freeindex % 64

	return s
}
func (c *mcentral) uncacheSpan(s *mspan) {
	if s.allocCount == 0 {
		throw("uncaching span but s.allocCount == 0")
	}

	sg := mheap_.sweepgen
	stale := s.sweepgen == sg+1

	// Fix up sweepgen.
	if stale {
		// Span was cached before sweep began. It's our
		// responsibility to sweep it.
		//
		// Set sweepgen to indicate it's not cached but needs
		// sweeping and can't be allocated from. sweep will
		// set s.sweepgen to indicate s is swept.
		atomic.Store(&s.sweepgen, sg-1)
	} else {
		// Indicate that s is no longer cached.
		atomic.Store(&s.sweepgen, sg)
	}

	// Put the span in the appropriate place.
	if stale {
		// It's stale, so just sweep it. Sweeping will put it on
		// the right list.
		//
		// We don't use a sweepLocker here. Stale cached spans
		// aren't in the global sweep lists, so mark termination
		// itself holds up sweep completion until all mcaches
		// have been swept.
		ss := sweepLocked{s}
		ss.sweep(false)
	} else {
		if int(s.nelems)-int(s.allocCount) > 0 {
			// Put it back on the partial swept list.
			c.partialSwept(sg).push(s)
		} else {
			// There's no free space and it's not stale, so put it on the
			// full swept list.
			c.fullSwept(sg).push(s)
		}
	}
}
type mcentral struct {
	spanclass spanClass
	partial   [2]spanSet // list of spans with a free object
	full      [2]spanSet // list of spans with no free objects
}
// grow allocates a new empty span from the heap and initializes it for c's size class.
func (c *mcentral) grow() *mspan {
	npages := uintptr(class_to_allocnpages[c.spanclass.sizeclass()])
	size := uintptr(class_to_size[c.spanclass.sizeclass()])

	s, _ := mheap_.alloc(npages, c.spanclass, true)
	if s == nil {
		return nil
	}

	// Use division by multiplication and shifts to quickly compute:
	// n := (npages << _PageShift) / size
	n := s.divideByElemSize(npages << _PageShift)
	s.limit = s.base() + size*n
	heapBitsForAddr(s.base()).initSpan(s)
	return s
}
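To make the arithmetic in grow concrete, a freshly grown span holds n = (npages << _PageShift) / size objects. The sketch below works this out for a few classes from the tables earlier, assuming the usual 8 KB runtime page (_PageShift = 13); the printed counts are only for illustration.

package main

import "fmt"

const _PageShift = 13 // runtime pages are 8 KB

func main() {
	// size class -> (object size, pages per span), taken from the tables above
	classes := []struct {
		class  int
		size   uintptr
		npages uintptr
	}{
		{1, 8, 1},      // 8-byte objects, 1-page span
		{5, 64, 1},     // 64-byte objects, 1-page span
		{66, 32768, 4}, // 32 KB objects, 4-page span
	}
	for _, c := range classes {
		spanBytes := c.npages << _PageShift
		n := spanBytes / c.size // number of objects the span can hold
		fmt.Printf("class %2d: %5d-byte objects, %d page(s) -> %4d objects per span\n",
			c.class, c.size, c.npages, n)
	}
}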
func (h *mheap) alloc(npages uintptr, spanclass spanClass, needzero bool) (*mspan, bool) {
	// Don't do any operations that lock the heap on the G stack.
	// It might trigger stack growth, and the stack growth code needs
	// to be able to allocate heap.
	var s *mspan
	systemstack(func() {
		// To prevent excessive heap growth, before allocating n pages
		// we need to sweep and reclaim at least n pages.
		if !isSweepDone() {
			h.reclaim(npages)
		}
		s = h.allocSpan(npages, spanAllocHeap, spanclass)
	})

	if s == nil {
		return nil, false
	}
	isZeroed := s.needzero == 0
	if needzero && !isZeroed {
		memclrNoHeapPointers(unsafe.Pointer(s.base()), s.npages<<_PageShift)
		isZeroed = true
	}
	s.needzero = 0
	return s, isZeroed
}
func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) {
	// Function-global state.
	gp := getg()
	base, scav := uintptr(0), uintptr(0)

	// On some platforms we need to provide physical page aligned stack
	// allocations. Where the page size is less than the physical page
	// size, we already manage to do this by default.
	needPhysPageAlign := physPageAlignedStacks && typ == spanAllocStack && pageSize < physPageSize

	// If the allocation is small enough, try the page cache!
	// The page cache does not support aligned allocations, so we cannot use
	// it if we need to provide a physical page aligned stack allocation.
	pp := gp.m.p.ptr()
	if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
		c := &pp.pcache

		// If the cache is empty, refill it.
		if c.empty() {
			lock(&h.lock)
			*c = h.pages.allocToCache()
			unlock(&h.lock)
		}

		// Try to allocate from the cache.
		base, scav = c.alloc(npages)
		if base != 0 {
			s = h.tryAllocMSpan()
			if s != nil {
				goto HaveSpan
			}
			// We have a base but no mspan, so we need
			// to lock the heap.
		}
	}
// pageCachePages is the number of pages a pageCache covers:
// 8 * sizeof(uint64) = 64 pages, i.e. 64 x 8 KB = 512 KB of address space.
const pageCachePages = 8 * unsafe.Sizeof(pageCache{}.cache)

// pageCache represents a per-p cache of pages the allocator can
// allocate from without a lock. More specifically, it represents
// a pageCachePages*pageSize chunk of memory with 0 or more free
// pages in it.
type pageCache struct {
	// base is the base (virtual) address of the chunk this cache covers
	base uintptr
	// cache and scav are both bitmaps: cache marks which pages are free,
	// scav marks which pages have been scavenged (returned to the OS).
	// They speed up allocation and garbage collection; under certain GC
	// conditions the two can be swapped to make both paths more efficient.
	cache uint64 // 64-bit bitmap representing free pages (1 means free)
	scav  uint64 // 64-bit bitmap representing scavenged pages (1 means scavenged)
}
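Here is a simplified sketch of how such a 64-bit bitmap can hand out pages without touching the heap lock: scan for a run of set bits, clear them, and return the corresponding address. The real pageCache.alloc uses specialised bit tricks rather than a linear scan, and the type below is only a stand-in with made-up values.

package main

import "fmt"

const pageSize = 8192 // 8 KB runtime pages

// pageCache is a simplified stand-in for the runtime's per-P page cache:
// a base address plus a 64-bit bitmap where a set bit means "page free".
type pageCache struct {
	base  uintptr
	cache uint64
}

// alloc hands out npages contiguous pages from the bitmap, or 0 if none fit.
func (c *pageCache) alloc(npages uintptr) uintptr {
	mask := (uint64(1) << npages) - 1
	for i := uintptr(0); i+npages <= 64; i++ {
		if c.cache>>i&mask == mask { // found npages consecutive free pages
			c.cache &^= mask << i // mark them allocated
			return c.base + i*pageSize
		}
	}
	return 0
}

func main() {
	c := pageCache{base: 0x100000, cache: ^uint64(0)} // all 64 pages free
	fmt.Printf("2 pages at %#x\n", c.alloc(2))
	fmt.Printf("1 page  at %#x\n", c.alloc(1))
	fmt.Printf("free bitmap now %064b\n", c.cache)
}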
	// For one reason or another, we couldn't get the
	// whole job done without the heap lock.
	lock(&h.lock)
	.................
	if base == 0 {
		// Try to acquire a base address.
		base, scav = h.pages.alloc(npages)
		if base == 0 {
			if !h.grow(npages) {
				unlock(&h.lock)
				return nil
			}
			base, scav = h.pages.alloc(npages)
			if base == 0 {
				throw("grew heap, but no adequate free space found")
			}
		}
	}
	................
	unlock(&h.lock)
// path: /usr/local/go/src/runtime/mheap.go
type mheap struct {
	lock mutex

	// spans: points to the mspans area, mapping pages to the mspan that owns them
	spans []*mspan

	// address of the bitmap; the bitmap grows from high addresses toward low ones
	bitmap uintptr

	// start address of the arena region
	arena_start uintptr

	// highest address in the arena region that has been used
	arena_used uintptr

	// end address of the arena region
	arena_end uintptr

	// one mcentral per span class, padded out to a cache line to avoid false sharing
	central [67 * 2]struct {
		mcentral mcentral
		pad      [sys.CacheLineSize - unsafe.Sizeof(mcentral{})%sys.CacheLineSize]byte
	}
}