Commit 1e063b32 authored by Russ Cox

runtime: faster allocator, garbage collector

GC is still single-threaded.
Multiple threads will happen in another CL.

Garbage collection pauses are typically
about half as long as they were before this CL.

R=brainman, iant, r
CC=golang-dev
https://golang.org/cl/3975046
parent 6b93a92a
@@ -69,7 +69,8 @@ type MemStatsType struct {
 	// Per-size allocation statistics.
 	// Not locked during update; approximate.
-	BySize [67]struct {
+	// 61 is NumSizeClasses in the C code.
+	BySize [61]struct {
 		Size    uint32
 		Mallocs uint64
 		Frees   uint64
...
@@ -702,7 +702,7 @@ unsafe·New(Eface typ, void *ret)
 	t = (Type*)((Eface*)typ.data-1);
 
 	if(t->kind&KindNoPointers)
-		ret = runtime·mallocgc(t->size, RefNoPointers, 1, 1);
+		ret = runtime·mallocgc(t->size, FlagNoPointers, 1, 1);
 	else
 		ret = runtime·mal(t->size);
 	FLUSH(&ret);
@@ -722,7 +722,7 @@ unsafe·NewArray(Eface typ, uint32 n, void *ret)
 	size = n*t->size;
 
 	if(t->kind&KindNoPointers)
-		ret = runtime·mallocgc(size, RefNoPointers, 1, 1);
+		ret = runtime·mallocgc(size, FlagNoPointers, 1, 1);
 	else
 		ret = runtime·mal(size);
 	FLUSH(&ret);
...
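Every RefNoPointers→FlagNoPointers change in this CL is the same substitution: what used to be a high bit in a per-object reference-count word is now a plain allocation flag, recorded in the GC bitmap instead of next to the object. A minimal sketch of the call pattern in standard C; alloc_gc and new_object are invented stand-ins, not the runtime's API:

    #include <stdlib.h>

    enum { FlagNoPointers = 1<<0 };   /* mirrors the new malloc.h flag */

    /* stand-in for runtime·mallocgc(size, flag, dogc, zeroed) */
    static void *alloc_gc(size_t size, unsigned flag)
    {
        (void)flag;  /* a real allocator records flag bits in the GC bitmap */
        return calloc(1, size);
    }

    static void *new_object(size_t size, int kind_has_pointers)
    {
        /* pointer-free types are tagged so the collector never scans them */
        return alloc_gc(size, kind_has_pointers ? 0 : FlagNoPointers);
    }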
[diff collapsed in the original view; contents not shown]
@@ -97,8 +97,14 @@ typedef uintptr PageID;	// address >> PageShift
 enum
 {
+	// Computed constant. The definition of MaxSmallSize and the
+	// algorithm in msize.c produce some number of different allocation
+	// size classes. NumSizeClasses is that number. It's needed here
+	// because there are static arrays of this length; when msize runs its
+	// size choosing algorithm it double-checks that NumSizeClasses agrees.
+	NumSizeClasses = 61,
+
 	// Tunable constants.
-	NumSizeClasses = 67,	// Number of size classes (must match msize.c)
 	MaxSmallSize = 32<<10,
 
 	FixAllocChunk = 128<<10,	// Chunk size for FixAlloc
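The "double-check" the new comment describes can be pictured with a toy version: derive the classes from a rule, count them, and fail loudly if the count disagrees with the compile-time constant. The fixed 8-byte step and the toy constant below are assumptions for illustration, not msize.c's actual algorithm:

    #include <stdio.h>
    #include <stdlib.h>

    enum { ToyMaxSmallSize = 64, ToyNumSizeClasses = 8 };   /* classes 8,16,...,64 */

    int main(void)
    {
        int size, n = 0;
        for (size = 8; size <= ToyMaxSmallSize; size += 8)
            n++;                           /* one class per 8-byte step */
        if (n != ToyNumSizeClasses) {      /* the msize.c-style cross-check */
            fprintf(stderr, "NumSizeClasses disagrees: got %d\n", n);
            exit(1);
        }
        return 0;
    }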
@@ -290,10 +296,7 @@ struct MSpan
 	uint32	ref;		// number of allocated objects in this span
 	uint32	sizeclass;	// size class
 	uint32	state;		// MSpanInUse etc
-	union {
-		uint32	*gcref;	// sizeclass > 0
-		uint32	gcref0;	// sizeclass == 0
-	};
+	byte	*limit;		// end of data in span
 };
 
 void	runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
@@ -336,6 +339,7 @@ struct MHeap
 	// range of addresses we might see in the heap
 	byte *bitmap;
+	uintptr bitmap_mapped;
 	byte *arena_start;
 	byte *arena_used;
 	byte *arena_end;
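bitmap and the new bitmap_mapped field support the GC bitmap that replaces the old per-object Ref words: address space for the bitmap is reserved up front alongside the arena, and runtime·MHeap_MapBits maps only the prefix needed to describe [arena_start, arena_used). A sketch of that bookkeeping; the 4-bits-per-word ratio and the sys_map stub are assumptions:

    #include <stdint.h>

    enum { PageSize = 4096, PtrSize = 8, BitsPerPointer = 4 };  /* assumed */

    typedef struct Heap {
        uint8_t  *bitmap;          /* reserved shadow of the arena */
        uintptr_t bitmap_mapped;   /* bytes of bitmap actually mapped */
        uint8_t  *arena_start, *arena_used;
    } Heap;

    static void sys_map(void *v, uintptr_t n) { (void)v; (void)n; /* mmap in reality */ }

    /* map just enough bitmap to cover the used part of the arena */
    static void map_bits(Heap *h)
    {
        uintptr_t words = (uintptr_t)(h->arena_used - h->arena_start) / PtrSize;
        uintptr_t n = (words * BitsPerPointer + 7) / 8;           /* bytes needed */
        n = (n + PageSize - 1) & ~(uintptr_t)(PageSize - 1);      /* page-round */
        if (n <= h->bitmap_mapped)
            return;
        sys_map(h->bitmap + h->bitmap_mapped, n - h->bitmap_mapped);
        h->bitmap_mapped = n;
    }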
@@ -359,26 +363,29 @@ MSpan*	runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct);
 void	runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
 MSpan*	runtime·MHeap_Lookup(MHeap *h, void *v);
 MSpan*	runtime·MHeap_LookupMaybe(MHeap *h, void *v);
-void	runtime·MGetSizeClassInfo(int32 sizeclass, int32 *size, int32 *npages, int32 *nobj);
+void	runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
 void*	runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
+void	runtime·MHeap_MapBits(MHeap *h);
 
 void*	runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed);
-int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s, uint32 **ref);
+int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
 void	runtime·gc(int32 force);
+void	runtime·markallocated(void *v, uintptr n, bool noptr);
+void	runtime·checkallocated(void *v, uintptr n);
+void	runtime·markfreed(void *v, uintptr n);
+void	runtime·checkfreed(void *v, uintptr n);
+int32	runtime·checking;
+void	runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
+void	runtime·unmarkspan(void *v, uintptr size);
+bool	runtime·blockspecial(void*);
+void	runtime·setblockspecial(void*);
 
 enum
 {
-	RefcountOverhead = 4,	// one uint32 per object
-
-	RefFree = 0,	// must be zero
-	RefStack,	// stack segment - don't free and don't scan for pointers
-	RefNone,	// no references
-	RefSome,	// some references
-	RefNoPointers = 0x80000000U,	// flag - no pointers here
-	RefHasFinalizer = 0x40000000U,	// flag - has finalizer
-	RefProfiled = 0x20000000U,	// flag - is in profiling table
-	RefNoProfiling = 0x10000000U,	// flag - must not profile
-	RefFlags = 0xFFFF0000U,
+	// flags to malloc
+	FlagNoPointers = 1<<0,	// no pointers here
+	FlagNoProfiling = 1<<1,	// must not profile
+	FlagNoGC = 1<<2,	// must not free or scan for pointers
 };
 
 void	runtime·MProf_Malloc(void*, uintptr);
...
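The markallocated/markfreed/setblockspecial family is the bitmap counterpart of the deleted Ref constants: a few state bits per heap word, held in a shadow bitmap indexed by the word's offset from arena_start. The one-byte-per-word layout below is a deliberate simplification (the real runtime packs the bits much more tightly), and the bit names are assumptions:

    #include <stdint.h>

    enum { PtrSize = 8 };
    enum { BitAllocated = 1, BitNoPointers = 2, BitSpecial = 4 };  /* assumed layout */

    static uint8_t   bitmap[1 << 20];   /* toy shadow; v must lie in the toy arena */
    static uintptr_t arena_start;

    static uint8_t *bits_for(void *v)
    {
        return &bitmap[((uintptr_t)v - arena_start) / PtrSize];
    }
    static void markallocated(void *v, int noptr)
    {
        *bits_for(v) = BitAllocated | (noptr ? BitNoPointers : 0);
    }
    static void markfreed(void *v)       { *bits_for(v) = 0; }
    static void setblockspecial(void *v) { *bits_for(v) |= BitSpecial; }
    static int  blockspecial(void *v)    { return (*bits_for(v) & BitSpecial) != 0; }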
@@ -113,8 +113,7 @@ static void
 MCentral_Free(MCentral *c, void *v)
 {
 	MSpan *s;
-	PageID page;
-	MLink *p, *next;
+	MLink *p;
 	int32 size;
 
 	// Find span for v.
@@ -138,16 +137,8 @@ MCentral_Free(MCentral *c, void *v)
 	if(--s->ref == 0) {
 		size = runtime·class_to_size[c->sizeclass];
 		runtime·MSpanList_Remove(s);
-		// The second word of each freed block indicates
-		// whether it needs to be zeroed. The first word
-		// is the link pointer and must always be cleared.
-		for(p=s->freelist; p; p=next) {
-			next = p->next;
-			if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0)
-				runtime·memclr((byte*)p, size);
-			else
-				p->next = nil;
-		}
+		runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+		*(uintptr*)(s->start<<PageShift) = 1;	// needs zeroing
 		s->freelist = nil;
 		c->nfree -= (s->npages << PageShift) / size;
 		runtime·unlock(c);
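This hunk deletes an O(objects) pass from the free path: rather than walking the free list and clearing each block, the span is handed back with one sentinel word meaning "contents are dirty", and zeroing is deferred to whoever reuses the span, and only if that caller asked for zeroed memory. A sketch of the convention (release_span and reuse_span are invented names):

    #include <stdint.h>
    #include <string.h>

    /* returning a whole span: one store replaces clearing every block */
    static void release_span(void *base)
    {
        *(uintptr_t *)base = 1;           /* nonzero: needs zeroing */
    }

    /* reusing the span: zero lazily, only when the caller needs it */
    static void *reuse_span(void *base, size_t len, int zeroed)
    {
        if (zeroed && *(uintptr_t *)base != 0)
            memset(base, 0, len);
        return base;
    }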
@@ -157,7 +148,7 @@ MCentral_Free(MCentral *c, void *v)
 }
 
 void
-runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
+runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
 {
 	int32 size;
 	int32 npages;
@@ -166,7 +157,7 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
 	size = runtime·class_to_size[sizeclass];
 	*npagesp = npages;
 	*sizep = size;
-	*nobj = (npages << PageShift) / (size + RefcountOverhead);
+	*nobj = (npages << PageShift) / size;
 }
 
 // Fetch a new span from the heap and
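Dropping RefcountOverhead lets every small size class pack more objects. With illustrative numbers: for 32-byte objects in one 4096-byte page, the old formula gave 4096/(32+4) = 113 objects, while the new 4096/32 gives 128, about 13% more objects in the same memory, on top of no longer touching a refcount word per object.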
@@ -174,7 +165,8 @@ runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
 static bool
 MCentral_Grow(MCentral *c)
 {
-	int32 i, n, npages, size;
+	int32 i, n, npages;
+	uintptr size;
 	MLink **tailp, *v;
 	byte *p;
 	MSpan *s;
@@ -191,7 +183,7 @@ MCentral_Grow(MCentral *c)
 	// Carve span into sequence of blocks.
 	tailp = &s->freelist;
 	p = (byte*)(s->start << PageShift);
-	s->gcref = (uint32*)(p + size*n);
+	s->limit = p + size*n;
 	for(i=0; i<n; i++) {
 		v = (MLink*)p;
 		*tailp = v;
@@ -199,6 +191,7 @@ MCentral_Grow(MCentral *c)
 		p += size;
 	}
 	*tailp = nil;
+	runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
 
 	runtime·lock(c);
 	c->nfree += n;
...
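The carving loop threads a singly linked free list through the fresh span, reusing the first word of each block as the link; s->limit now records where the blocks end, in the spot where the old code parked the gcref array. The same loop, restated as self-contained standard C:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct MLink { struct MLink *next; } MLink;

    /* thread a free list through n size-byte blocks starting at p */
    static MLink *carve(uint8_t *p, size_t size, int n, uint8_t **limit)
    {
        MLink *head = NULL, **tailp = &head;
        int i;

        *limit = p + size * n;            /* end of data in span */
        for (i = 0; i < n; i++) {
            MLink *v = (MLink *)p;
            *tailp = v;                   /* append block to the list */
            tailp = &v->next;
            p += size;
        }
        *tailp = NULL;
        return head;
    }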
@@ -5,6 +5,7 @@
 #include "runtime.h"
 #include "malloc.h"
 
+// TODO(rsc): Why not just use mheap.Lock?
 static Lock finlock;
 
 // Finalizer hash table. Direct hash, linear scan, at most 3/4 full.
@@ -101,24 +102,21 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
 	}
 
 	runtime·lock(&finlock);
-	if(!runtime·mlookup(p, &base, nil, nil, &ref) || p != base) {
+	if(!runtime·mlookup(p, &base, nil, nil) || p != base) {
 		runtime·unlock(&finlock);
 		runtime·throw("addfinalizer on invalid pointer");
 	}
 	if(f == nil) {
-		if(*ref & RefHasFinalizer) {
-			lookfintab(&fintab, p, 1);
-			*ref &= ~RefHasFinalizer;
-		}
+		lookfintab(&fintab, p, 1);
 		runtime·unlock(&finlock);
 		return;
 	}
-	if(*ref & RefHasFinalizer) {
+	if(lookfintab(&fintab, p, 0)) {
 		runtime·unlock(&finlock);
 		runtime·throw("double finalizer");
 	}
-	*ref |= RefHasFinalizer;
+	runtime·setblockspecial(p);
 
 	if(fintab.nkey >= fintab.max/2+fintab.max/4) {
 		// keep table at most 3/4 full:
@@ -134,7 +132,7 @@ runtime·addfinalizer(void *p, void (*f)(void*), int32 nret)
 		newtab.max *= 3;
 	}
 
-	newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], RefNoPointers, 0, 1);
+	newtab.key = runtime·mallocgc(newtab.max*sizeof newtab.key[0], FlagNoPointers, 0, 1);
 	newtab.val = runtime·mallocgc(newtab.max*sizeof newtab.val[0], 0, 0, 1);
 
 	for(i=0; i<fintab.max; i++) {
...
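With the Ref words gone, "this block has a finalizer" also needs a new home: the hash table stays the authoritative pointer-to-finalizer map, while the per-block special bit set by runtime·setblockspecial is what the collector can test cheaply during sweep. A toy sketch of the double-registration guard (an array index standing in for a real pointer, names invented):

    #include <stdio.h>

    enum { NBLOCK = 1024 };
    static unsigned char special[NBLOCK];   /* stand-in for the bitmap's special bit */

    /* register (f != NULL) or remove (f == NULL) a finalizer for block i */
    static int addfinalizer(int i, void (*f)(void *))
    {
        if (f == NULL) {
            special[i] = 0;                 /* forget any finalizer */
            return 0;
        }
        if (special[i]) {
            fprintf(stderr, "double finalizer\n");
            return -1;
        }
        special[i] = 1;                     /* like runtime·setblockspecial(p) */
        /* f itself would be stored in the finalizer hash table */
        return 0;
    }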
[diff collapsed in the original view; contents not shown]
@@ -180,7 +180,9 @@ MHeap_Grow(MHeap *h, uintptr npage)
 	// Allocate a multiple of 64kB (16 pages).
 	npage = (npage+15)&~15;
 	ask = npage<<PageShift;
-	if(ask < HeapAllocChunk)
+	if(ask > h->arena_end - h->arena_used)
+		return false;
+	if(ask < HeapAllocChunk && HeapAllocChunk <= h->arena_end - h->arena_used)
 		ask = HeapAllocChunk;
 
 	v = runtime·MHeap_SysAlloc(h, ask);
@@ -194,11 +196,6 @@ MHeap_Grow(MHeap *h, uintptr npage)
 	}
 	mstats.heap_sys += ask;
 
-	if((byte*)v < h->arena_start || h->arena_start == nil)
-		h->arena_start = v;
-	if((byte*)v+ask > h->arena_end)
-		h->arena_end = (byte*)v+ask;
-
 	// Create a fake "in use" span and free it, so that the
 	// right coalescing happens.
 	s = runtime·FixAlloc_Alloc(&h->spanalloc);
@@ -370,10 +367,14 @@ runtime·MSpanList_IsEmpty(MSpan *list)
 void
 runtime·MSpanList_Insert(MSpan *list, MSpan *span)
 {
-	if(span->next != nil || span->prev != nil)
+	if(span->next != nil || span->prev != nil) {
+		runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
 		runtime·throw("MSpanList_Insert");
+	}
 	span->next = list->next;
 	span->prev = list;
 	span->next->prev = span;
 	span->prev->next = span;
 }
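MSpanList_Insert depends on the invariant that a detached span has nil links; the added printf only makes a violation diagnosable instead of an opaque throw. The list itself is circular and doubly linked, with the header acting as a node, so insertion needs no nil checks:

    #include <assert.h>
    #include <stddef.h>

    typedef struct Span { struct Span *next, *prev; } Span;

    static void list_init(Span *list) { list->next = list->prev = list; }

    static void list_insert(Span *list, Span *span)
    {
        assert(span->next == NULL && span->prev == NULL);  /* must be detached */
        span->next = list->next;
        span->prev = list;
        span->next->prev = span;   /* old first element points back at span */
        span->prev->next = span;   /* header points forward at span */
    }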
@@ -65,7 +65,7 @@ stkbucket(uintptr *stk, int32 nstk)
 		   runtime·mcmp((byte*)b->stk, (byte*)stk, nstk*sizeof stk[0]) == 0)
 			return b;
 
-	b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], RefNoProfiling, 0, 1);
+	b = runtime·mallocgc(sizeof *b + nstk*sizeof stk[0], FlagNoProfiling, 0, 1);
 	bucketmem += sizeof *b + nstk*sizeof stk[0];
 	runtime·memmove(b->stk, stk, nstk*sizeof stk[0]);
 	b->hash = h;
@@ -132,7 +132,7 @@ setaddrbucket(uintptr addr, Bucket *b)
 		if(ah->addr == (addr>>20))
 			goto found;
 
-	ah = runtime·mallocgc(sizeof *ah, RefNoProfiling, 0, 1);
+	ah = runtime·mallocgc(sizeof *ah, FlagNoProfiling, 0, 1);
 	addrmem += sizeof *ah;
 	ah->next = addrhash[h];
 	ah->addr = addr>>20;
@@ -140,7 +140,7 @@ setaddrbucket(uintptr addr, Bucket *b)
 found:
 	if((e = addrfree) == nil) {
-		e = runtime·mallocgc(64*sizeof *e, RefNoProfiling, 0, 0);
+		e = runtime·mallocgc(64*sizeof *e, FlagNoProfiling, 0, 0);
 		addrmem += 64*sizeof *e;
 		for(i=0; i+1<64; i++)
 			e[i].next = &e[i+1];
...
@@ -57,7 +57,7 @@ runtime·SizeToClass(int32 size)
 void
 runtime·InitSizes(void)
 {
-	int32 align, sizeclass, size, osize, nextsize, n;
+	int32 align, sizeclass, size, nextsize, n;
 	uint32 i;
 	uintptr allocsize, npages;
@@ -81,8 +81,7 @@ runtime·InitSizes(void)
 		// the leftover is less than 1/8 of the total,
 		// so wasted space is at most 12.5%.
 		allocsize = PageSize;
-		osize = size + RefcountOverhead;
-		while(allocsize%osize > (allocsize/8))
+		while(allocsize%size > allocsize/8)
 			allocsize += PageSize;
 		npages = allocsize >> PageShift;
@@ -93,7 +92,7 @@ runtime·InitSizes(void)
 		// different sizes.
 		if(sizeclass > 1
 		&& npages == runtime·class_to_allocnpages[sizeclass-1]
-		&& allocsize/osize == allocsize/(runtime·class_to_size[sizeclass-1]+RefcountOverhead)) {
+		&& allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
 			runtime·class_to_size[sizeclass-1] = size;
 			continue;
 		}
...
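The allocsize%size > allocsize/8 test bounds internal fragmentation at 12.5% per class. Illustrative numbers: for size 1152 with 4096-byte pages, one page wastes 4096 mod 1152 = 640 bytes, and 640 > 4096/8 = 512, so the loop grows to two pages, where 8192 mod 1152 = 128 ≤ 1024: seven objects per two-page span with only 128/8192 ≈ 1.6% waste. Before this CL the same test ran against size+4, which is why removing the refcount word moves the class boundaries and shrinks NumSizeClasses from 67 to 61.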
@@ -41,7 +41,7 @@ makeslice1(SliceType *t, int32 len, int32 cap, Slice *ret)
 	ret->cap = cap;
 
 	if((t->elem->kind&KindNoPointers))
-		ret->array = runtime·mallocgc(size, RefNoPointers, 1, 1);
+		ret->array = runtime·mallocgc(size, FlagNoPointers, 1, 1);
 	else
 		ret->array = runtime·mal(size);
 }
...
@@ -225,7 +225,7 @@ func slicebytetostring(b Slice) (s String) {
 }
 
 func stringtoslicebyte(s String) (b Slice) {
-	b.array = runtime·mallocgc(s.len, RefNoPointers, 1, 1);
+	b.array = runtime·mallocgc(s.len, FlagNoPointers, 1, 1);
 	b.len = s.len;
 	b.cap = s.len;
 	runtime·mcpy(b.array, s.str, s.len);
@@ -268,7 +268,7 @@ func stringtosliceint(s String) (b Slice) {
 		n++;
 	}
 
-	b.array = runtime·mallocgc(n*sizeof(r[0]), RefNoPointers, 1, 1);
+	b.array = runtime·mallocgc(n*sizeof(r[0]), FlagNoPointers, 1, 1);
 	b.len = n;
 	b.cap = n;
 	p = s.str;
...
@@ -48,7 +48,7 @@ runtime·SysFree(void *v, uintptr n)
 void*
 runtime·SysReserve(void *v, uintptr n)
 {
-	return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, 0);
+	return runtime·stdcall(runtime·VirtualAlloc, 4, v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
 }
 
 void
...
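The Windows fix changes the protection argument of the MEM_RESERVE call: 0 is not one of VirtualAlloc's documented protection constants, so the reservation would presumably fail, whereas PAGE_EXECUTE_READWRITE is a valid protection for a reservation that is committed later. A minimal sketch of reserve-then-commit with the standard Win32 API:

    #include <windows.h>

    /* reserve address space now; commit pages later as the heap grows */
    static void *sys_reserve(void *v, size_t n)
    {
        return VirtualAlloc(v, n, MEM_RESERVE, PAGE_EXECUTE_READWRITE);
    }

    static void *sys_commit(void *v, size_t n)
    {
        return VirtualAlloc(v, n, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    }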