Commit 3b5278fc authored by Keith Randall

runtime: get rid of the settype buffer and lock.

MCaches now hold an MSpan for each sizeclass, which they have
exclusive access to allocate from, so no lock is needed.

Modifying the heap bitmaps also no longer requires a cas.

runtime.free gets more expensive.  But we don't use it
much any more.

It's not much faster on 1 processor, but it's a lot
faster on multiple processors.

benchmark                 old ns/op    new ns/op    delta
BenchmarkSetTypeNoPtr1           24           23   -0.42%
BenchmarkSetTypeNoPtr2           33           34   +0.89%
BenchmarkSetTypePtr1             51           49   -3.72%
BenchmarkSetTypePtr2             55           54   -1.98%

benchmark                old ns/op    new ns/op    delta
BenchmarkAllocation          52739        50770   -3.73%
BenchmarkAllocation-2        33957        34141   +0.54%
BenchmarkAllocation-3        33326        29015  -12.94%
BenchmarkAllocation-4        38105        25795  -32.31%
BenchmarkAllocation-5        68055        24409  -64.13%
BenchmarkAllocation-6        71544        23488  -67.17%
BenchmarkAllocation-7        68374        23041  -66.30%
BenchmarkAllocation-8        70117        20758  -70.40%

LGTM=rsc, dvyukov
R=dvyukov, bradfitz, khr, rsc
CC=golang-codereviews
https://golang.org/cl/46810043
parent 3d4c12d9
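The gist of the change, for readers skimming the diffs below: each P's MCache now owns one MSpan per size class outright, so the common allocation path is a plain free-list pop with no lock and no atomic; only the refill path, which trades whole spans with the locked MCentral, synchronizes. Here is a minimal, self-contained C sketch of that shape. It is not the runtime code itself: the names (Link, Span, Central, Cache, cache_alloc, central_cache_span) are invented for illustration, and size classes, bitmap marking, and handing exhausted spans back to the MCentral are elided.

#include <pthread.h>
#include <stddef.h>

// Free objects are threaded through their first word.
typedef struct Link { struct Link *next; } Link;

// A span: a run of pages carved into equal-sized objects.
typedef struct Span { Link *freelist; int ref; } Span;

// Central store for one size class, shared by all caches; needs its lock.
typedef struct Central {
	pthread_mutex_t lock;
	Span **spans;	// spans that still have free objects
	int nspan;
} Central;

// Per-P cache: one exclusively owned span per size class (one class shown).
typedef struct Cache { Span *alloc; } Cache;

// Slow path: take the central lock once and hand a whole span to the cache.
static Span*
central_cache_span(Central *c)
{
	Span *s = NULL;

	pthread_mutex_lock(&c->lock);
	if(c->nspan > 0)
		s = c->spans[--c->nspan];
	pthread_mutex_unlock(&c->lock);
	return s;
}

// Fast path: pop an object off the owned span's free list; no lock, no CAS.
static void*
cache_alloc(Cache *cache, Central *central)
{
	Span *s = cache->alloc;
	Link *v;

	if(s == NULL || s->freelist == NULL) {
		// Refill: trade spans with the central store
		// (returning the exhausted span is elided here).
		s = central_cache_span(central);
		if(s == NULL)
			return NULL;	// out of memory in this toy model
		cache->alloc = s;
	}
	v = s->freelist;
	s->freelist = v->next;
	s->ref++;
	return v;
}

The cost shows up on the free side: an object freed with runtime·free can only be pushed straight onto a span's freelist by the cache that currently owns that span; otherwise it goes through MCache_Free and, for spans cached elsewhere, into the span's freebuf, which is part of why the message notes that runtime.free gets more expensive.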
...
@@ -151,3 +151,73 @@ func TestGcRescan(t *testing.T) {
		}
	}
}
func BenchmarkSetTypeNoPtr1(b *testing.B) {
type NoPtr1 struct {
p uintptr
}
var p *NoPtr1
for i := 0; i < b.N; i++ {
p = &NoPtr1{}
}
_ = p
}
func BenchmarkSetTypeNoPtr2(b *testing.B) {
type NoPtr2 struct {
p, q uintptr
}
var p *NoPtr2
for i := 0; i < b.N; i++ {
p = &NoPtr2{}
}
_ = p
}
func BenchmarkSetTypePtr1(b *testing.B) {
type Ptr1 struct {
p *byte
}
var p *Ptr1
for i := 0; i < b.N; i++ {
p = &Ptr1{}
}
_ = p
}
func BenchmarkSetTypePtr2(b *testing.B) {
type Ptr2 struct {
p, q *byte
}
var p *Ptr2
for i := 0; i < b.N; i++ {
p = &Ptr2{}
}
_ = p
}
func BenchmarkAllocation(b *testing.B) {
type T struct {
x, y *byte
}
ngo := runtime.GOMAXPROCS(0)
work := make(chan bool, b.N+ngo)
result := make(chan *T)
for i := 0; i < b.N; i++ {
work <- true
}
for i := 0; i < ngo; i++ {
work <- false
}
for i := 0; i < ngo; i++ {
go func() {
var x *T
for <-work {
for i := 0; i < 1000; i++ {
x = &T{}
}
}
result <- x
}()
}
for i := 0; i < ngo; i++ {
<-result
}
}
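The BenchmarkAllocation-N rows in the commit message correspond to GOMAXPROCS values selected with the -cpu flag of go test; an invocation along the lines of go test -run=NONE -bench='SetType|Allocation' -cpu=1,2,3,4,5,6,7,8 runtime should reproduce that shape of output (the exact command is illustrative, not taken from the CL).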
...
@@ -27,8 +27,9 @@ extern MStats mstats; // defined in zruntime_def_$GOOS_$GOARCH.go
extern volatile intgo runtime·MemProfileRate;

-static void* largealloc(uint32, uintptr*);
+static MSpan* largealloc(uint32, uintptr*);
static void profilealloc(void *v, uintptr size, uintptr typ);
+static void settype(MSpan *s, void *v, uintptr typ);

// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
...
@@ -41,7 +42,7 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
	uintptr tinysize, size1;
	intgo rate;
	MCache *c;
-	MCacheList *l;
+	MSpan *s;
	MLink *v, *next;
	byte *tiny;
...
@@ -53,8 +54,8 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
	}
	if(m->mallocing)
		runtime·throw("malloc/free - deadlock");

-	// Disable preemption during settype_flush.
-	// We can not use m->mallocing for this, because settype_flush calls mallocgc.
+	// Disable preemption during settype.
+	// We can not use m->mallocing for this, because settype calls mallocgc.
	m->locks++;
	m->mallocing = 1;
...
@@ -118,15 +119,15 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
			}
		}
		// Allocate a new TinySize block.
-		l = &c->list[TinySizeClass];
-		if(l->list == nil)
-			runtime·MCache_Refill(c, TinySizeClass);
-		v = l->list;
+		s = c->alloc[TinySizeClass];
+		if(s->freelist == nil)
+			s = runtime·MCache_Refill(c, TinySizeClass);
+		v = s->freelist;
		next = v->next;
+		s->freelist = next;
+		s->ref++;
		if(next != nil)  // prefetching nil leads to a DTLB miss
			PREFETCH(next);
-		l->list = next;
-		l->nlist--;
		((uint64*)v)[0] = 0;
		((uint64*)v)[1] = 0;
		// See if we need to replace the existing tiny block with the new one
...
@@ -145,15 +146,15 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
		else
			sizeclass = runtime·size_to_class128[(size-1024+127) >> 7];
		size = runtime·class_to_size[sizeclass];
-		l = &c->list[sizeclass];
-		if(l->list == nil)
-			runtime·MCache_Refill(c, sizeclass);
-		v = l->list;
+		s = c->alloc[sizeclass];
+		if(s->freelist == nil)
+			s = runtime·MCache_Refill(c, sizeclass);
+		v = s->freelist;
		next = v->next;
+		s->freelist = next;
+		s->ref++;
		if(next != nil)  // prefetching nil leads to a DTLB miss
			PREFETCH(next);
-		l->list = next;
-		l->nlist--;
		if(!(flag & FlagNoZero)) {
			v->next = nil;
			// block is zeroed iff second word is zero ...
...
@@ -164,7 +165,8 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
		c->local_cachealloc += size;
	} else {
		// Allocate directly from heap.
-		v = largealloc(flag, &size);
+		s = largealloc(flag, &size);
+		v = (void*)(s->start << PageShift);
	}

	if(flag & FlagNoGC)
...
@@ -175,21 +177,12 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
	if(DebugTypeAtBlockEnd)
		*(uintptr*)((uintptr)v+size-sizeof(uintptr)) = typ;

+	m->mallocing = 0;
	// TODO: save type even if FlagNoScan? Potentially expensive but might help
	// heap profiling/tracing.
-	if(UseSpanType && !(flag & FlagNoScan) && typ != 0) {
-		uintptr *buf, i;
-
-		buf = m->settype_buf;
-		i = m->settype_bufsize;
-		buf[i++] = (uintptr)v;
-		buf[i++] = typ;
-		m->settype_bufsize = i;
-	}
-
-	m->mallocing = 0;
-
-	if(UseSpanType && !(flag & FlagNoScan) && typ != 0 && m->settype_bufsize == nelem(m->settype_buf))
-		runtime·settype_flush(m);
+	if(UseSpanType && !(flag & FlagNoScan) && typ != 0)
+		settype(s, v, typ);

	if(raceenabled)
		runtime·racemalloc(v, size);
...
@@ -215,7 +208,7 @@ runtime·mallocgc(uintptr size, uintptr typ, uint32 flag)
	return v;
}

-static void*
+static MSpan*
largealloc(uint32 flag, uintptr *sizep)
{
	uintptr npages, size;
...
@@ -237,7 +230,7 @@ largealloc(uint32 flag, uintptr *sizep)
	v = (void*)(s->start << PageShift);
	// setup for mark sweep
	runtime·markspan(v, 0, 0, true);
-	return v;
+	return s;
}

static void
...
@@ -318,7 +311,7 @@ runtime·free(void *v)
		s->needzero = 1;
		// Must mark v freed before calling unmarkspan and MHeap_Free:
		// they might coalesce v into other spans and change the bitmap further.
-		runtime·markfreed(v, size);
+		runtime·markfreed(v);
		runtime·unmarkspan(v, 1<<PageShift);
		if(runtime·debug.efence)
			runtime·SysFree((void*)(s->start<<PageShift), size, &mstats.heap_sys);
...
@@ -335,9 +328,17 @@ runtime·free(void *v)
		// Must mark v freed before calling MCache_Free:
		// it might coalesce v and other blocks into a bigger span
		// and change the bitmap further.
-		runtime·markfreed(v, size);
		c->local_nsmallfree[sizeclass]++;
-		runtime·MCache_Free(c, v, sizeclass, size);
+		if(c->alloc[sizeclass] == s) {
+			// We own the span, so we can just add v to the freelist
+			runtime·markfreed(v);
+			((MLink*)v)->next = s->freelist;
+			s->freelist = v;
+			s->ref--;
+		} else {
+			// Someone else owns this span. Add to free queue.
+			runtime·MCache_Free(c, v, sizeclass, size);
+		}
	}
	m->mallocing = 0;
}
...
@@ -390,37 +391,6 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
	return 1;
}

-MCache*
-runtime·allocmcache(void)
-{
-	intgo rate;
-	MCache *c;
-
-	runtime·lock(&runtime·mheap);
-	c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
-	runtime·unlock(&runtime·mheap);
-	runtime·memclr((byte*)c, sizeof(*c));
-
-	// Set first allocation sample size.
-	rate = runtime·MemProfileRate;
-	if(rate > 0x3fffffff)	// make 2*rate not overflow
-		rate = 0x3fffffff;
-	if(rate != 0)
-		c->next_sample = runtime·fastrand1() % (2*rate);
-
-	return c;
-}
-
-void
-runtime·freemcache(MCache *c)
-{
-	runtime·MCache_ReleaseAll(c);
-	runtime·lock(&runtime·mheap);
-	runtime·purgecachedstats(c);
-	runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
-	runtime·unlock(&runtime·mheap);
-}
-
void
runtime·purgecachedstats(MCache *c)
{
...@@ -696,94 +666,67 @@ runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat) ...@@ -696,94 +666,67 @@ runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat)
return p; return p;
} }
static Lock settype_lock; static void
settype(MSpan *s, void *v, uintptr typ)
void
runtime·settype_flush(M *mp)
{ {
uintptr *buf, *endbuf;
uintptr size, ofs, j, t; uintptr size, ofs, j, t;
uintptr ntypes, nbytes2, nbytes3; uintptr ntypes, nbytes2, nbytes3;
uintptr *data2; uintptr *data2;
byte *data3; byte *data3;
void *v;
uintptr typ, p;
MSpan *s;
buf = mp->settype_buf; if(s->sizeclass == 0) {
endbuf = buf + mp->settype_bufsize; s->types.compression = MTypes_Single;
s->types.data = typ;
runtime·lock(&settype_lock); return;
while(buf < endbuf) { }
v = (void*)*buf; size = s->elemsize;
*buf = 0; ofs = ((uintptr)v - (s->start<<PageShift)) / size;
buf++;
typ = *buf; switch(s->types.compression) {
buf++; case MTypes_Empty:
ntypes = (s->npages << PageShift) / size;
// (Manually inlined copy of runtime·MHeap_Lookup) nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
p = (uintptr)v>>PageShift; data3 = runtime·mallocgc(nbytes3, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
p -= (uintptr)runtime·mheap.arena_start >> PageShift; s->types.compression = MTypes_Bytes;
s = runtime·mheap.spans[p]; s->types.data = (uintptr)data3;
((uintptr*)data3)[1] = typ;
if(s->sizeclass == 0) { data3[8*sizeof(uintptr) + ofs] = 1;
s->types.compression = MTypes_Single; break;
s->types.data = typ;
continue; case MTypes_Words:
((uintptr*)s->types.data)[ofs] = typ;
break;
case MTypes_Bytes:
data3 = (byte*)s->types.data;
for(j=1; j<8; j++) {
if(((uintptr*)data3)[j] == typ) {
break;
}
if(((uintptr*)data3)[j] == 0) {
((uintptr*)data3)[j] = typ;
break;
}
} }
if(j < 8) {
size = s->elemsize; data3[8*sizeof(uintptr) + ofs] = j;
ofs = ((uintptr)v - (s->start<<PageShift)) / size; } else {
switch(s->types.compression) {
case MTypes_Empty:
ntypes = (s->npages << PageShift) / size; ntypes = (s->npages << PageShift) / size;
nbytes3 = 8*sizeof(uintptr) + 1*ntypes; nbytes2 = ntypes * sizeof(uintptr);
data3 = runtime·mallocgc(nbytes3, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC); data2 = runtime·mallocgc(nbytes2, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
s->types.compression = MTypes_Bytes; s->types.compression = MTypes_Words;
s->types.data = (uintptr)data3; s->types.data = (uintptr)data2;
((uintptr*)data3)[1] = typ;
data3[8*sizeof(uintptr) + ofs] = 1; // Move the contents of data3 to data2. Then deallocate data3.
break; for(j=0; j<ntypes; j++) {
t = data3[8*sizeof(uintptr) + j];
case MTypes_Words: t = ((uintptr*)data3)[t];
((uintptr*)s->types.data)[ofs] = typ; data2[j] = t;
break;
case MTypes_Bytes:
data3 = (byte*)s->types.data;
for(j=1; j<8; j++) {
if(((uintptr*)data3)[j] == typ) {
break;
}
if(((uintptr*)data3)[j] == 0) {
((uintptr*)data3)[j] = typ;
break;
}
}
if(j < 8) {
data3[8*sizeof(uintptr) + ofs] = j;
} else {
ntypes = (s->npages << PageShift) / size;
nbytes2 = ntypes * sizeof(uintptr);
data2 = runtime·mallocgc(nbytes2, 0, FlagNoProfiling|FlagNoScan|FlagNoInvokeGC);
s->types.compression = MTypes_Words;
s->types.data = (uintptr)data2;
// Move the contents of data3 to data2. Then deallocate data3.
for(j=0; j<ntypes; j++) {
t = data3[8*sizeof(uintptr) + j];
t = ((uintptr*)data3)[t];
data2[j] = t;
}
data2[ofs] = typ;
} }
break; data2[ofs] = typ;
} }
break;
} }
runtime·unlock(&settype_lock);
mp->settype_bufsize = 0;
} }
uintptr
...
@@ -816,9 +759,7 @@ runtime·gettype(void *v)
			runtime·throw("runtime·gettype: invalid compression kind");
		}
		if(0) {
-			runtime·lock(&settype_lock);
			runtime·printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t);
-			runtime·unlock(&settype_lock);
		}
		return t;
	}
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
// MHeap: the malloc heap, managed at page (4096-byte) granularity. // MHeap: the malloc heap, managed at page (4096-byte) granularity.
// MSpan: a run of pages managed by the MHeap. // MSpan: a run of pages managed by the MHeap.
// MCentral: a shared free list for a given size class. // MCentral: a shared free list for a given size class.
// MCache: a per-thread (in Go, per-M) cache for small objects. // MCache: a per-thread (in Go, per-P) cache for small objects.
// MStats: allocation statistics. // MStats: allocation statistics.
// //
// Allocating a small object proceeds up a hierarchy of caches: // Allocating a small object proceeds up a hierarchy of caches:
...@@ -281,8 +281,6 @@ extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1]; ...@@ -281,8 +281,6 @@ extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
extern void runtime·InitSizes(void); extern void runtime·InitSizes(void);
// Per-thread (in Go, per-M) cache for small objects.
// No locking needed because it is per-thread (per-M).
typedef struct MCacheList MCacheList; typedef struct MCacheList MCacheList;
struct MCacheList struct MCacheList
{ {
...@@ -290,6 +288,8 @@ struct MCacheList ...@@ -290,6 +288,8 @@ struct MCacheList
uint32 nlist; uint32 nlist;
}; };
// Per-thread (in Go, per-P) cache for small objects.
// No locking needed because it is per-thread (per-P).
struct MCache struct MCache
{ {
// The following members are accessed on every malloc, // The following members are accessed on every malloc,
...@@ -301,7 +301,8 @@ struct MCache ...@@ -301,7 +301,8 @@ struct MCache
byte* tiny; byte* tiny;
uintptr tinysize; uintptr tinysize;
// The rest is not accessed on every malloc. // The rest is not accessed on every malloc.
MCacheList list[NumSizeClasses]; MSpan* alloc[NumSizeClasses]; // spans to allocate from
MCacheList free[NumSizeClasses];// lists of explicitly freed objects
// Local allocator stats, flushed during GC. // Local allocator stats, flushed during GC.
uintptr local_nlookup; // number of pointer lookups uintptr local_nlookup; // number of pointer lookups
uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize) uintptr local_largefree; // bytes freed for large objects (>MaxSmallSize)
...@@ -309,8 +310,8 @@ struct MCache ...@@ -309,8 +310,8 @@ struct MCache
uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) uintptr local_nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize)
}; };
void runtime·MCache_Refill(MCache *c, int32 sizeclass); MSpan* runtime·MCache_Refill(MCache *c, int32 sizeclass);
void runtime·MCache_Free(MCache *c, void *p, int32 sizeclass, uintptr size); void runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size);
void runtime·MCache_ReleaseAll(MCache *c); void runtime·MCache_ReleaseAll(MCache *c);
// MTypes describes the types of blocks allocated within a span. // MTypes describes the types of blocks allocated within a span.
...@@ -409,8 +410,9 @@ struct MSpan ...@@ -409,8 +410,9 @@ struct MSpan
// if sweepgen == h->sweepgen, the span is swept and ready to use // if sweepgen == h->sweepgen, the span is swept and ready to use
// h->sweepgen is incremented by 2 after every GC // h->sweepgen is incremented by 2 after every GC
uint32 sweepgen; uint32 sweepgen;
uint16 ref; // number of allocated objects in this span uint16 ref; // capacity - number of objects in freelist
uint8 sizeclass; // size class uint8 sizeclass; // size class
bool incache; // being used by an MCache
uint8 state; // MSpanInUse etc uint8 state; // MSpanInUse etc
uint8 needzero; // needs to be zeroed before allocation uint8 needzero; // needs to be zeroed before allocation
uintptr elemsize; // computed from sizeclass or from npages uintptr elemsize; // computed from sizeclass or from npages
...@@ -418,8 +420,9 @@ struct MSpan ...@@ -418,8 +420,9 @@ struct MSpan
uintptr npreleased; // number of pages released to the OS uintptr npreleased; // number of pages released to the OS
byte *limit; // end of data in span byte *limit; // end of data in span
MTypes types; // types of allocated objects in this span MTypes types; // types of allocated objects in this span
Lock specialLock; // TODO: use to protect types also (instead of settype_lock) Lock specialLock; // guards specials list
Special *specials; // linked list of special records sorted by offset. Special *specials; // linked list of special records sorted by offset.
MLink *freebuf; // objects freed explicitly, not incorporated into freelist yet
}; };
void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages); void runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages);
...@@ -441,15 +444,16 @@ struct MCentral ...@@ -441,15 +444,16 @@ struct MCentral
{ {
Lock; Lock;
int32 sizeclass; int32 sizeclass;
MSpan nonempty; MSpan nonempty; // list of spans with a free object
MSpan empty; MSpan empty; // list of spans with no free objects (or cached in an MCache)
int32 nfree; int32 nfree; // # of objects available in nonempty spans
}; };
void runtime·MCentral_Init(MCentral *c, int32 sizeclass); void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime·MCentral_AllocList(MCentral *c, MLink **first); MSpan* runtime·MCentral_CacheSpan(MCentral *c);
void runtime·MCentral_FreeList(MCentral *c, MLink *first); void runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); bool runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
void runtime·MCentral_FreeList(MCentral *c, MLink *start); // TODO: need this?
// Main malloc heap. // Main malloc heap.
// The heap itself is the "free[]" and "large" arrays, // The heap itself is the "free[]" and "large" arrays,
...@@ -520,7 +524,7 @@ uintptr runtime·sweepone(void); ...@@ -520,7 +524,7 @@ uintptr runtime·sweepone(void);
void runtime·markscan(void *v); void runtime·markscan(void *v);
void runtime·marknogc(void *v); void runtime·marknogc(void *v);
void runtime·checkallocated(void *v, uintptr n); void runtime·checkallocated(void *v, uintptr n);
void runtime·markfreed(void *v, uintptr n); void runtime·markfreed(void *v);
void runtime·checkfreed(void *v, uintptr n); void runtime·checkfreed(void *v, uintptr n);
extern int32 runtime·checking; extern int32 runtime·checking;
void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
...@@ -529,8 +533,6 @@ void runtime·purgecachedstats(MCache*); ...@@ -529,8 +533,6 @@ void runtime·purgecachedstats(MCache*);
void* runtime·cnew(Type*); void* runtime·cnew(Type*);
void* runtime·cnewarray(Type*, intgo); void* runtime·cnewarray(Type*, intgo);
void runtime·settype_flush(M*);
void runtime·settype_sysfree(MSpan*);
uintptr runtime·gettype(void*); uintptr runtime·gettype(void*);
enum enum
......
...@@ -10,69 +10,119 @@ ...@@ -10,69 +10,119 @@
#include "arch_GOARCH.h" #include "arch_GOARCH.h"
#include "malloc.h" #include "malloc.h"
extern volatile intgo runtime·MemProfileRate;
// dummy MSpan that contains no free objects.
static MSpan emptymspan;
MCache*
runtime·allocmcache(void)
{
intgo rate;
MCache *c;
int32 i;
runtime·lock(&runtime·mheap);
c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
runtime·unlock(&runtime·mheap);
runtime·memclr((byte*)c, sizeof(*c));
for(i = 0; i < NumSizeClasses; i++)
c->alloc[i] = &emptymspan;
// Set first allocation sample size.
rate = runtime·MemProfileRate;
if(rate > 0x3fffffff) // make 2*rate not overflow
rate = 0x3fffffff;
if(rate != 0)
c->next_sample = runtime·fastrand1() % (2*rate);
return c;
}
void void
runtime·freemcache(MCache *c)
{
runtime·MCache_ReleaseAll(c);
runtime·lock(&runtime·mheap);
runtime·purgecachedstats(c);
runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
runtime·unlock(&runtime·mheap);
}
// Gets a span that has a free object in it and assigns it
// to be the cached span for the given sizeclass. Returns this span.
MSpan*
runtime·MCache_Refill(MCache *c, int32 sizeclass) runtime·MCache_Refill(MCache *c, int32 sizeclass)
{ {
MCacheList *l; MCacheList *l;
MSpan *s;
// Replenish using central lists. m->locks++;
l = &c->list[sizeclass]; // Return the current cached span to the central lists.
if(l->list) s = c->alloc[sizeclass];
runtime·throw("MCache_Refill: the list is not empty"); if(s->freelist != nil)
l->nlist = runtime·MCentral_AllocList(&runtime·mheap.central[sizeclass], &l->list); runtime·throw("refill on a nonempty span");
if(l->list == nil) if(s != &emptymspan)
runtime·throw("out of memory"); runtime·MCentral_UncacheSpan(&runtime·mheap.central[sizeclass], s);
}
// Take n elements off l and return them to the central free list. // Push any explicitly freed objects to the central lists.
static void // Not required, but it seems like a good time to do it.
ReleaseN(MCacheList *l, int32 n, int32 sizeclass) l = &c->free[sizeclass];
{ if(l->nlist > 0) {
MLink *first, **lp; runtime·MCentral_FreeList(&runtime·mheap.central[sizeclass], l->list);
int32 i; l->list = nil;
l->nlist = 0;
}
// Cut off first n elements. // Get a new cached span from the central lists.
first = l->list; s = runtime·MCentral_CacheSpan(&runtime·mheap.central[sizeclass]);
lp = &l->list; if(s == nil)
for(i=0; i<n; i++) runtime·throw("out of memory");
lp = &(*lp)->next; if(s->freelist == nil) {
l->list = *lp; runtime·printf("%d %d\n", s->ref, (int32)((s->npages << PageShift) / s->elemsize));
*lp = nil; runtime·throw("empty span");
l->nlist -= n; }
c->alloc[sizeclass] = s;
// Return them to central free list. m->locks--;
runtime·MCentral_FreeList(&runtime·mheap.central[sizeclass], first); return s;
} }
void void
runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size)
{ {
MCacheList *l; MCacheList *l;
MLink *p;
// Put back on list. // Put on free list.
l = &c->list[sizeclass]; l = &c->free[sizeclass];
p = v;
p->next = l->list; p->next = l->list;
l->list = p; l->list = p;
l->nlist++; l->nlist++;
c->local_cachealloc -= size; c->local_cachealloc -= size;
// We transfer span at a time from MCentral to MCache, // We transfer a span at a time from MCentral to MCache,
// if we have 2 times more than that, release a half back. // so we'll do the same in the other direction.
if(l->nlist >= 2*(runtime·class_to_allocnpages[sizeclass]<<PageShift)/size) if(l->nlist >= (runtime·class_to_allocnpages[sizeclass]<<PageShift)/size) {
ReleaseN(l, l->nlist/2, sizeclass); runtime·MCentral_FreeList(&runtime·mheap.central[sizeclass], l->list);
l->list = nil;
l->nlist = 0;
}
} }
void void
runtime·MCache_ReleaseAll(MCache *c) runtime·MCache_ReleaseAll(MCache *c)
{ {
int32 i; int32 i;
MSpan *s;
MCacheList *l; MCacheList *l;
for(i=0; i<NumSizeClasses; i++) { for(i=0; i<NumSizeClasses; i++) {
l = &c->list[i]; s = c->alloc[i];
if(l->list) { if(s != &emptymspan) {
runtime·MCentral_UncacheSpan(&runtime·mheap.central[i], s);
c->alloc[i] = &emptymspan;
}
l = &c->free[i];
if(l->nlist > 0) {
runtime·MCentral_FreeList(&runtime·mheap.central[i], l->list); runtime·MCentral_FreeList(&runtime·mheap.central[i], l->list);
l->list = nil; l->list = nil;
l->nlist = 0; l->nlist = 0;
......
...@@ -19,7 +19,8 @@ ...@@ -19,7 +19,8 @@
#include "malloc.h" #include "malloc.h"
static bool MCentral_Grow(MCentral *c); static bool MCentral_Grow(MCentral *c);
static void MCentral_Free(MCentral *c, void *v); static void MCentral_Free(MCentral *c, MLink *v);
static void MCentral_ReturnToHeap(MCentral *c, MSpan *s);
// Initialize a single central free list. // Initialize a single central free list.
void void
...@@ -30,12 +31,9 @@ runtime·MCentral_Init(MCentral *c, int32 sizeclass) ...@@ -30,12 +31,9 @@ runtime·MCentral_Init(MCentral *c, int32 sizeclass)
runtime·MSpanList_Init(&c->empty); runtime·MSpanList_Init(&c->empty);
} }
// Allocate a list of objects from the central free list. // Allocate a span to use in an MCache.
// Return the number of objects allocated. MSpan*
// The objects are linked together by their first words. runtime·MCentral_CacheSpan(MCentral *c)
// On return, *pfirst points at the first object.
int32
runtime·MCentral_AllocList(MCentral *c, MLink **pfirst)
{ {
MSpan *s; MSpan *s;
int32 cap, n; int32 cap, n;
...@@ -85,25 +83,63 @@ retry: ...@@ -85,25 +83,63 @@ retry:
// Replenish central list if empty. // Replenish central list if empty.
if(!MCentral_Grow(c)) { if(!MCentral_Grow(c)) {
runtime·unlock(c); runtime·unlock(c);
*pfirst = nil; return nil;
return 0;
} }
s = c->nonempty.next; goto retry;
havespan: havespan:
cap = (s->npages << PageShift) / s->elemsize; cap = (s->npages << PageShift) / s->elemsize;
n = cap - s->ref; n = cap - s->ref;
*pfirst = s->freelist; if(n == 0)
s->freelist = nil; runtime·throw("empty span");
s->ref += n; if(s->freelist == nil)
runtime·throw("freelist empty");
c->nfree -= n; c->nfree -= n;
runtime·MSpanList_Remove(s); runtime·MSpanList_Remove(s);
runtime·MSpanList_InsertBack(&c->empty, s); runtime·MSpanList_InsertBack(&c->empty, s);
s->incache = true;
runtime·unlock(c);
return s;
}
// Return span from an MCache.
void
runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
{
MLink *v;
int32 cap, n;
runtime·lock(c);
s->incache = false;
// Move any explicitly freed items from the freebuf to the freelist.
while((v = s->freebuf) != nil) {
s->freebuf = v->next;
runtime·markfreed(v);
v->next = s->freelist;
s->freelist = v;
s->ref--;
}
if(s->ref == 0) {
// Free back to heap. Unlikely, but possible.
MCentral_ReturnToHeap(c, s); // unlocks c
return;
}
cap = (s->npages << PageShift) / s->elemsize;
n = cap - s->ref;
if(n > 0) {
c->nfree += n;
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s);
}
runtime·unlock(c); runtime·unlock(c);
return n;
} }
// Free the list of objects back into the central free list. // Free the list of objects back into the central free list c.
// Called from runtime·free.
void void
runtime·MCentral_FreeList(MCentral *c, MLink *start) runtime·MCentral_FreeList(MCentral *c, MLink *start)
{ {
...@@ -118,52 +154,58 @@ runtime·MCentral_FreeList(MCentral *c, MLink *start) ...@@ -118,52 +154,58 @@ runtime·MCentral_FreeList(MCentral *c, MLink *start)
} }
// Helper: free one object back into the central free list. // Helper: free one object back into the central free list.
// Caller must hold lock on c on entry. Holds lock on exit.
static void static void
MCentral_Free(MCentral *c, void *v) MCentral_Free(MCentral *c, MLink *v)
{ {
MSpan *s; MSpan *s;
MLink *p;
int32 size;
// Find span for v. // Find span for v.
s = runtime·MHeap_Lookup(&runtime·mheap, v); s = runtime·MHeap_Lookup(&runtime·mheap, v);
if(s == nil || s->ref == 0) if(s == nil || s->ref == 0)
runtime·throw("invalid free"); runtime·throw("invalid free");
if(s->sweepgen != runtime·mheap.sweepgen)
runtime·throw("free into unswept span");
// If the span is currently being used unsynchronized by an MCache,
// we can't modify the freelist. Add to the freebuf instead. The
// items will get moved to the freelist when the span is returned
// by the MCache.
if(s->incache) {
v->next = s->freebuf;
s->freebuf = v;
return;
}
// Move to nonempty if necessary. // Move span to nonempty if necessary.
if(s->freelist == nil) { if(s->freelist == nil) {
runtime·MSpanList_Remove(s); runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s); runtime·MSpanList_Insert(&c->nonempty, s);
} }
// Add v back to s's free list. // Add the object to span's free list.
p = v; runtime·markfreed(v);
p->next = s->freelist; v->next = s->freelist;
s->freelist = p; s->freelist = v;
s->ref--;
c->nfree++; c->nfree++;
// If s is completely freed, return it to the heap. // If s is completely freed, return it to the heap.
if(--s->ref == 0) { if(s->ref == 0) {
size = runtime·class_to_size[c->sizeclass]; MCentral_ReturnToHeap(c, s); // unlocks c
runtime·MSpanList_Remove(s);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
s->needzero = 1;
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime·unlock(c);
runtime·MHeap_Free(&runtime·mheap, s, 0);
runtime·lock(c); runtime·lock(c);
} }
} }
// Free n objects from a span s back into the central free list c. // Free n objects from a span s back into the central free list c.
// Called during sweep. // Called during sweep.
// Returns true if the span was returned to heap. // Returns true if the span was returned to heap. Sets sweepgen to
// the latest generation.
bool bool
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end) runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
{ {
int32 size; if(s->incache)
runtime·throw("freespan into cached span");
runtime·lock(c); runtime·lock(c);
// Move to nonempty if necessary. // Move to nonempty if necessary.
...@@ -177,6 +219,12 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink * ...@@ -177,6 +219,12 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *
s->freelist = start; s->freelist = start;
s->ref -= n; s->ref -= n;
c->nfree += n; c->nfree += n;
// delay updating sweepgen until here. This is the signal that
// the span may be used in an MCache, so it must come after the
// linked list operations above (actually, just after the
// lock of c above.)
runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
if(s->ref != 0) { if(s->ref != 0) {
runtime·unlock(c); runtime·unlock(c);
...@@ -184,14 +232,7 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink * ...@@ -184,14 +232,7 @@ runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *
} }
// s is completely freed, return it to the heap. // s is completely freed, return it to the heap.
size = runtime·class_to_size[c->sizeclass]; MCentral_ReturnToHeap(c, s); // unlocks c
runtime·MSpanList_Remove(s);
s->needzero = 1;
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime·unlock(c);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 0);
return true; return true;
} }
...@@ -246,3 +287,21 @@ MCentral_Grow(MCentral *c) ...@@ -246,3 +287,21 @@ MCentral_Grow(MCentral *c)
runtime·MSpanList_Insert(&c->nonempty, s); runtime·MSpanList_Insert(&c->nonempty, s);
return true; return true;
} }
// Return s to the heap. s must be unused (s->ref == 0). Unlocks c.
static void
MCentral_ReturnToHeap(MCentral *c, MSpan *s)
{
int32 size;
size = runtime·class_to_size[c->sizeclass];
runtime·MSpanList_Remove(s);
s->needzero = 1;
s->freelist = nil;
if(s->ref != 0)
runtime·throw("ref wrong");
c->nfree -= (s->npages << PageShift) / size;
runtime·unlock(c);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 0);
}
...@@ -1865,7 +1865,13 @@ runtime·MSpan_Sweep(MSpan *s) ...@@ -1865,7 +1865,13 @@ runtime·MSpan_Sweep(MSpan *s)
} }
} }
if(!sweepgenset) { // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
// because of the potential for a concurrent free/SetFinalizer.
// But we need to set it before we make the span available for allocation
// (return it to heap or mcentral), because allocation code assumes that a
// span is already swept if available for allocation.
if(!sweepgenset && nfree == 0) {
// The span must be in our exclusive ownership until we update sweepgen, // The span must be in our exclusive ownership until we update sweepgen,
// check for potential races. // check for potential races.
if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) { if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
...@@ -1875,11 +1881,12 @@ runtime·MSpan_Sweep(MSpan *s) ...@@ -1875,11 +1881,12 @@ runtime·MSpan_Sweep(MSpan *s)
} }
runtime·atomicstore(&s->sweepgen, sweepgen); runtime·atomicstore(&s->sweepgen, sweepgen);
} }
if(nfree) { if(nfree > 0) {
c->local_nsmallfree[cl] += nfree; c->local_nsmallfree[cl] += nfree;
c->local_cachealloc -= nfree * size; c->local_cachealloc -= nfree * size;
runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100)); runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100));
res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end); res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end);
//MCentral_FreeSpan updates sweepgen
} }
return res; return res;
} }
...@@ -1948,6 +1955,8 @@ runtime·sweepone(void) ...@@ -1948,6 +1955,8 @@ runtime·sweepone(void)
} }
if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1)) if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
continue; continue;
if(s->incache)
runtime·throw("sweep of incache span");
npages = s->npages; npages = s->npages;
if(!runtime·MSpan_Sweep(s)) if(!runtime·MSpan_Sweep(s))
npages = 0; npages = 0;
...@@ -2292,7 +2301,6 @@ gc(struct gc_args *args) ...@@ -2292,7 +2301,6 @@ gc(struct gc_args *args)
int64 t0, t1, t2, t3, t4; int64 t0, t1, t2, t3, t4;
uint64 heap0, heap1, obj, ninstr; uint64 heap0, heap1, obj, ninstr;
GCStats stats; GCStats stats;
M *mp;
uint32 i; uint32 i;
Eface eface; Eface eface;
...@@ -2302,9 +2310,6 @@ gc(struct gc_args *args) ...@@ -2302,9 +2310,6 @@ gc(struct gc_args *args)
if(CollectStats) if(CollectStats)
runtime·memclr((byte*)&gcstats, sizeof(gcstats)); runtime·memclr((byte*)&gcstats, sizeof(gcstats));
for(mp=runtime·allm; mp; mp=mp->alllink)
runtime·settype_flush(mp);
m->locks++; // disable gc during mallocs in parforalloc m->locks++; // disable gc during mallocs in parforalloc
if(work.markfor == nil) if(work.markfor == nil)
work.markfor = runtime·parforalloc(MaxGcproc); work.markfor = runtime·parforalloc(MaxGcproc);
...@@ -2617,59 +2622,30 @@ runtime·marknogc(void *v) ...@@ -2617,59 +2622,30 @@ runtime·marknogc(void *v)
void void
runtime·markscan(void *v) runtime·markscan(void *v)
{ {
uintptr *b, obits, bits, off, shift; uintptr *b, off, shift;
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord; shift = off % wordsPerBitmapWord;
*b |= bitScan<<shift;
for(;;) {
obits = *b;
if((obits>>shift & bitMask) != bitAllocated)
runtime·throw("bad initial state for markscan");
bits = obits | bitScan<<shift;
if(runtime·gomaxprocs == 1) {
*b = bits;
break;
} else {
// more than one goroutine is potentially running: use atomic op
if(runtime·casp((void**)b, (void*)obits, (void*)bits))
break;
}
}
} }
// mark the block at v of size n as freed. // mark the block at v as freed.
void void
runtime·markfreed(void *v, uintptr n) runtime·markfreed(void *v)
{ {
uintptr *b, obits, bits, off, shift; uintptr *b, off, shift;
if(0) if(0)
runtime·printf("markfreed %p+%p\n", v, n); runtime·printf("markfreed %p\n", v);
if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start) if((byte*)v > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
runtime·throw("markfreed: bad pointer"); runtime·throw("markfreed: bad pointer");
off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start; // word offset
b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1; b = (uintptr*)runtime·mheap.arena_start - off/wordsPerBitmapWord - 1;
shift = off % wordsPerBitmapWord; shift = off % wordsPerBitmapWord;
*b = (*b & ~(bitMask<<shift)) | (bitAllocated<<shift);
for(;;) {
obits = *b;
// This could be a free of a gc-eligible object (bitAllocated + others) or
// a FlagNoGC object (bitBlockBoundary set). In either case, we revert to
// a simple no-scan allocated object because it is going on a free list.
bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
if(runtime·gomaxprocs == 1) {
*b = bits;
break;
} else {
// more than one goroutine is potentially running: use atomic op
if(runtime·casp((void**)b, (void*)obits, (void*)bits))
break;
}
}
} }
// check that the block at v of size n is marked freed. // check that the block at v of size n is marked freed.
......
...
@@ -571,6 +571,7 @@ runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages)
	span->freelist = nil;
	span->ref = 0;
	span->sizeclass = 0;
+	span->incache = false;
	span->elemsize = 0;
	span->state = MSpanDead;
	span->unusedsince = 0;
...
@@ -579,6 +580,7 @@ runtime·MSpan_Init(MSpan *span, PageID start, uintptr npages)
	span->specialLock.key = 0;
	span->specials = nil;
	span->needzero = 0;
+	span->freebuf = nil;
}
...
...
@@ -351,10 +351,6 @@ struct M
	bool	needextram;
	bool	(*waitunlockf)(G*, void*);
	void*	waitlock;
-
-	uintptr	settype_buf[1024];
-	uintptr	settype_bufsize;
-
#ifdef GOOS_windows
	void*	thread;		// thread handle
	// these are here because they are too large to be on the stack
...