Commit 66d5c9b1 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov Committed by Russ Cox

runtime: add per-M caches for MemStats

Avoid touching centralized state during
memory manager operations.

R=rsc
CC=golang-dev
https://golang.org/cl/4766042
parent 257df171
...@@ -38,18 +38,18 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) ...@@ -38,18 +38,18 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
if(size == 0) if(size == 0)
size = 1; size = 1;
mstats.nmalloc++; c = m->mcache;
c->local_nmalloc++;
if(size <= MaxSmallSize) { if(size <= MaxSmallSize) {
// Allocate from mcache free lists. // Allocate from mcache free lists.
sizeclass = runtime·SizeToClass(size); sizeclass = runtime·SizeToClass(size);
size = runtime·class_to_size[sizeclass]; size = runtime·class_to_size[sizeclass];
c = m->mcache;
v = runtime·MCache_Alloc(c, sizeclass, size, zeroed); v = runtime·MCache_Alloc(c, sizeclass, size, zeroed);
if(v == nil) if(v == nil)
runtime·throw("out of memory"); runtime·throw("out of memory");
mstats.alloc += size; c->local_alloc += size;
mstats.total_alloc += size; c->local_total_alloc += size;
mstats.by_size[sizeclass].nmalloc++; c->local_by_size[sizeclass].nmalloc++;
} else { } else {
// TODO(rsc): Report tracebacks for very large allocations. // TODO(rsc): Report tracebacks for very large allocations.
...@@ -61,8 +61,8 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed) ...@@ -61,8 +61,8 @@ runtime·mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
if(s == nil) if(s == nil)
runtime·throw("out of memory"); runtime·throw("out of memory");
size = npages<<PageShift; size = npages<<PageShift;
mstats.alloc += size; c->local_alloc += size;
mstats.total_alloc += size; c->local_total_alloc += size;
v = (void*)(s->start << PageShift); v = (void*)(s->start << PageShift);
// setup for mark sweep // setup for mark sweep
...@@ -128,6 +128,7 @@ runtime·free(void *v) ...@@ -128,6 +128,7 @@ runtime·free(void *v)
// Find size class for v. // Find size class for v.
sizeclass = s->sizeclass; sizeclass = s->sizeclass;
c = m->mcache;
if(sizeclass == 0) { if(sizeclass == 0) {
// Large object. // Large object.
size = s->npages<<PageShift; size = s->npages<<PageShift;
...@@ -139,7 +140,6 @@ runtime·free(void *v) ...@@ -139,7 +140,6 @@ runtime·free(void *v)
runtime·MHeap_Free(&runtime·mheap, s, 1); runtime·MHeap_Free(&runtime·mheap, s, 1);
} else { } else {
// Small object. // Small object.
c = m->mcache;
size = runtime·class_to_size[sizeclass]; size = runtime·class_to_size[sizeclass];
if(size > sizeof(uintptr)) if(size > sizeof(uintptr))
((uintptr*)v)[1] = 1; // mark as "needs to be zeroed" ((uintptr*)v)[1] = 1; // mark as "needs to be zeroed"
...@@ -147,10 +147,10 @@ runtime·free(void *v) ...@@ -147,10 +147,10 @@ runtime·free(void *v)
// it might coalesce v and other blocks into a bigger span // it might coalesce v and other blocks into a bigger span
// and change the bitmap further. // and change the bitmap further.
runtime·markfreed(v, size); runtime·markfreed(v, size);
mstats.by_size[sizeclass].nfree++; c->local_by_size[sizeclass].nfree++;
runtime·MCache_Free(c, v, sizeclass, size); runtime·MCache_Free(c, v, sizeclass, size);
} }
mstats.alloc -= size; c->local_alloc -= size;
if(prof) if(prof)
runtime·MProf_Free(v, size); runtime·MProf_Free(v, size);
m->mallocing = 0; m->mallocing = 0;
...@@ -163,7 +163,7 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) ...@@ -163,7 +163,7 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
byte *p; byte *p;
MSpan *s; MSpan *s;
mstats.nlookup++; m->mcache->local_nlookup++;
s = runtime·MHeap_LookupMaybe(&runtime·mheap, v); s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
if(sp) if(sp)
*sp = s; *sp = s;
...@@ -192,9 +192,10 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp) ...@@ -192,9 +192,10 @@ runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
} }
n = runtime·class_to_size[s->sizeclass]; n = runtime·class_to_size[s->sizeclass];
i = ((byte*)v - p)/n; if(base) {
if(base) i = ((byte*)v - p)/n;
*base = p + i*n; *base = p + i*n;
}
if(size) if(size)
*size = n; *size = n;
...@@ -214,6 +215,29 @@ runtime·allocmcache(void) ...@@ -214,6 +215,29 @@ runtime·allocmcache(void)
return c; return c;
} }
// runtime·purgecachedstats flushes the statistics counters accumulated in
// m's MCache into the global mstats structure and resets each flushed
// local counter to zero.
//
// The counters folded in here are local_cachealloc (into heap_alloc),
// local_objects (into heap_objects), local_nmalloc, local_nfree,
// local_nlookup, local_alloc and local_total_alloc. The per-size-class
// local_by_size counters are not touched by this function.
void
runtime·purgecachedstats(M* m)
{
MCache *c;
// Protected by either heap or GC lock.
c = m->mcache;
// Heap accounting done by the cache allocator.
mstats.heap_alloc += c->local_cachealloc;
c->local_cachealloc = 0;
mstats.heap_objects += c->local_objects;
c->local_objects = 0;
// Operation counters.
mstats.nmalloc += c->local_nmalloc;
c->local_nmalloc = 0;
mstats.nfree += c->local_nfree;
c->local_nfree = 0;
mstats.nlookup += c->local_nlookup;
c->local_nlookup = 0;
// Byte counters; local_alloc is signed, so the delta added here may be negative.
mstats.alloc += c->local_alloc;
c->local_alloc= 0;
mstats.total_alloc += c->local_total_alloc;
c->local_total_alloc= 0;
}
uintptr runtime·sizeof_C_MStats = sizeof(MStats); uintptr runtime·sizeof_C_MStats = sizeof(MStats);
#define MaxArena32 (2U<<30) #define MaxArena32 (2U<<30)
...@@ -361,9 +385,6 @@ func new(n uint32) (ret *uint8) { ...@@ -361,9 +385,6 @@ func new(n uint32) (ret *uint8) {
void* void*
runtime·stackalloc(uint32 n) runtime·stackalloc(uint32 n)
{ {
void *v;
uintptr sys0;
// Stackalloc must be called on scheduler stack, so that we // Stackalloc must be called on scheduler stack, so that we
// never try to grow the stack during the code that stackalloc runs. // never try to grow the stack during the code that stackalloc runs.
// Doing so would cause a deadlock (issue 1547). // Doing so would cause a deadlock (issue 1547).
...@@ -382,11 +403,7 @@ runtime·stackalloc(uint32 n) ...@@ -382,11 +403,7 @@ runtime·stackalloc(uint32 n)
runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n); runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n);
runtime·throw("stackalloc"); runtime·throw("stackalloc");
} }
sys0 = m->stackalloc->sys; return runtime·FixAlloc_Alloc(m->stackalloc);
v = runtime·FixAlloc_Alloc(m->stackalloc);
mstats.stacks_inuse += FixedStack;
mstats.stacks_sys += m->stackalloc->sys - sys0;
return v;
} }
return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0); return runtime·mallocgc(n, FlagNoProfiling|FlagNoGC, 0, 0);
} }
...@@ -394,13 +411,8 @@ runtime·stackalloc(uint32 n) ...@@ -394,13 +411,8 @@ runtime·stackalloc(uint32 n)
void void
runtime·stackfree(void *v, uintptr n) runtime·stackfree(void *v, uintptr n)
{ {
uintptr sys0;
if(m->mallocing || m->gcing || n == FixedStack) { if(m->mallocing || m->gcing || n == FixedStack) {
sys0 = m->stackalloc->sys;
runtime·FixAlloc_Free(m->stackalloc, v); runtime·FixAlloc_Free(m->stackalloc, v);
mstats.stacks_inuse -= FixedStack;
mstats.stacks_sys += m->stackalloc->sys - sys0;
return; return;
} }
runtime·free(v); runtime·free(v);
......
...@@ -185,10 +185,10 @@ void runtime·FixAlloc_Free(FixAlloc *f, void *p); ...@@ -185,10 +185,10 @@ void runtime·FixAlloc_Free(FixAlloc *f, void *p);
// Shared with Go: if you edit this structure, also edit extern.go. // Shared with Go: if you edit this structure, also edit extern.go.
struct MStats struct MStats
{ {
// General statistics. No locking; approximate. // General statistics.
uint64 alloc; // bytes allocated and still in use uint64 alloc; // bytes allocated and still in use
uint64 total_alloc; // bytes allocated (even if freed) uint64 total_alloc; // bytes allocated (even if freed)
uint64 sys; // bytes obtained from system (should be sum of xxx_sys below) uint64 sys; // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
uint64 nlookup; // number of pointer lookups uint64 nlookup; // number of pointer lookups
uint64 nmalloc; // number of mallocs uint64 nmalloc; // number of mallocs
uint64 nfree; // number of frees uint64 nfree; // number of frees
...@@ -221,7 +221,6 @@ struct MStats ...@@ -221,7 +221,6 @@ struct MStats
bool debuggc; bool debuggc;
// Statistics about allocation size classes. // Statistics about allocation size classes.
// No locking; approximate.
struct { struct {
uint32 size; uint32 size;
uint64 nmalloc; uint64 nmalloc;
...@@ -267,9 +266,20 @@ struct MCache ...@@ -267,9 +266,20 @@ struct MCache
{ {
MCacheList list[NumSizeClasses]; MCacheList list[NumSizeClasses];
uint64 size; uint64 size;
int64 local_alloc; // bytes allocated (or freed) since last lock of heap int64 local_cachealloc; // bytes allocated (or freed) from cache since last lock of heap
int64 local_objects; // objects allocated (or freed) since last lock of heap int64 local_objects; // objects allocated (or freed) from cache since last lock of heap
int64 local_alloc; // bytes allocated and still in use since last lock of heap
int64 local_total_alloc; // bytes allocated (even if freed) since last lock of heap
int64 local_nmalloc; // number of mallocs since last lock of heap
int64 local_nfree; // number of frees since last lock of heap
int64 local_nlookup; // number of pointer lookups since last lock of heap
int32 next_sample; // trigger heap sample after allocating this many bytes int32 next_sample; // trigger heap sample after allocating this many bytes
// Statistics about allocation size classes since last lock of heap
struct {
int64 nmalloc;
int64 nfree;
} local_by_size[NumSizeClasses];
}; };
void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed); void* runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed);
...@@ -378,6 +388,7 @@ void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover); ...@@ -378,6 +388,7 @@ void runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
void runtime·unmarkspan(void *v, uintptr size); void runtime·unmarkspan(void *v, uintptr size);
bool runtime·blockspecial(void*); bool runtime·blockspecial(void*);
void runtime·setblockspecial(void*); void runtime·setblockspecial(void*);
void runtime·purgecachedstats(M*);
enum enum
{ {
......
...@@ -48,7 +48,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) ...@@ -48,7 +48,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed)
v->next = nil; v->next = nil;
} }
} }
c->local_alloc += size; c->local_cachealloc += size;
c->local_objects++; c->local_objects++;
return v; return v;
} }
...@@ -90,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) ...@@ -90,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
l->list = p; l->list = p;
l->nlist++; l->nlist++;
c->size += size; c->size += size;
c->local_alloc -= size; c->local_cachealloc -= size;
c->local_objects--; c->local_objects--;
if(l->nlist >= MaxMCacheListLen) { if(l->nlist >= MaxMCacheListLen) {
......
...@@ -484,6 +484,7 @@ sweep(void) ...@@ -484,6 +484,7 @@ sweep(void)
// Mark freed; restore block boundary bit. // Mark freed; restore block boundary bit.
*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift); *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
c = m->mcache;
if(s->sizeclass == 0) { if(s->sizeclass == 0) {
// Free large span. // Free large span.
runtime·unmarkspan(p, 1<<PageShift); runtime·unmarkspan(p, 1<<PageShift);
...@@ -491,14 +492,13 @@ sweep(void) ...@@ -491,14 +492,13 @@ sweep(void)
runtime·MHeap_Free(&runtime·mheap, s, 1); runtime·MHeap_Free(&runtime·mheap, s, 1);
} else { } else {
// Free small object. // Free small object.
c = m->mcache;
if(size > sizeof(uintptr)) if(size > sizeof(uintptr))
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed" ((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
mstats.by_size[s->sizeclass].nfree++; c->local_by_size[s->sizeclass].nfree++;
runtime·MCache_Free(c, p, s->sizeclass, size); runtime·MCache_Free(c, p, s->sizeclass, size);
} }
mstats.alloc -= size; c->local_alloc -= size;
mstats.nfree++; c->local_nfree++;
} }
} }
} }
...@@ -533,14 +533,26 @@ cachestats(void) ...@@ -533,14 +533,26 @@ cachestats(void)
{ {
M *m; M *m;
MCache *c; MCache *c;
int32 i;
uint64 stacks_inuse;
uint64 stacks_sys;
stacks_inuse = 0;
stacks_sys = 0;
for(m=runtime·allm; m; m=m->alllink) { for(m=runtime·allm; m; m=m->alllink) {
runtime·purgecachedstats(m);
stacks_inuse += m->stackalloc->inuse;
stacks_sys += m->stackalloc->sys;
c = m->mcache; c = m->mcache;
mstats.heap_alloc += c->local_alloc; for(i=0; i<nelem(c->local_by_size); i++) {
c->local_alloc = 0; mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
mstats.heap_objects += c->local_objects; c->local_by_size[i].nmalloc = 0;
c->local_objects = 0; mstats.by_size[i].nfree += c->local_by_size[i].nfree;
c->local_by_size[i].nfree = 0;
}
} }
mstats.stacks_inuse = stacks_inuse;
mstats.stacks_sys = stacks_sys;
} }
void void
...@@ -603,6 +615,7 @@ runtime·gc(int32 force) ...@@ -603,6 +615,7 @@ runtime·gc(int32 force)
sweep(); sweep();
t2 = runtime·nanotime(); t2 = runtime·nanotime();
stealcache(); stealcache();
cachestats();
mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
m->gcing = 0; m->gcing = 0;
......
...@@ -57,10 +57,7 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct) ...@@ -57,10 +57,7 @@ runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, int32 acct)
MSpan *s; MSpan *s;
runtime·lock(h); runtime·lock(h);
mstats.heap_alloc += m->mcache->local_alloc; runtime·purgecachedstats(m);
m->mcache->local_alloc = 0;
mstats.heap_objects += m->mcache->local_objects;
m->mcache->local_objects = 0;
s = MHeap_AllocLocked(h, npage, sizeclass); s = MHeap_AllocLocked(h, npage, sizeclass);
if(s != nil) { if(s != nil) {
mstats.heap_inuse += npage<<PageShift; mstats.heap_inuse += npage<<PageShift;
...@@ -258,10 +255,7 @@ void ...@@ -258,10 +255,7 @@ void
runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct) runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
{ {
runtime·lock(h); runtime·lock(h);
mstats.heap_alloc += m->mcache->local_alloc; runtime·purgecachedstats(m);
m->mcache->local_alloc = 0;
mstats.heap_objects += m->mcache->local_objects;
m->mcache->local_objects = 0;
mstats.heap_inuse -= s->npages<<PageShift; mstats.heap_inuse -= s->npages<<PageShift;
if(acct) { if(acct) {
mstats.heap_alloc -= s->npages<<PageShift; mstats.heap_alloc -= s->npages<<PageShift;
......
...@@ -47,6 +47,7 @@ func main() { ...@@ -47,6 +47,7 @@ func main() {
b := runtime.Alloc(uintptr(j)) b := runtime.Alloc(uintptr(j))
during := runtime.MemStats.Alloc during := runtime.MemStats.Alloc
runtime.Free(b) runtime.Free(b)
runtime.GC()
if a := runtime.MemStats.Alloc; a != 0 { if a := runtime.MemStats.Alloc; a != 0 {
println("allocated ", j, ": wrong stats: during=", during, " after=", a, " (want 0)") println("allocated ", j, ": wrong stats: during=", during, " after=", a, " (want 0)")
panic("fail") panic("fail")
......
...@@ -60,6 +60,7 @@ func AllocAndFree(size, count int) { ...@@ -60,6 +60,7 @@ func AllocAndFree(size, count int) {
fmt.Printf("size=%d count=%d stats=%+v\n", size, count, *stats) fmt.Printf("size=%d count=%d stats=%+v\n", size, count, *stats)
} }
n3 := stats.Alloc n3 := stats.Alloc
runtime.GC()
for j := 0; j < count; j++ { for j := 0; j < count; j++ {
i := j i := j
if *reverse { if *reverse {
...@@ -72,6 +73,7 @@ func AllocAndFree(size, count int) { ...@@ -72,6 +73,7 @@ func AllocAndFree(size, count int) {
panic("fail") panic("fail")
} }
runtime.Free(b[i]) runtime.Free(b[i])
runtime.GC()
if stats.Alloc != uint64(alloc-n) { if stats.Alloc != uint64(alloc-n) {
println("free alloc got", stats.Alloc, "expected", alloc-n, "after free of", n) println("free alloc got", stats.Alloc, "expected", alloc-n, "after free of", n)
panic("fail") panic("fail")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment