Commit f378f300 authored by Keith Randall

undo CL 101570044 / 2c57aaea79c4

redo stack allocation.  This is mostly the same as
the original CL with a few bug fixes.

1. Add racemalloc() for stack allocations.
2. Fix poolalloc/poolfree to terminate free lists correctly.
3. Adjust span ref counts correctly.
4. Don't use the cache for sizes >= StackCacheSize.

Should fix the bugs and memory leaks in the original changelist.
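
A rough, illustrative sketch of what fixes 2 and 4 amount to (this is not code from the CL; carvespan and its parameters are made-up names):

	// Fix 2: when a fresh span is carved into stack segments, the free
	// list must be explicitly nil-terminated so a stale next pointer
	// cannot survive into the per-P cache.
	static MLink*
	carvespan(byte *base, uintptr segsize, uintptr spansize)
	{
		MLink *list, *x;
		uintptr i;

		list = nil;
		for(i = 0; i < spansize; i += segsize) {
			x = (MLink*)(base + i);
			x->next = list;	// the first segment pushed gets next == nil,
			list = x;	// so the resulting list is properly terminated
		}
		return list;
	}

Fix 4 is then just a size check: requests of StackCacheSize bytes or more skip the per-P cache and are allocated as dedicated spans via runtime·MHeap_AllocStack, which this CL declares in malloc.h below.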

««« original CL description
undo CL 104200047 / 318b04f28372

Breaks Windows and the race detector.
TBR=rsc

««« original CL description
runtime: stack allocator, separate from mallocgc

In order to move malloc to Go, we need to have a
separate stack allocator.  If we run out of stack
during malloc, malloc will not be available
to allocate a new stack.

Stacks are the last remaining FlagNoGC objects in the
GC heap.  Once they are out, we can get rid of the
distinction between the allocated/blockboundary bits.
(This will be in a separate change.)

Fixes #7468
Fixes #7424

LGTM=rsc, dvyukov
R=golang-codereviews, dvyukov, khr, dave, rsc
CC=golang-codereviews
https://golang.org/cl/104200047
»»»

TBR=rsc
CC=golang-codereviews
https://golang.org/cl/101570044
»»»

LGTM=dvyukov
R=dvyukov, dave, khr, alex.brainman
CC=golang-codereviews
https://golang.org/cl/112240044
parent 6b2aabee
@@ -116,6 +116,12 @@ enum
 	MaxMHeapList = 1<<(20 - PageShift),	// Maximum page length for fixed-size list in MHeap.
 	HeapAllocChunk = 1<<20,			// Chunk size for heap growth

+	// Per-P, per order stack segment cache size.
+	StackCacheSize = 32*1024,
+	// Number of orders that get caching. Order 0 is FixedStack
+	// and each successive order is twice as large.
+	NumStackOrders = 3,
+
 	// Number of bits in page to span calculations (4k pages).
 	// On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
 	// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
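
As a rough sketch only (not code from this CL; stackorder is a hypothetical helper), the constants above imply the following mapping from a requested stack size to a cache order, with order 0 covering FixedStack-sized segments and each successive order twice as large; sizes of StackCacheSize bytes or more bypass the per-P cache entirely:

	// Hypothetical helper, not part of the CL, illustrating the
	// order/size relationship defined by the constants above.
	static uint8
	stackorder(uintptr size)
	{
		uint8 order;
		uintptr s;

		order = 0;
		for(s = FixedStack; s < size; s <<= 1)
			order++;
		return order;	// callers would also check order < NumStackOrders
	}

Each per-P StackFreeList (added further down in this diff) would then hold at most about StackCacheSize bytes of free segments of its order before the excess is presumably returned to a global pool; the allocator itself is in one of the collapsed diffs.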
@@ -247,8 +253,8 @@ struct MStats
 	// Statistics about allocation of low-level fixed-size structures.
 	// Protected by FixAlloc locks.
-	uint64	stacks_inuse;	// bootstrap stacks
-	uint64	stacks_sys;
+	uint64	stacks_inuse;	// this number is included in heap_inuse above
+	uint64	stacks_sys;	// always 0 in mstats
 	uint64	mspan_inuse;	// MSpan structures
 	uint64	mspan_sys;
 	uint64	mcache_inuse;	// MCache structures
@@ -305,6 +311,13 @@ struct MCacheList
 	uint32	nlist;
 };
+
+typedef struct StackFreeList StackFreeList;
+struct StackFreeList
+{
+	MLink	*list;	// linked list of free stacks
+	uintptr	size;	// total size of stacks in list
+};
 // Per-thread (in Go, per-P) cache for small objects.
 // No locking needed because it is per-thread (per-P).
 struct MCache
@@ -320,6 +333,9 @@ struct MCache
 	// The rest is not accessed on every malloc.
 	MSpan*	alloc[NumSizeClasses];	// spans to allocate from
 	MCacheList free[NumSizeClasses];// lists of explicitly freed objects
+
+	StackFreeList stackcache[NumStackOrders];
+
 	// Local allocator stats, flushed during GC.
 	uintptr	local_nlookup;		// number of pointer lookups
 	uintptr	local_largefree;	// bytes freed for large objects (>MaxSmallSize)
@@ -330,6 +346,7 @@ struct MCache
 MSpan*	runtime·MCache_Refill(MCache *c, int32 sizeclass);
 void	runtime·MCache_Free(MCache *c, MLink *p, int32 sizeclass, uintptr size);
 void	runtime·MCache_ReleaseAll(MCache *c);
+void	runtime·stackcache_clear(MCache *c);

 // MTypes describes the types of blocks allocated within a span.
 // The compression field describes the layout of the data.
@@ -409,7 +426,8 @@ struct SpecialProfile
 // An MSpan is a run of pages.
 enum
 {
-	MSpanInUse = 0,
+	MSpanInUse = 0,	// allocated for garbage collected heap
+	MSpanStack,	// allocated for use by stack allocator
 	MSpanFree,
 	MSpanListHead,
 	MSpanDead,
@@ -525,7 +543,9 @@ extern MHeap runtime·mheap;
 void	runtime·MHeap_Init(MHeap *h);
 MSpan*	runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
+MSpan*	runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
 void	runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
+void	runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
 MSpan*	runtime·MHeap_Lookup(MHeap *h, void *v);
 MSpan*	runtime·MHeap_LookupMaybe(MHeap *h, void *v);
 void	runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *size, int32 *npages, int32 *nobj);
@@ -533,7 +553,6 @@ void*	runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
 void	runtime·MHeap_MapBits(MHeap *h);
 void	runtime·MHeap_MapSpans(MHeap *h);
 void	runtime·MHeap_Scavenger(void);
-void	runtime·MHeap_SplitSpan(MHeap *h, MSpan *s);
 void*	runtime·mallocgc(uintptr size, uintptr typ, uint32 flag);
 void*	runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
...
@@ -39,16 +39,35 @@ runtime·allocmcache(void)
 	return c;
 }

-void
-runtime·freemcache(MCache *c)
+static void
+freemcache(MCache *c)
 {
 	runtime·MCache_ReleaseAll(c);
+	runtime·stackcache_clear(c);
 	runtime·lock(&runtime·mheap);
 	runtime·purgecachedstats(c);
 	runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
 	runtime·unlock(&runtime·mheap);
 }

+static void
+freemcache_m(G *gp)
+{
+	MCache *c;
+
+	c = g->m->ptrarg[0];
+	g->m->ptrarg[0] = nil;
+	freemcache(c);
+	runtime·gogo(&gp->sched);
+}
+
+void
+runtime·freemcache(MCache *c)
+{
+	g->m->ptrarg[0] = c;
+	runtime·mcall(freemcache_m);
+}
+
 // Gets a span that has a free object in it and assigns it
 // to be the cached span for the given sizeclass. Returns this span.
 MSpan*
...
@@ -164,6 +164,8 @@ MCentral_Free(MCentral *c, MLink *v)
 	s = runtime·MHeap_Lookup(&runtime·mheap, v);
 	if(s == nil || s->ref == 0)
 		runtime·throw("invalid free");
+	if(s->state != MSpanInUse)
+		runtime·throw("free into stack span");
 	if(s->sweepgen != runtime·mheap.sweepgen)
 		runtime·throw("free into unswept span");
...
@@ -30,7 +30,7 @@ type MemStats struct {
 	// Low-level fixed-size structure allocator statistics.
 	//	Inuse is bytes used now.
 	//	Sys is bytes obtained from system.
-	StackInuse uint64 // bootstrap stacks
+	StackInuse uint64 // bytes used by stack allocator
 	StackSys   uint64
 	MSpanInuse uint64 // mspan structures
 	MSpanSys   uint64
...
@@ -1252,12 +1252,12 @@ markroot(ParFor *desc, uint32 i)
 		SpecialFinalizer *spf;

 		s = allspans[spanidx];
-		if(s->state != MSpanInUse)
-			continue;
 		if(s->sweepgen != sg) {
 			runtime·printf("sweep %d %d\n", s->sweepgen, sg);
 			runtime·throw("gc: unswept span");
 		}
+		if(s->state != MSpanInUse)
+			continue;
 		// The garbage collector ignores type pointers stored in MSpan.types:
 		//  - Compiler-generated types are stored outside of heap.
 		//  - The reflect package has runtime-generated types cached in its data structures.
@@ -2119,23 +2119,29 @@ flushallmcaches(void)
 		if(c==nil)
 			continue;
 		runtime·MCache_ReleaseAll(c);
+		runtime·stackcache_clear(c);
 	}
 }

+static void
+flushallmcaches_m(G *gp)
+{
+	flushallmcaches();
+	runtime·gogo(&gp->sched);
+}
+
 void
 runtime·updatememstats(GCStats *stats)
 {
 	M *mp;
 	MSpan *s;
 	int32 i;
-	uint64 stacks_inuse, smallfree;
+	uint64 smallfree;
 	uint64 *src, *dst;

 	if(stats)
 		runtime·memclr((byte*)stats, sizeof(*stats));
-	stacks_inuse = 0;
 	for(mp=runtime·allm; mp; mp=mp->alllink) {
-		stacks_inuse += mp->stackinuse*FixedStack;
 		if(stats) {
 			src = (uint64*)&mp->gcstats;
 			dst = (uint64*)stats;
@@ -2144,7 +2150,6 @@ runtime·updatememstats(GCStats *stats)
 			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
 		}
 	}
-	mstats.stacks_inuse = stacks_inuse;
 	mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
 	mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
 	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
@@ -2167,7 +2172,7 @@ runtime·updatememstats(GCStats *stats)
 	}

 	// Flush MCache's to MCentral.
-	flushallmcaches();
+	runtime·mcall(flushallmcaches_m);

 	// Aggregate local stats.
 	cachestats();
@@ -2504,6 +2509,12 @@ runtime·ReadMemStats(MStats *stats)
 	// Size of the trailing by_size array differs between Go and C,
 	// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
 	runtime·memcopy(runtime·sizeof_C_MStats, stats, &mstats);
+
+	// Stack numbers are part of the heap numbers, separate those out for user consumption
+	stats->stacks_sys = stats->stacks_inuse;
+	stats->heap_inuse -= stats->stacks_inuse;
+	stats->heap_sys -= stats->stacks_inuse;
+
 	g->m->gcing = 0;
 	g->m->locks++;
 	runtime·semrelease(&runtime·worldsema);
...
This diff is collapsed.
@@ -153,6 +153,7 @@ runtime·schedinit(void)
 	runtime·precisestack = true; // haveexperiment("precisestack");

 	runtime·symtabinit();
+	runtime·stackinit();
 	runtime·mallocinit();
 	mcommoninit(g->m);
@@ -1946,7 +1947,7 @@ gfput(P *p, G *gp)
 		runtime·throw("gfput: bad stacksize");
 	}
 	top = (Stktop*)gp->stackbase;
-	if(top->malloced) {
+	if(stksize != FixedStack) {
 		// non-standard stack size - free it.
 		runtime·stackfree(gp, (void*)gp->stack0, top);
 		gp->stack0 = 0;
@@ -2013,6 +2014,9 @@ retry:
 			gp->stackbase = (uintptr)stk + FixedStack - sizeof(Stktop);
 			gp->stackguard = (uintptr)stk + StackGuard;
 			gp->stackguard0 = gp->stackguard;
+		} else {
+			if(raceenabled)
+				runtime·racemalloc((void*)gp->stack0, gp->stackbase + sizeof(Stktop) - gp->stack0);
 		}
 	}
 	return gp;
...
@@ -146,13 +146,6 @@ enum
 {
 	PtrSize = sizeof(void*),
 };
-enum
-{
-	// Per-M stack segment cache size.
-	StackCacheSize = 32,
-	// Global <-> per-M stack segment cache transfer batch size.
-	StackCacheBatch = 16,
-};
 /*
  * structures
  */
@@ -326,10 +319,6 @@ struct M
 	M*	schedlink;
 	uint32	machport;	// Return address for Mach IPC (OS X)
 	MCache*	mcache;
-	int32	stackinuse;
-	uint32	stackcachepos;
-	uint32	stackcachecnt;
-	void*	stackcache[StackCacheSize];
 	G*	lockedg;
 	uintptr	createstack[32];// Stack that created this thread.
 	uint32	freglo[16];	// D[i] lsb and F[i]
@@ -346,6 +335,8 @@ struct M
 	bool	(*waitunlockf)(G*, void*);
 	void*	waitlock;
 	uintptr	forkstackguard;
+	uintptr	scalararg[4];	// scalar argument/return for mcall
+	void*	ptrarg[4];	// pointer argument/return for mcall
 #ifdef GOOS_windows
 	void*	thread;		// thread handle
 	// these are here because they are too large to be on the stack
@@ -428,7 +419,6 @@ struct Stktop
 	uint8*	argp;	// pointer to arguments in old frame
 	bool	panic;	// is this frame the top of a panic?
-	bool	malloced;
 };
 struct SigTab
 {
@@ -866,6 +856,7 @@ int32 runtime·funcarglen(Func*, uintptr);
 int32	runtime·funcspdelta(Func*, uintptr);
 int8*	runtime·funcname(Func*);
 int32	runtime·pcdatavalue(Func*, int32, uintptr);
+void	runtime·stackinit(void);
 void*	runtime·stackalloc(G*, uint32);
 void	runtime·stackfree(G*, void*, Stktop*);
 void	runtime·shrinkstack(G*);
...
This diff is collapsed.
@@ -281,3 +281,52 @@ func TestDeferPtrs(t *testing.T) {
 	defer set(&y, 42)
 	growStack()
 }
+
+// use about n KB of stack
+func useStack(n int) {
+	if n == 0 {
+		return
+	}
+	var b [1024]byte // makes frame about 1KB
+	useStack(n - 1 + int(b[99]))
+}
+
+func growing(c chan int, done chan struct{}) {
+	for n := range c {
+		useStack(n)
+		done <- struct{}{}
+	}
+	done <- struct{}{}
+}
+
+func TestStackCache(t *testing.T) {
+	// Allocate a bunch of goroutines and grow their stacks.
+	// Repeat a few times to test the stack cache.
+	const (
+		R = 4
+		G = 200
+		S = 5
+	)
+	for i := 0; i < R; i++ {
+		var reqchans [G]chan int
+		done := make(chan struct{})
+		for j := 0; j < G; j++ {
+			reqchans[j] = make(chan int)
+			go growing(reqchans[j], done)
+		}
+		for s := 0; s < S; s++ {
+			for j := 0; j < G; j++ {
+				reqchans[j] <- 1 << uint(s)
+			}
+			for j := 0; j < G; j++ {
+				<-done
+			}
+		}
+		for j := 0; j < G; j++ {
+			close(reqchans[j])
+		}
+		for j := 0; j < G; j++ {
+			<-done
+		}
+	}
+}