Commit 1ba04c17 authored by Dmitriy Vyukov

runtime: per-P defer pool

Instead of a per-goroutine stack of defers for all sizes,
introduce per-P defer pool for argument sizes 8, 24, 40, 56, 72 bytes.

For a program that starts 1e6 goroutines and then joins them:
old: rss=6.6g virtmem=10.2g time=4.85s
new: rss=4.5g virtmem= 8.2g time=3.48s

R=golang-codereviews, rsc
CC=golang-codereviews
https://golang.org/cl/42750044
parent abd588aa
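
As a quick orientation before the diff: the mapping from argument size to pooled defer class can be reproduced with a few lines of standard C. This is an illustrative sketch, not part of the commit; DEFERCLASS mirrors the macro the commit adds to panic.c, and the main driver exists only to print the table.

#include <stdio.h>

// Mirrors the DEFERCLASS macro added in panic.c: each pooled class
// covers a 16-byte band of argument sizes, so the largest arg size
// in each of the five pooled classes is 8, 24, 40, 56 and 72 bytes.
#define DEFERCLASS(sz) (((sz)+7)>>4)

int main(void) {
	int sz;
	for(sz = 0; sz <= 80; sz += 8)
		printf("arg size %2d -> defer class %d%s\n",
		       sz, DEFERCLASS(sz),
		       DEFERCLASS(sz) >= 5 ? " (not pooled: plain malloc)" : "");
	return 0;
}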
@@ -129,7 +129,6 @@ runtime·cgocall(void (*fn)(void*), void *arg)
 	d.link = g->defer;
 	d.argp = (void*)-1; // unused because unlockm never recovers
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 	m->ncgo++;
@@ -285,7 +284,6 @@ runtime·cgocallbackg1(void)
 	d.link = g->defer;
 	d.argp = (void*)-1; // unused because unwindm never recovers
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 	if(raceenabled && !m->racecall)
......
@@ -60,16 +60,25 @@ sync·runtime_registerPool(void **p)
 static void
 clearpools(void)
 {
-	void **p, **next;
+	void **pool, **next;
+	P *p, **pp;
+	int32 i;
 
-	for(p = pools.head; p != nil; p = next) {
-		next = p[0];
-		p[0] = nil; // next
-		p[1] = nil; // slice
-		p[2] = nil;
-		p[3] = nil;
+	// clear sync.Pool's
+	for(pool = pools.head; pool != nil; pool = next) {
+		next = pool[0];
+		pool[0] = nil; // next
+		pool[1] = nil; // slice
+		pool[2] = nil;
+		pool[3] = nil;
 	}
 	pools.head = nil;
+
+	// clear defer pools
+	for(pp=runtime·allp; p=*pp; pp++) {
+		for(i=0; i<nelem(p->deferpool); i++)
+			p->deferpool[i] = nil;
+	}
 }
 
 // Bits in per-word bitmap.
......
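
The "clear defer pools" loop in the hunk above is what permits a simplification visible later in this commit: freedefer no longer runtime·memclr's a freed Defer, because any stale pointers left in pooled Defers become unreachable once the pools are dropped before garbage collection. A minimal standalone sketch of that invariant, in standard C with illustrative names (Node, poolput, clearpool are not runtime identifiers):

#include <stddef.h>

// Freed objects keep whatever pointers they held while live; instead
// of zeroing each object on free, the whole free list is discarded
// before every GC, so the collector never scans the stale pointers.
typedef struct Node Node;
struct Node {
	Node *link;
	void *stale; // may still point at dead memory between free and GC
};

static Node *pool; // stands in for one p->deferpool[sc] slot

static void poolput(Node *n) {
	n->link = pool; // no memclr needed here...
	pool = n;
}

static void clearpool(void) {
	pool = NULL; // ...because the pool is emptied before GC
}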
@@ -44,8 +44,10 @@ int32 runtime·class_to_allocnpages[NumSizeClasses];
 int8 runtime·size_to_class8[1024/8 + 1];
 int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
 
-static int32
-SizeToClass(int32 size)
+void runtime·testdefersizes(void);
+
+int32
+runtime·SizeToClass(int32 size)
 {
 	if(size > MaxSmallSize)
 		runtime·throw("SizeToClass - invalid size");
@@ -119,7 +121,7 @@ runtime·InitSizes(void)
 	// Double-check SizeToClass.
 	if(0) {
 		for(n=0; n < MaxSmallSize; n++) {
-			sizeclass = SizeToClass(n);
+			sizeclass = runtime·SizeToClass(n);
 			if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime·class_to_size[sizeclass] < n) {
 				runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
 				runtime·printf("incorrect SizeToClass");
@@ -133,6 +135,8 @@ runtime·InitSizes(void)
 		}
 	}
 
+	runtime·testdefersizes();
+
 	// Copy out for statistics table.
 	for(i=0; i<nelem(runtime·class_to_size); i++)
 		mstats.by_size[i].size = runtime·class_to_size[i];
......
@@ -13,108 +13,63 @@
 uint32 runtime·panicking;
 static Lock paniclk;
 
-enum
-{
-	DeferChunkSize = 2048
-};
+// Each P holds pool for defers with arg sizes 8, 24, 40, 56 and 72 bytes.
+// Memory block is 40 (24 for 32 bits) bytes larger due to Defer header.
+// This maps exactly to malloc size classes.
 
-// Allocate a Defer, usually as part of the larger frame of deferred functions.
-// Each defer must be released with both popdefer and freedefer.
+// defer size class for arg size sz
+#define DEFERCLASS(sz) (((sz)+7)>>4)
+// total size of memory block for defer with arg size sz
+#define TOTALSIZE(sz) (sizeof(Defer) - sizeof(((Defer*)nil)->args) + ROUND(sz, sizeof(uintptr)))
+
+// Allocate a Defer, usually using per-P pool.
+// Each defer must be released with freedefer.
 static Defer*
 newdefer(int32 siz)
 {
-	int32 total;
-	DeferChunk *c;
+	int32 total, sc;
 	Defer *d;
+	P *p;
 
-	c = g->dchunk;
-	total = sizeof(*d) + ROUND(siz, sizeof(uintptr)) - sizeof(d->args);
-	if(c == nil || total > DeferChunkSize - c->off) {
-		if(total > DeferChunkSize / 2) {
-			// Not worth putting in any chunk.
-			// Allocate a separate block.
-			d = runtime·malloc(total);
-			d->siz = siz;
-			d->special = 1;
-			d->free = 1;
-			d->link = g->defer;
-			g->defer = d;
-			return d;
-		}
-
-		// Cannot fit in current chunk.
-		// Switch to next chunk, allocating if necessary.
-		c = g->dchunknext;
-		if(c == nil)
-			c = runtime·malloc(DeferChunkSize);
-		c->prev = g->dchunk;
-		c->off = sizeof(*c);
-		g->dchunk = c;
-		g->dchunknext = nil;
+	d = nil;
+	sc = DEFERCLASS(siz);
+	if(sc < nelem(p->deferpool)) {
+		p = m->p;
+		d = p->deferpool[sc];
+		if(d)
+			p->deferpool[sc] = d->link;
+	}
+	if(d == nil) {
+		// deferpool is empty or just a big defer
+		total = TOTALSIZE(siz);
+		d = runtime·malloc(total);
 	}
-
-	d = (Defer*)((byte*)c + c->off);
-	c->off += total;
 	d->siz = siz;
 	d->special = 0;
-	d->free = 0;
 	d->link = g->defer;
 	g->defer = d;
 	return d;
 }
 
-// Pop the current defer from the defer stack.
-// Its contents are still valid until the goroutine begins executing again.
-// In particular it is safe to call reflect.call(d->fn, d->argp, d->siz) after
-// popdefer returns.
-static void
-popdefer(void)
-{
-	Defer *d;
-	DeferChunk *c;
-	int32 total;
-
-	d = g->defer;
-	if(d == nil)
-		runtime·throw("runtime: popdefer nil");
-	g->defer = d->link;
-	if(d->special) {
-		// Nothing else to do.
-		return;
-	}
-	total = sizeof(*d) + ROUND(d->siz, sizeof(uintptr)) - sizeof(d->args);
-	c = g->dchunk;
-	if(c == nil || (byte*)d+total != (byte*)c+c->off)
-		runtime·throw("runtime: popdefer phase error");
-	c->off -= total;
-	if(c->off == sizeof(*c)) {
-		// Chunk now empty, so pop from stack.
-		// Save in dchunknext both to help with pingponging between frames
-		// and to make sure d is still valid on return.
-		if(g->dchunknext != nil)
-			runtime·free(g->dchunknext);
-		g->dchunknext = c;
-		g->dchunk = c->prev;
-	}
-}
-
 // Free the given defer.
-// For defers in the per-goroutine chunk this just clears the saved arguments.
-// For large defers allocated on the heap, this frees them.
 // The defer cannot be used after this call.
 static void
 freedefer(Defer *d)
 {
-	int32 total;
+	int32 sc;
+	P *p;
 
-	if(d->special) {
-		if(d->free)
-			runtime·free(d);
-	} else {
-		// Wipe out any possible pointers in argp/pc/fn/args.
-		total = sizeof(*d) + ROUND(d->siz, sizeof(uintptr)) - sizeof(d->args);
-		runtime·memclr((byte*)d, total);
-	}
+	if(d->special)
+		return;
+	sc = DEFERCLASS(d->siz);
+	if(sc < nelem(p->deferpool)) {
+		p = m->p;
+		d->link = p->deferpool[sc];
+		p->deferpool[sc] = d;
+		// No need to wipe out pointers in argp/pc/fn/args,
+		// because we empty the pool before GC.
+	} else
+		runtime·free(d);
 }
 
 // Create a new deferred function fn with siz bytes of arguments.
@@ -182,7 +137,7 @@ runtime·deferreturn(uintptr arg0)
 	m->locks++;
 	runtime·memmove(argp, d->args, d->siz);
 	fn = d->fn;
-	popdefer();
+	g->defer = d->link;
 	freedefer(d);
 	m->locks--;
 	if(m->locks == 0 && g->preempt)
@@ -190,6 +145,37 @@ runtime·deferreturn(uintptr arg0)
 	runtime·jmpdefer(fn, argp);
 }
 
+// Ensure that defer arg sizes that map to the same defer size class
+// also map to the same malloc size class.
+void
+runtime·testdefersizes(void)
+{
+	P *p;
+	int32 i, siz, defersc, mallocsc;
+	int32 map[nelem(p->deferpool)];
+
+	for(i=0; i<nelem(p->deferpool); i++)
+		map[i] = -1;
+	for(i=0;; i++) {
+		defersc = DEFERCLASS(i);
+		if(defersc >= nelem(p->deferpool))
+			break;
+		siz = TOTALSIZE(i);
+		mallocsc = runtime·SizeToClass(siz);
+		siz = runtime·class_to_size[mallocsc];
+		// runtime·printf("defer class %d: arg size %d, block size %d(%d)\n", defersc, i, siz, mallocsc);
+		if(map[defersc] < 0) {
+			map[defersc] = mallocsc;
+			continue;
+		}
+		if(map[defersc] != mallocsc) {
+			runtime·printf("bad defer size class: i=%d siz=%d mallocsc=%d/%d\n",
+				i, siz, map[defersc], mallocsc);
+			runtime·throw("bad defer size class");
+		}
+	}
+}
+
 // Run all deferred functions for the current goroutine.
 static void
 rundefer(void)
@@ -197,7 +183,7 @@ rundefer(void)
 	Defer *d;
 
 	while((d = g->defer) != nil) {
-		popdefer();
+		g->defer = d->link;
 		reflect·call(d->fn, (byte*)d->args, d->siz);
 		freedefer(d);
 	}
@@ -239,7 +225,7 @@ runtime·panic(Eface e)
 		if(d == nil)
 			break;
 		// take defer off list in case of recursive panic
-		popdefer();
+		g->defer = d->link;
 		g->ispanic = true; // rock for newstack, where reflect.newstackcall ends up
 		argp = d->argp;
 		pc = d->pc;
......
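
The arithmetic that runtime·testdefersizes checks above can be worked by hand. Assuming a 64-bit build, where the comment in panic.c puts the Defer header at 40 bytes, the largest block in each pooled class comes out to 48, 64, 80, 96 and 112 bytes, and every arg size within one defer class must round up to the same malloc size class. A standalone sketch of the same walk (standard C; ROUND8 and HEADER are illustrative stand-ins for the runtime's ROUND and header size):

#include <stdio.h>

#define DEFERCLASS(sz) (((sz)+7)>>4)  // as added in panic.c
#define ROUND8(x)      (((x)+7) & ~7) // ROUND(x, sizeof(uintptr)) on 64-bit
#define HEADER         40             // assumed 64-bit Defer header size

int main(void) {
	int sz;
	// Walk every pooled arg size, as runtime·testdefersizes does,
	// and print the block size that would be handed to malloc.
	for(sz = 0; DEFERCLASS(sz) < 5; sz++)
		printf("arg %2d -> defer class %d, block %3d bytes\n",
		       sz, DEFERCLASS(sz), HEADER + ROUND8(sz));
	return 0;
}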
@@ -204,7 +204,6 @@ runtime·main(void)
 	d.link = g->defer;
 	d.argp = (void*)-1;
 	d.special = true;
-	d.free = false;
 	g->defer = &d;
 
 	if(m != &runtime·m0)
......
@@ -70,7 +70,6 @@ typedef struct PtrType PtrType;
 typedef struct ChanType ChanType;
 typedef struct MapType MapType;
 typedef struct Defer Defer;
-typedef struct DeferChunk DeferChunk;
 typedef struct Panic Panic;
 typedef struct Hmap Hmap;
 typedef struct Hchan Hchan;
@@ -281,8 +280,6 @@ struct G
 	int32 sig;
 	int32 writenbuf;
 	byte* writebuf;
-	DeferChunk* dchunk;
-	DeferChunk* dchunknext;
 	uintptr sigcode0;
 	uintptr sigcode1;
 	uintptr sigpc;
@@ -387,6 +384,7 @@ struct P
 	uint32 syscalltick; // incremented on every system call
 	M* m; // back-link to associated M (nil if idle)
 	MCache* mcache;
+	Defer* deferpool[5]; // pool of available Defer structs of different sizes (see panic.c)
 
 	// Queue of runnable goroutines.
 	uint32 runqhead;
@@ -676,7 +674,6 @@ struct Defer
 {
 	int32 siz;
 	bool special; // not part of defer frame
-	bool free; // if special, free when done
 	byte* argp; // where args were copied from
 	byte* pc;
 	FuncVal* fn;
@@ -684,12 +681,6 @@
 	void* args[1]; // padded to actual size
 };
 
-struct DeferChunk
-{
-	DeferChunk *prev;
-	uintptr off;
-};
-
 /*
  * panics
  */
......