Commit 4945fc8e authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: speedup GC sweep phase (batch free)

benchmark                             old ns/op    new ns/op    delta
garbage.BenchmarkParser              4370050250   3779668750  -13.51%
garbage.BenchmarkParser-2            3713087000   3628771500   -2.27%
garbage.BenchmarkParser-4            3519755250   3406349750   -3.22%
garbage.BenchmarkParser-8            3386627750   3319144000   -1.99%

garbage.BenchmarkTree                 493585529    408102411  -17.32%
garbage.BenchmarkTree-2               500487176    402285176  -19.62%
garbage.BenchmarkTree-4               473238882    361484058  -23.61%
garbage.BenchmarkTree-8               486977823    368334823  -24.36%

garbage.BenchmarkTree2                 31446600     31203200   -0.77%
garbage.BenchmarkTree2-2               21469000     21077900   -1.82%
garbage.BenchmarkTree2-4               11007600     10899100   -0.99%
garbage.BenchmarkTree2-8                7692400      7032600   -8.58%

garbage.BenchmarkParserPause          241863263    163249450  -32.50%
garbage.BenchmarkParserPause-2        120135418    112981575   -5.95%
garbage.BenchmarkParserPause-4         83411552     64580700  -22.58%
garbage.BenchmarkParserPause-8         51870697     42207244  -18.63%

garbage.BenchmarkTreePause             20940474     13147011  -37.22%
garbage.BenchmarkTreePause-2           20115124     11146715  -44.59%
garbage.BenchmarkTreePause-4           17217584      7486327  -56.52%
garbage.BenchmarkTreePause-8           18258845      7400871  -59.47%

garbage.BenchmarkTree2Pause           174067190    172674190   -0.80%
garbage.BenchmarkTree2Pause-2         131175809    130615761   -0.43%
garbage.BenchmarkTree2Pause-4          95406666     93972047   -1.50%
garbage.BenchmarkTree2Pause-8          86056095     85334952   -0.84%

garbage.BenchmarkParserLastPause      329932000    324790000   -1.56%
garbage.BenchmarkParserLastPause-2    209383000    210456000   +0.51%
garbage.BenchmarkParserLastPause-4    113981000    112921000   -0.93%
garbage.BenchmarkParserLastPause-8     77967000     76625000   -1.72%

garbage.BenchmarkTreeLastPause         29752000     18444000  -38.01%
garbage.BenchmarkTreeLastPause-2       24274000     14766000  -39.17%
garbage.BenchmarkTreeLastPause-4       19565000      8726000  -55.40%
garbage.BenchmarkTreeLastPause-8       21956000     10530000  -52.04%

garbage.BenchmarkTree2LastPause       314411000    311945000   -0.78%
garbage.BenchmarkTree2LastPause-2     214641000    210836000   -1.77%
garbage.BenchmarkTree2LastPause-4     110024000    108943000   -0.98%
garbage.BenchmarkTree2LastPause-8      76873000     70263000   -8.60%

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/5991049
parent a5dc7793
......@@ -125,7 +125,7 @@ enum
// 2, 3, and 4 are all plausible maximums depending
// on the hardware details of the machine. The garbage
// collector scales well to 4 cpus.
MaxGcproc = 4,
MaxGcproc = 16,
};
// A generic linked list of blocks. (Typically the block is bigger than sizeof(MLink).)
......@@ -341,6 +341,7 @@ struct MCentral
void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first);
void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first);
void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
// Main malloc heap.
// The heap itself is the "free[]" and "large" arrays,
......
......@@ -88,9 +88,6 @@ MCentral_Alloc(MCentral *c)
}
// Free n objects back into the central free list.
// Return the number of objects allocated.
// The objects are linked together by their first words.
// On return, *pstart points at the first object and *pend at the last.
void
runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *start)
{
......@@ -148,6 +145,42 @@ MCentral_Free(MCentral *c, void *v)
}
}
// Free n objects from a span s back into the central free list c.
// Called from GC.
void
runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end)
{
int32 size;
runtime·lock(c);
// Move to nonempty if necessary.
if(s->freelist == nil) {
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->nonempty, s);
}
// Add the objects back to s's free list.
end->next = s->freelist;
s->freelist = start;
s->ref -= n;
c->nfree += n;
// If s is completely freed, return it to the heap.
if(s->ref == 0) {
size = runtime·class_to_size[c->sizeclass];
runtime·MSpanList_Remove(s);
*(uintptr*)(s->start<<PageShift) = 1; // needs zeroing
s->freelist = nil;
c->nfree -= (s->npages << PageShift) / size;
runtime·unlock(c);
runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
runtime·MHeap_Free(&runtime·mheap, s, 0);
} else {
runtime·unlock(c);
}
}
void
runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
{
......
......@@ -761,6 +761,8 @@ sweepspan(MSpan *s)
byte *p;
MCache *c;
byte *arena_start;
MLink *start, *end;
int32 nfree;
arena_start = runtime·mheap.arena_start;
p = (byte*)(s->start << PageShift);
......@@ -774,6 +776,9 @@ sweepspan(MSpan *s)
npages = runtime·class_to_allocnpages[cl];
n = (npages << PageShift) / size;
}
nfree = 0;
start = end = nil;
c = m->mcache;
// Sweep through n objects of given size starting at p.
// This thread owns the span now, so it can manipulate
......@@ -810,21 +815,33 @@ sweepspan(MSpan *s)
// Mark freed; restore block boundary bit.
*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
c = m->mcache;
if(s->sizeclass == 0) {
// Free large span.
runtime·unmarkspan(p, 1<<PageShift);
*(uintptr*)p = 1; // needs zeroing
runtime·MHeap_Free(&runtime·mheap, s, 1);
c->local_alloc -= size;
c->local_nfree++;
} else {
// Free small object.
if(size > sizeof(uintptr))
((uintptr*)p)[1] = 1; // mark as "needs to be zeroed"
c->local_by_size[s->sizeclass].nfree++;
runtime·MCache_Free(c, p, s->sizeclass, size);
if(nfree)
end->next = (MLink*)p;
else
start = (MLink*)p;
end = (MLink*)p;
nfree++;
}
c->local_alloc -= size;
c->local_nfree++;
}
if(nfree) {
c->local_by_size[s->sizeclass].nfree += nfree;
c->local_alloc -= size * nfree;
c->local_nfree += nfree;
c->local_cachealloc -= nfree * size;
c->local_objects -= nfree;
runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, start, end);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment