Commit 07e738ec authored by Michael Anthony Knyszek, committed by Michael Knyszek

runtime: use only treaps for tracking spans

Currently, mheap tracks spans in both mSpanLists and mTreaps, but
mSpanLists, while they tend to be smaller, complicate the
implementation. Here we simplify the implementation by removing
free and busy from mheap and renaming freelarge -> free and busylarge
-> busy.
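
For illustration only, here is a minimal, self-contained sketch of the kind
of structure the heap now relies on exclusively: a treap keyed by span size
whose remove operation returns a best-fit span. This is a toy under assumed
simplifications, not the runtime's mTreap (which at the time lived in
runtime/mgclarge.go, stored *mspan nodes, and carried extra bookkeeping):

// Toy treap keyed by span size (in pages). Priorities are random, so
// expected depth is O(log n) regardless of insertion order.
package main

import (
	"fmt"
	"math/rand"
)

type node struct {
	npages      uintptr
	priority    uint32
	left, right *node
}

func rotateRight(n *node) *node {
	l := n.left
	n.left, l.right = l.right, n
	return l
}

func rotateLeft(n *node) *node {
	r := n.right
	n.right, r.left = r.left, n
	return r
}

// insert adds a span, rotating on the way up to restore the min-heap
// property on priorities.
func insert(n *node, npages uintptr) *node {
	if n == nil {
		return &node{npages: npages, priority: rand.Uint32()}
	}
	if npages < n.npages {
		n.left = insert(n.left, npages)
		if n.left.priority < n.priority {
			n = rotateRight(n)
		}
	} else {
		n.right = insert(n.right, npages)
		if n.right.priority < n.priority {
			n = rotateLeft(n)
		}
	}
	return n
}

// bestFit returns the smallest key >= want: the best-fitting free span.
func bestFit(n *node, want uintptr) (best uintptr, ok bool) {
	for n != nil {
		if n.npages >= want {
			best, ok = n.npages, true
			n = n.left // a smaller span might still fit
		} else {
			n = n.right
		}
	}
	return
}

// erase deletes one node with the given key by rotating it down toward
// a leaf (always lifting the lower-priority child) and unlinking it.
func erase(n *node, key uintptr) *node {
	if n == nil {
		return nil
	}
	switch {
	case key < n.npages:
		n.left = erase(n.left, key)
	case key > n.npages:
		n.right = erase(n.right, key)
	default:
		if n.left == nil {
			return n.right
		}
		if n.right == nil {
			return n.left
		}
		if n.left.priority < n.right.priority {
			n = rotateRight(n)
			n.right = erase(n.right, key)
		} else {
			n = rotateLeft(n)
			n.left = erase(n.left, key)
		}
	}
	return n
}

func main() {
	var root *node
	for _, n := range []uintptr{1, 8, 3, 300, 42} {
		root = insert(root, n)
	}
	if got, ok := bestFit(root, 4); ok {
		root = erase(root, got)
		fmt.Println(got) // 8: the smallest span with at least 4 pages
	}
}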

This change also slightly alters the reclamation policy. Previously,
for allocations under 1MB we would attempt to find a small span of the
right size. Now, we just try to find any number of spans totaling the
right size. This may increase heap fragmentation, but that will be dealt
with using virtual memory tricks in follow-up CLs.
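
As a toy illustration of the policy shift (assumed, simplified types; the
real logic is in (*mheap).reclaim and reclaimList in the diff below), the
new approach accumulates pages across several swept spans rather than
requiring one span of a matching size class:

// Simplified model: sweeping a span reclaims all of its pages.
package main

import "fmt"

// reclaim sweeps spans off a single busy list, stopping once the
// reclaimed pages cover the request.
func reclaim(busySpanPages []uintptr, npage uintptr) uintptr {
	var reclaimed uintptr
	for _, pages := range busySpanPages {
		if reclaimed >= npage {
			break
		}
		reclaimed += pages
	}
	return reclaimed
}

func main() {
	// A request for 256 pages (1MB with 4KB pages) is now satisfied by
	// three 100-page spans; no single 256-page span is needed.
	fmt.Println(reclaim([]uintptr{100, 100, 100}, 256)) // 300
}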

For #14045.

Garbage-heavy benchmarks show very little change, except what appears
to be a decrease in STW times and peak RSS.

name                      old STW-ns/GC       new STW-ns/GC       delta
Garbage/benchmem-MB=64-8           263k ±64%           217k ±24%  -17.66%  (p=0.028 n=25+23)

name                      old STW-ns/op       new STW-ns/op       delta
Garbage/benchmem-MB=64-8          9.39k ±65%          7.80k ±24%  -16.88%  (p=0.037 n=25+23)

name                      old peak-RSS-bytes  new peak-RSS-bytes  delta
Garbage/benchmem-MB=64-8           281M ± 0%           249M ± 4%  -11.40%  (p=0.000 n=19+18)

https://perf.golang.org/search?q=upload:20181005.1

Go1 benchmarks perform roughly the same, the most notable regression
being the JSON encode/decode benchmark, which worsens by ~2%.

name                     old time/op    new time/op    delta
BinaryTree17-8              3.02s ± 2%     2.99s ± 2%  -1.18%  (p=0.000 n=25+24)
Fannkuch11-8                3.05s ± 1%     3.02s ± 2%  -1.20%  (p=0.000 n=25+25)
FmtFprintfEmpty-8          43.6ns ± 5%    43.4ns ± 3%    ~     (p=0.528 n=25+25)
FmtFprintfString-8         74.9ns ± 3%    73.4ns ± 1%  -2.03%  (p=0.001 n=25+24)
FmtFprintfInt-8            79.3ns ± 3%    77.9ns ± 1%  -1.73%  (p=0.003 n=25+25)
FmtFprintfIntInt-8          119ns ± 6%     116ns ± 0%  -2.68%  (p=0.000 n=25+18)
FmtFprintfPrefixedInt-8     134ns ± 4%     132ns ± 1%  -1.52%  (p=0.004 n=25+25)
FmtFprintfFloat-8           240ns ± 1%     241ns ± 1%    ~     (p=0.403 n=24+23)
FmtManyArgs-8               543ns ± 1%     537ns ± 1%  -1.00%  (p=0.000 n=25+25)
GobDecode-8                6.88ms ± 1%    6.92ms ± 4%    ~     (p=0.088 n=24+22)
GobEncode-8                5.92ms ± 1%    5.93ms ± 1%    ~     (p=0.898 n=25+24)
Gzip-8                      267ms ± 2%     266ms ± 2%    ~     (p=0.213 n=25+24)
Gunzip-8                   35.4ms ± 1%    35.6ms ± 1%  +0.70%  (p=0.000 n=25+25)
HTTPClientServer-8          104µs ± 2%     104µs ± 2%    ~     (p=0.686 n=25+25)
JSONEncode-8               9.67ms ± 1%    9.80ms ± 4%  +1.32%  (p=0.000 n=25+25)
JSONDecode-8               47.7ms ± 1%    48.8ms ± 5%  +2.33%  (p=0.000 n=25+25)
Mandelbrot200-8            4.87ms ± 1%    4.91ms ± 1%  +0.79%  (p=0.000 n=25+25)
GoParse-8                  3.59ms ± 4%    3.55ms ± 1%    ~     (p=0.199 n=25+24)
RegexpMatchEasy0_32-8      90.3ns ± 1%    89.9ns ± 1%  -0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8       204ns ± 1%     204ns ± 1%    ~     (p=0.914 n=25+24)
RegexpMatchEasy1_32-8      84.9ns ± 0%    84.6ns ± 1%  -0.36%  (p=0.000 n=24+25)
RegexpMatchEasy1_1K-8       350ns ± 1%     348ns ± 3%  -0.59%  (p=0.007 n=25+25)
RegexpMatchMedium_32-8      122ns ± 1%     121ns ± 0%  -1.08%  (p=0.000 n=25+18)
RegexpMatchMedium_1K-8     36.1µs ± 1%    34.6µs ± 1%  -4.02%  (p=0.000 n=25+25)
RegexpMatchHard_32-8       1.69µs ± 2%    1.65µs ± 1%  -2.38%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8       50.8µs ± 1%    49.4µs ± 1%  -2.69%  (p=0.000 n=25+24)
Revcomp-8                   453ms ± 2%     449ms ± 3%  -0.74%  (p=0.022 n=25+24)
Template-8                 63.2ms ± 2%    63.4ms ± 1%    ~     (p=0.127 n=25+24)
TimeParse-8                 313ns ± 1%     315ns ± 3%    ~     (p=0.924 n=24+25)
TimeFormat-8                294ns ± 1%     292ns ± 2%  -0.65%  (p=0.004 n=23+24)
[Geo mean]                 49.9µs         49.6µs       -0.65%

name                     old speed      new speed      delta
GobDecode-8               112MB/s ± 1%   110MB/s ± 4%  -1.00%  (p=0.036 n=24+24)
GobEncode-8               130MB/s ± 1%   129MB/s ± 1%    ~     (p=0.894 n=25+24)
Gzip-8                   72.7MB/s ± 2%  73.0MB/s ± 2%    ~     (p=0.208 n=25+24)
Gunzip-8                  549MB/s ± 1%   545MB/s ± 1%  -0.70%  (p=0.000 n=25+25)
JSONEncode-8              201MB/s ± 1%   198MB/s ± 3%  -1.29%  (p=0.000 n=25+25)
JSONDecode-8             40.7MB/s ± 1%  39.8MB/s ± 5%  -2.23%  (p=0.000 n=25+25)
GoParse-8                16.2MB/s ± 4%  16.3MB/s ± 1%    ~     (p=0.211 n=25+24)
RegexpMatchEasy0_32-8     354MB/s ± 1%   356MB/s ± 1%  +0.47%  (p=0.000 n=25+21)
RegexpMatchEasy0_1K-8    5.00GB/s ± 0%  4.99GB/s ± 1%    ~     (p=0.588 n=24+24)
RegexpMatchEasy1_32-8     377MB/s ± 1%   378MB/s ± 1%  +0.39%  (p=0.000 n=25+25)
RegexpMatchEasy1_1K-8    2.92GB/s ± 1%  2.94GB/s ± 3%  +0.65%  (p=0.008 n=25+25)
RegexpMatchMedium_32-8   8.14MB/s ± 1%  8.22MB/s ± 1%  +0.98%  (p=0.000 n=25+24)
RegexpMatchMedium_1K-8   28.4MB/s ± 1%  29.6MB/s ± 1%  +4.19%  (p=0.000 n=25+25)
RegexpMatchHard_32-8     18.9MB/s ± 2%  19.4MB/s ± 1%  +2.43%  (p=0.000 n=25+25)
RegexpMatchHard_1K-8     20.2MB/s ± 1%  20.7MB/s ± 1%  +2.76%  (p=0.000 n=25+24)
Revcomp-8                 561MB/s ± 2%   566MB/s ± 3%  +0.75%  (p=0.021 n=25+24)
Template-8               30.7MB/s ± 2%  30.6MB/s ± 1%    ~     (p=0.131 n=25+24)
[Geo mean]                120MB/s        121MB/s       +0.48%

https://perf.golang.org/search?q=upload:20181004.6

Change-Id: I97f9fee34577961a116a8ddd445c6272253f0f95
Reviewed-on: https://go-review.googlesource.com/c/139837
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
parent e508a5f0
@@ -136,8 +136,7 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
-	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
-	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024
@@ -30,13 +30,11 @@ const minPhysPageSize = 4096
 //go:notinheap
 type mheap struct {
 	lock      mutex
-	free      [_MaxMHeapList]mSpanList // free lists of given length up to _MaxMHeapList
-	freelarge mTreap                   // free treap of length >= _MaxMHeapList
-	busy      [_MaxMHeapList]mSpanList // busy lists of large spans of given length
-	busylarge mSpanList                // busy lists of large spans length >= _MaxMHeapList
-	sweepgen  uint32                   // sweep generation, see comment in mspan
-	sweepdone uint32                   // all spans are swept
-	sweepers  uint32                   // number of active sweepone calls
+	free      mTreap    // free treap of spans
+	busy      mSpanList // busy list of spans
+	sweepgen  uint32    // sweep generation, see comment in mspan
+	sweepdone uint32    // all spans are swept
+	sweepers  uint32    // number of active sweepone calls
 
 	// allspans is a slice of all mspans ever created. Each mspan
 	// appears exactly once.
@@ -599,12 +597,7 @@ func (h *mheap) init() {
 	h.spanalloc.zero = false
 
 	// h->mapcache needs no init
-	for i := range h.free {
-		h.free[i].init()
-		h.busy[i].init()
-	}
-
-	h.busylarge.init()
+	h.busy.init()
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
@@ -647,30 +640,12 @@ retry:
 // Sweeps and reclaims at least npage pages into heap.
 // Called before allocating npage pages.
 func (h *mheap) reclaim(npage uintptr) {
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for i := int(npage); i < len(h.busy); i++ {
-		if h.reclaimList(&h.busy[i], npage) != 0 {
-			return // Bingo!
-		}
-	}
-
-	// Then -- even larger objects.
-	if h.reclaimList(&h.busylarge, npage) != 0 {
+	if h.reclaimList(&h.busy, npage) != 0 {
 		return // Bingo!
 	}
 
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed := uintptr(0)
-	for i := 0; i < int(npage) && i < len(h.busy); i++ {
-		reclaimed += h.reclaimList(&h.busy[i], npage-reclaimed)
-		if reclaimed >= npage {
-			return
-		}
-	}
-
 	// Now sweep everything that is not yet swept.
+	var reclaimed uintptr
 	unlock(&h.lock)
 	for {
 		n := sweepone()
@@ -752,11 +727,7 @@ func (h *mheap) alloc_m(npage uintptr, spanclass spanClass, large bool) *mspan {
 			mheap_.nlargealloc++
 			atomic.Xadd64(&memstats.heap_live, int64(npage<<_PageShift))
 			// Swept spans are at the end of lists.
-			if s.npages < uintptr(len(h.busy)) {
-				h.busy[s.npages].insertBack(s)
-			} else {
-				h.busylarge.insertBack(s)
-			}
+			h.busy.insertBack(s)
 		}
 	}
 	// heap_scan and heap_live were updated.
@@ -867,31 +838,20 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) {
 // The returned span has been removed from the
 // free list, but its state is still mSpanFree.
 func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan {
-	var list *mSpanList
 	var s *mspan
 
-	// Try in fixed-size lists up to max.
-	for i := int(npage); i < len(h.free); i++ {
-		list = &h.free[i]
-		if !list.isEmpty() {
-			s = list.first
-			list.remove(s)
-			goto HaveSpan
-		}
-	}
-	// Best fit in list of large spans.
-	s = h.allocLarge(npage) // allocLarge removed s from h.freelarge for us
+	// Best fit in the treap of spans.
+	s = h.free.remove(npage)
 	if s == nil {
 		if !h.grow(npage) {
 			return nil
 		}
-		s = h.allocLarge(npage)
+		s = h.free.remove(npage)
 		if s == nil {
 			return nil
 		}
 	}
 
-HaveSpan:
 	// Mark span in use.
 	if s.state != mSpanFree {
 		throw("MHeap_AllocLocked - MSpan not free")
@@ -933,21 +893,6 @@ HaveSpan:
 	return s
 }
 
-// Large spans have a minimum size of 1MByte. The maximum number of large spans to support
-// 1TBytes is 1 million, experimentation using random sizes indicates that the depth of
-// the tree is less that 2x that of a perfectly balanced tree. For 1TByte can be referenced
-// by a perfectly balanced tree with a depth of 20. Twice that is an acceptable 40.
-func (h *mheap) isLargeSpan(npages uintptr) bool {
-	return npages >= uintptr(len(h.free))
-}
-
-// allocLarge allocates a span of at least npage pages from the treap of large spans.
-// Returns nil if no such span currently exists.
-func (h *mheap) allocLarge(npage uintptr) *mspan {
-	// Search treap for smallest span with >= npage pages.
-	return h.freelarge.remove(npage)
-}
-
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
 //
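
(Worked out, the bound in the comment deleted above: 1TB / 1MB =
2^40 / 2^20 = 2^20, about 1 million large spans; a perfectly balanced
binary tree over 2^20 nodes has depth log2(2^20) = 20, and the measured
treap depth stayed under 2x that, i.e. about 40.)
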
@@ -1023,7 +968,7 @@ func (h *mheap) freeManual(s *mspan, stat *uint64) {
 	unlock(&h.lock)
 }
 
-// s must be on a busy list (h.busy or h.busylarge) or unlinked.
+// s must be on the busy list or unlinked.
 func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince int64) {
 	switch s.state {
 	case mSpanManual:
@@ -1048,7 +993,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 	}
 	s.state = mSpanFree
 	if s.inList() {
-		h.busyList(s.npages).remove(s)
+		h.busy.remove(s)
 	}
 
 	// Stamp newly unused spans. The scavenger will use that
@@ -1069,12 +1014,7 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		h.setSpan(before.base(), s)
 		// The size is potentially changing so the treap needs to delete adjacent nodes and
 		// insert back as a combined node.
-		if h.isLargeSpan(before.npages) {
-			// We have a t, it is large so it has to be in the treap so we can remove it.
-			h.freelarge.removeSpan(before)
-		} else {
-			h.freeList(before.npages).remove(before)
-		}
+		h.free.removeSpan(before)
 		before.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(before))
 	}
@@ -1085,32 +1025,13 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i
 		s.npreleased += after.npreleased
 		s.needzero |= after.needzero
 		h.setSpan(s.base()+s.npages*pageSize-1, s)
-		if h.isLargeSpan(after.npages) {
-			h.freelarge.removeSpan(after)
-		} else {
-			h.freeList(after.npages).remove(after)
-		}
+		h.free.removeSpan(after)
 		after.state = mSpanDead
 		h.spanalloc.free(unsafe.Pointer(after))
 	}
 
-	// Insert s into appropriate list or treap.
-	if h.isLargeSpan(s.npages) {
-		h.freelarge.insert(s)
-	} else {
-		h.freeList(s.npages).insert(s)
-	}
-}
-
-func (h *mheap) freeList(npages uintptr) *mSpanList {
-	return &h.free[npages]
-}
-
-func (h *mheap) busyList(npages uintptr) *mSpanList {
-	if npages < uintptr(len(h.busy)) {
-		return &h.busy[npages]
-	}
-	return &h.busylarge
+	// Insert s into the free treap.
+	h.free.insert(s)
 }
 
 func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
@@ -1123,21 +1044,6 @@ func scavengeTreapNode(t *treapNode, now, limit uint64) uintptr {
 	return 0
 }
 
-func scavengelist(list *mSpanList, now, limit uint64) uintptr {
-	if list.isEmpty() {
-		return 0
-	}
-
-	var sumreleased uintptr
-	for s := list.first; s != nil; s = s.next {
-		if (now-uint64(s.unusedsince)) <= limit || s.npreleased == s.npages {
-			continue
-		}
-		sumreleased += s.scavenge()
-	}
-	return sumreleased
-}
-
 func (h *mheap) scavenge(k int32, now, limit uint64) {
 	// Disallow malloc or panic while holding the heap lock. We do
 	// this here because this is an non-mallocgc entry-point to
@@ -1145,11 +1051,7 @@ func (h *mheap) scavenge(k int32, now, limit uint64) {
 	gp := getg()
 	gp.m.mallocing++
 	lock(&h.lock)
-	var sumreleased uintptr
-	for i := 0; i < len(h.free); i++ {
-		sumreleased += scavengelist(&h.free[i], now, limit)
-	}
-	sumreleased += scavengetreap(h.freelarge.treap, now, limit)
+	sumreleased := scavengetreap(h.free.treap, now, limit)
 	unlock(&h.lock)
 	gp.m.mallocing--