Commit dac220b0 authored by Austin Clements

runtime: remove in-use page count loop from STW

In order to compute the sweep ratio, the runtime needs to know how
many pages belong to spans in state _MSpanInUse. Currently it finds
this out by looping over all spans during mark termination. However,
this takes ~1ms/heap GB, so multi-gigabyte heaps can quickly push our
STW time past 10ms.

Replace the loop with an actively maintained count of in-use pages.
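
In effect, this trades an O(#spans) pass taken with the world stopped for an
O(1) counter update on the span allocate/free paths. A simplified sketch of
the two approaches, condensed from the diff below (not literal runtime code):

    // Before: mark termination walked every span while the world was stopped.
    var pagesToSweep uintptr
    for _, s := range work.spans {
        if s.state == mSpanInUse {
            pagesToSweep += s.npages
        }
    }

    // After: mheap keeps a running count, adjusted under mheap.lock
    // whenever a span enters or leaves _MSpanInUse.
    h.pagesInUse += uint64(npage)    // span allocation (mHeap_Alloc_m, mHeap_Grow)
    h.pagesInUse -= uint64(s.npages) // span free (mHeap_FreeSpanLocked)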

For multi-gigabyte heaps, this reduces max mark termination pause time
by 75%–90% relative to tip and by 85%–95% relative to Go 1.5.1. This
shifts the longest pause time for large heaps to the sweep termination
phase, so it only slightly decreases max pause time, though it roughly
halves mean pause time. Here are the results for the garbage
benchmark:

               ---- max mark termination pause ----
Heap   Procs   after change   before change   1.5.1
24GB     12        1.9ms          18ms         37ms
24GB      4        3.7ms          18ms         37ms
 4GB      4        920µs         3.8ms        6.9ms

Fixes #11484.

Change-Id: Ia2d28bb8a1e4f1c3b8ebf79fb203f12b9bf114ac
Reviewed-on: https://go-review.googlesource.com/15070
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 608c1b0d
src/runtime/mgc.go
@@ -1565,14 +1565,9 @@ func gcSweep(mode gcMode) {
 		return
 	}
 
-	// Account how much sweeping needs to be done before the next
-	// GC cycle and set up proportional sweep statistics.
-	var pagesToSweep uintptr
-	for _, s := range work.spans {
-		if s.state == mSpanInUse {
-			pagesToSweep += s.npages
-		}
-	}
+	// Concurrent sweep needs to sweep all of the in-use pages by
+	// the time the allocated heap reaches the GC trigger. Compute
+	// the ratio of in-use pages to sweep per byte allocated.
 	heapDistance := int64(memstats.next_gc) - int64(memstats.heap_live)
 	// Add a little margin so rounding errors and concurrent
 	// sweep are less likely to leave pages unswept when GC starts.
@@ -1582,7 +1577,7 @@ func gcSweep(mode gcMode) {
 		heapDistance = _PageSize
 	}
 	lock(&mheap_.lock)
-	mheap_.sweepPagesPerByte = float64(pagesToSweep) / float64(heapDistance)
+	mheap_.sweepPagesPerByte = float64(mheap_.pagesInUse) / float64(heapDistance)
 	mheap_.pagesSwept = 0
 	mheap_.spanBytesAlloc = 0
 	unlock(&mheap_.lock)
src/runtime/mheap.go
@@ -29,9 +29,12 @@ type mheap struct {
 	spans_mapped uintptr
 
 	// Proportional sweep
+	pagesInUse        uint64  // pages of spans in state _MSpanInUse; R/W with mheap.lock
 	spanBytesAlloc    uint64  // bytes of spans allocated this cycle; updated atomically
 	pagesSwept        uint64  // pages swept this cycle; updated atomically
 	sweepPagesPerByte float64 // proportional sweep ratio; written with lock, read without
+	// TODO(austin): pagesInUse should be a uintptr, but the 386
+	// compiler can't 8-byte align fields.
 
 	// Malloc stats.
 	largefree uint64 // bytes freed for large objects (>maxsmallsize)
@@ -447,6 +450,7 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan
 		}
 
 		// update stats, sweep lists
+		h.pagesInUse += uint64(npage)
 		if large {
 			memstats.heap_objects++
 			memstats.heap_live += uint64(npage << _PageShift)
@@ -614,6 +618,8 @@ func bestFit(list *mspan, npage uintptr, best *mspan) *mspan {
 
 // Try to add at least npage pages of memory to the heap,
 // returning whether it worked.
+//
+// h must be locked.
 func mHeap_Grow(h *mheap, npage uintptr) bool {
 	// Ask for a big chunk, to reduce the number of mappings
 	// the operating system needs to track; also amortizes
@@ -648,6 +654,7 @@ func mHeap_Grow(h *mheap, npage uintptr) bool {
 	}
 	atomicstore(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
+	h.pagesInUse += uint64(npage)
 	mHeap_FreeSpanLocked(h, s, false, true, 0)
 	return true
 }
@@ -728,6 +735,7 @@ func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool, unusedsize uintptr) {
 			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
 			throw("MHeap_FreeSpanLocked - invalid free")
 		}
+		h.pagesInUse -= uint64(s.npages)
 	default:
 		throw("MHeap_FreeSpanLocked - invalid span state")
 	}
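
The ratio set in gcSweep above is what drives proportional sweeping: sweeping
sweepPagesPerByte pages per byte of span allocation ensures that all
pagesInUse pages are swept by the time heap_live grows by heapDistance, i.e.
before the next GC trigger. For example, with 1 GB of in-use spans (131072
8 KB pages) and a 256 MB heapDistance, the ratio is about 0.00049, or roughly
one page swept per 2 KB allocated. A rough sketch of the pacing check this
enables, using a hypothetical helper name and simplified (non-atomic) field
access rather than the runtime's actual code:

    // ensureSweepPaced is a hypothetical illustration: given how many span
    // bytes have been allocated this cycle, sweep until the number of pages
    // swept has kept up with sweepPagesPerByte * bytes allocated.
    func ensureSweepPaced(h *mheap, spanBytesAllocated uint64) {
        target := uint64(h.sweepPagesPerByte * float64(spanBytesAllocated))
        for h.pagesSwept < target {
            if sweepone() == ^uintptr(0) {
                break // no spans left to sweep this cycle
            }
        }
    }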