Commit 32d6fbcb authored by Russ Cox's avatar Russ Cox

runtime: replace needwb() with writeBarrierEnabled

Reduce the write barrier check to a single load and compare
so that it can be inlined into write barrier use sites.
Makes the standard write barrier a little faster too.

name                                       old                     new          delta
BenchmarkBinaryTree17              17.9s × (0.99,1.01)     17.9s × (1.00,1.01)  ~
BenchmarkFannkuch11                4.35s × (1.00,1.00)     4.43s × (1.00,1.00)  +1.81%
BenchmarkFmtFprintfEmpty           120ns × (0.93,1.06)     110ns × (1.00,1.06)  -7.92%
BenchmarkFmtFprintfString          479ns × (0.99,1.00)     487ns × (0.99,1.00)  +1.67%
BenchmarkFmtFprintfInt             452ns × (0.99,1.02)     450ns × (0.99,1.00)  ~
BenchmarkFmtFprintfIntInt          766ns × (0.99,1.01)     762ns × (1.00,1.00)  ~
BenchmarkFmtFprintfPrefixedInt     576ns × (0.98,1.01)     584ns × (0.99,1.01)  ~
BenchmarkFmtFprintfFloat           730ns × (1.00,1.01)     738ns × (1.00,1.00)  +1.16%
BenchmarkFmtManyArgs              2.84µs × (0.99,1.00)    2.80µs × (1.00,1.01)  -1.22%
BenchmarkGobDecode                39.3ms × (0.98,1.01)    39.0ms × (0.99,1.00)  ~
BenchmarkGobEncode                39.5ms × (0.99,1.01)    37.8ms × (0.98,1.01)  -4.33%
BenchmarkGzip                      663ms × (1.00,1.01)     661ms × (0.99,1.01)  ~
BenchmarkGunzip                    143ms × (1.00,1.00)     142ms × (1.00,1.00)  ~
BenchmarkHTTPClientServer          132µs × (0.99,1.01)     132µs × (0.99,1.01)  ~
BenchmarkJSONEncode               57.4ms × (0.99,1.01)    56.3ms × (0.99,1.01)  -1.96%
BenchmarkJSONDecode                139ms × (0.99,1.00)     138ms × (0.99,1.01)  ~
BenchmarkMandelbrot200            6.03ms × (1.00,1.00)    6.01ms × (1.00,1.00)  ~
BenchmarkGoParse                  10.3ms × (0.89,1.14)    10.2ms × (0.87,1.05)  ~
BenchmarkRegexpMatchEasy0_32       209ns × (1.00,1.00)     208ns × (1.00,1.00)  ~
BenchmarkRegexpMatchEasy0_1K       591ns × (0.99,1.00)     588ns × (1.00,1.00)  ~
BenchmarkRegexpMatchEasy1_32       184ns × (0.99,1.02)     182ns × (0.99,1.01)  ~
BenchmarkRegexpMatchEasy1_1K      1.01µs × (1.00,1.00)    0.99µs × (1.00,1.01)  -2.33%
BenchmarkRegexpMatchMedium_32      330ns × (1.00,1.00)     323ns × (1.00,1.01)  -2.12%
BenchmarkRegexpMatchMedium_1K     92.6µs × (1.00,1.00)    89.9µs × (1.00,1.00)  -2.92%
BenchmarkRegexpMatchHard_32       4.80µs × (0.95,1.00)    4.72µs × (0.95,1.01)  ~
BenchmarkRegexpMatchHard_1K        136µs × (1.00,1.00)     133µs × (1.00,1.01)  -1.86%
BenchmarkRevcomp                   900ms × (0.99,1.04)     900ms × (1.00,1.05)  ~
BenchmarkTemplate                  172ms × (1.00,1.00)     168ms × (0.99,1.01)  -2.07%
BenchmarkTimeParse                 637ns × (1.00,1.00)     637ns × (1.00,1.00)  ~
BenchmarkTimeFormat                744ns × (1.00,1.01)     738ns × (1.00,1.00)  -0.67%

Change-Id: I4ecc925805da1f5ee264377f1f7574f54ee575e7
Reviewed-on: https://go-review.googlesource.com/9321Reviewed-by: 's avatarAustin Clements <austin@google.com>
parent 2050f571
......@@ -76,13 +76,6 @@ func gcmarkwb_m(slot *uintptr, ptr uintptr) {
}
}
// needwb reports whether a write barrier is needed now
// (otherwise the write can be made directly).
//go:nosplit
func needwb() bool {
return gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled
}
// Write barrier calls must not happen during critical GC and scheduler
// related operations. In particular there are times when the GC assumes
// that the world is stopped but scheduler related code is still being
......@@ -114,7 +107,7 @@ func writebarrierptr_nostore1(dst *uintptr, src uintptr) {
// but if we do that, Go inserts a write barrier on *dst = src.
//go:nosplit
func writebarrierptr(dst *uintptr, src uintptr) {
if !needwb() {
if !writeBarrierEnabled {
*dst = src
return
}
......@@ -155,7 +148,7 @@ func writebarrierptr_shadow(dst *uintptr, src uintptr) {
// Do not reapply.
//go:nosplit
func writebarrierptr_nostore(dst *uintptr, src uintptr) {
if !needwb() {
if !writeBarrierEnabled {
return
}
......@@ -224,7 +217,7 @@ func writebarrieriface(dst *[2]uintptr, src [2]uintptr) {
// typedmemmove copies a value of type t to dst from src.
//go:nosplit
func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
if !needwb() || (typ.kind&kindNoPointers) != 0 {
if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 {
memmove(dst, src, typ.size)
return
}
......@@ -266,7 +259,7 @@ func reflect_typedmemmove(typ *_type, dst, src unsafe.Pointer) {
// dst and src point off bytes into the value and only copies size bytes.
//go:linkname reflect_typedmemmovepartial reflect.typedmemmovepartial
func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size uintptr) {
if !needwb() || (typ.kind&kindNoPointers) != 0 || size < ptrSize {
if !writeBarrierEnabled || (typ.kind&kindNoPointers) != 0 || size < ptrSize {
memmove(dst, src, size)
return
}
......@@ -309,7 +302,7 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
// not to be preempted before the write barriers have been run.
//go:nosplit
func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) {
if !needwb() || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize {
if !writeBarrierEnabled || typ == nil || (typ.kind&kindNoPointers) != 0 || framesize-retoffset < ptrSize {
return
}
......@@ -349,7 +342,7 @@ func typedslicecopy(typ *_type, dst, src slice) int {
racereadrangepc(srcp, uintptr(n)*typ.size, callerpc, pc)
}
if !needwb() {
if !writeBarrierEnabled {
memmove(dstp, srcp, uintptr(n)*typ.size)
return n
}
......@@ -465,6 +458,7 @@ func wbshadowinit() {
}
mheap_.shadow_enabled = true
writeBarrierEnabled = true
}
// shadowptr returns a pointer to the shadow value for addr.
......
......@@ -170,6 +170,32 @@ func setGCPercent(in int32) (out int32) {
return out
}
// Garbage collector phase.
// Indicates to write barrier and sychronization task to preform.
var gcphase uint32
var writeBarrierEnabled bool // compiler emits references to this in write barriers
// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32
const (
_GCoff = iota // GC not running, write barrier disabled
_GCquiesce // unused state
_GCstw // unused state
_GCscan // GC collecting roots into workbufs, write barrier disabled
_GCmark // GC marking from workbufs, write barrier ENABLED
_GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
_GCsweep // GC mark completed; sweeping in background, write barrier disabled
)
//go:nosplit
func setGCPhase(x uint32) {
atomicstore(&gcphase, x)
writeBarrierEnabled = gcphase == _GCmark || gcphase == _GCmarktermination || mheap_.shadow_enabled
}
// gcMarkWorkerMode represents the mode that a concurrent mark worker
// should operate in.
//
......@@ -753,7 +779,7 @@ func gc(mode int) {
heapGoal = gcController.heapGoal
systemstack(func() {
gcphase = _GCscan
setGCPhase(_GCscan)
// Concurrent scan.
starttheworld()
......@@ -769,7 +795,7 @@ func gc(mode int) {
if debug.gctrace > 0 {
tInstallWB = nanotime()
}
atomicstore(&gcphase, _GCmark)
setGCPhase(_GCmark)
// Ensure all Ps have observed the phase
// change and have write barriers enabled
// before any blackening occurs.
......@@ -826,7 +852,7 @@ func gc(mode int) {
// World is stopped.
// Start marktermination which includes enabling the write barrier.
atomicstore(&gcBlackenEnabled, 0)
gcphase = _GCmarktermination
setGCPhase(_GCmarktermination)
if debug.gctrace > 0 {
heap1 = memstats.heap_live
......@@ -862,7 +888,7 @@ func gc(mode int) {
}
// marking is complete so we can turn the write barrier off
gcphase = _GCoff
setGCPhase(_GCoff)
gcSweep(mode)
if debug.gctrace > 1 {
......@@ -876,9 +902,9 @@ func gc(mode int) {
// Still in STW but gcphase is _GCoff, reset to _GCmarktermination
// At this point all objects will be found during the gcMark which
// does a complete STW mark and object scan.
gcphase = _GCmarktermination
setGCPhase(_GCmarktermination)
gcMark(startTime)
gcphase = _GCoff // marking is done, turn off wb.
setGCPhase(_GCoff) // marking is done, turn off wb.
gcSweep(mode)
}
})
......
......@@ -514,30 +514,12 @@ type lfnode struct {
pushcnt uintptr
}
// Indicates to write barrier and sychronization task to preform.
const (
_GCoff = iota // GC not running, write barrier disabled
_GCquiesce // unused state
_GCstw // unused state
_GCscan // GC collecting roots into workbufs, write barrier disabled
_GCmark // GC marking from workbufs, write barrier ENABLED
_GCmarktermination // GC mark termination: allocate black, P's help GC, write barrier ENABLED
_GCsweep // GC mark completed; sweeping in background, write barrier disabled
)
type forcegcstate struct {
lock mutex
g *g
idle uint32
}
var gcphase uint32
// gcBlackenEnabled is 1 if mutator assists and background mark
// workers are allowed to blacken objects. This must only be set when
// gcphase == _GCmark.
var gcBlackenEnabled uint32
/*
* known to compiler
*/
......
......@@ -84,7 +84,7 @@ func growslice(t *slicetype, old slice, n int) slice {
memclr(add(p, lenmem), capmem-lenmem)
} else {
// Note: can't use rawmem (which avoids zeroing of memory), because then GC can scan unitialized memory.
// TODO(rsc): Use memmove when !needwb().
// TODO(rsc): Use memmove when !writeBarrierEnabled.
p = newarray(et, uintptr(newcap))
for i := 0; i < old.len; i++ {
typedmemmove(et, add(p, uintptr(i)*et.size), add(old.array, uintptr(i)*et.size))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment