Commit caa21475 authored by Dmitry Vyukov

runtime: per-P contexts for race detector

The race runtime also needs local malloc caches and currently uses
a mix of per-OS-thread and per-goroutine caches. This leads to
increased memory consumption. More importantly, the cache of
synchronization objects is per-goroutine, and we don't always
have goroutine context when freeing memory in GC. As a result,
synchronization object descriptors leak (more precisely, they
can be reused if another synchronization object is recreated
at the same address, but that does not always happen). For example,
the added BenchmarkSyncLeak has effectively runaway memory
consumption (it is based on a real long-running server).
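
To make the leak concrete, here is a minimal stand-alone sketch (not
part of this change; the added BenchmarkSyncLeak is the real
reproducer) of the allocation pattern involved:

	package main

	import "sync/atomic"

	// Each atomic operation on a previously unseen address makes the
	// race runtime allocate a sync object descriptor keyed by that
	// address. Before this change the descriptor was not released when
	// the GC freed the underlying memory, so under -race this loop's
	// race-runtime footprint grows without bound.
	func main() {
		for i := 0; i < 1e6; i++ {
			a := make([]uint32, 1000)
			atomic.AddUint32(&a[0], 1)
		}
	}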

This change updates the race runtime with support for per-P contexts.
BenchmarkSyncLeak now stabilizes at ~1GB of memory consumption.

Long term, this will allow us to remove the race runtime's dependency
on glibc (malloc being the main remaining dependency).

I also implemented a different scheme for passing the P context to
the race runtime: the scheduler notified the race runtime about the
association between G and P by calling procwire(g, p)/procunwire(g, p).
But it turned out to be very messy, as there are lots of places where
the association changes (e.g. syscalls). So I dropped it in favor of
the current scheme: the race runtime asks the scheduler about the
current P.
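
In the adopted scheme, asking the scheduler is just a pointer chase
through the current g. A sketch of the fast path, valid only inside
package runtime (currentProcCtx is a hypothetical name; the real code
is the raceGetProcCmd branch of racecallbackthunk below):

	// Hypothetical helper inside package runtime; mirrors the assembly
	// fast path below: g -> m -> p -> racectx.
	func currentProcCtx() uintptr {
		return getg().m.p.ptr().racectx
	}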

Fixes #14533

Change-Id: Iad10d2f816a44affae1b9fed446b3580eafd8c69
Reviewed-on: https://go-review.googlesource.com/19970
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent fcd7c02c
@@ -251,7 +251,7 @@ func (s *mspan) sweep(preserve bool) bool {
}
}
if debug.allocfreetrace != 0 {
if debug.allocfreetrace != 0 || raceenabled || msanenabled {
// Find all newly freed objects. This doesn't have to be
// efficient; allocfreetrace has massive overhead.
mbits := s.markBitsForBase()
@@ -259,7 +259,15 @@ func (s *mspan) sweep(preserve bool) bool {
for i := uintptr(0); i < s.nelems; i++ {
if !mbits.isMarked() && (abits.index < s.freeindex || abits.isMarked()) {
x := s.base() + i*s.elemsize
tracefree(unsafe.Pointer(x), size)
if debug.allocfreetrace != 0 {
tracefree(unsafe.Pointer(x), size)
}
if raceenabled {
racefree(unsafe.Pointer(x), size)
}
if msanenabled {
msanfree(unsafe.Pointer(x), size)
}
}
mbits.advance()
abits.advance()
@@ -76,8 +76,9 @@ var buildVersion = sys.TheVersion
// for nmspinning manipulation.
var (
m0 m
g0 g
m0 m
g0 g
raceprocctx0 uintptr
)
//go:linkname runtime_init runtime.init
@@ -434,7 +435,7 @@ func schedinit() {
// In particular, it must be done before mallocinit below calls racemapshadow.
_g_ := getg()
if raceenabled {
_g_.racectx = raceinit()
_g_.racectx, raceprocctx0 = raceinit()
}
sched.maxmcount = 10000
@@ -3251,6 +3252,14 @@ func procresize(nprocs int32) *p {
pp.mcache = allocmcache()
}
}
if raceenabled && pp.racectx == 0 {
if old == 0 && i == 0 {
pp.racectx = raceprocctx0
raceprocctx0 = 0 // bootstrap
} else {
pp.racectx = raceproccreate()
}
}
}
// free unused P's
@@ -3302,6 +3311,10 @@ func procresize(nprocs int32) *p {
p.mcache = nil
gfpurge(p)
traceProcFree(p)
if raceenabled {
raceprocdestroy(p.racectx)
p.racectx = 0
}
p.status = _Pdead
// can't free P itself because it can be referenced by an M in syscall
}
@@ -58,7 +58,7 @@ func racereadpc(addr unsafe.Pointer, callpc, pc uintptr)
//go:noescape
func racewritepc(addr unsafe.Pointer, callpc, pc uintptr)
type symbolizeContext struct {
type symbolizeCodeContext struct {
pc uintptr
fn *byte
file *byte
@@ -70,8 +70,27 @@ type symbolizeContext struct {
var qq = [...]byte{'?', '?', 0}
var dash = [...]byte{'-', 0}
const (
raceGetProcCmd = iota
raceSymbolizeCodeCmd
raceSymbolizeDataCmd
)
// Callback from C into Go, runs on g0.
func racesymbolize(ctx *symbolizeContext) {
func racecallback(cmd uintptr, ctx unsafe.Pointer) {
switch cmd {
case raceGetProcCmd:
throw("should have been handled by racecallbackthunk")
case raceSymbolizeCodeCmd:
raceSymbolizeCode((*symbolizeCodeContext)(ctx))
case raceSymbolizeDataCmd:
raceSymbolizeData((*symbolizeDataContext)(ctx))
default:
throw("unknown command")
}
}
func raceSymbolizeCode(ctx *symbolizeCodeContext) {
f := findfunc(ctx.pc)
if f == nil {
ctx.fn = &qq[0]
@@ -91,6 +110,26 @@ func racesymbolize(ctx *symbolizeContext) {
return
}
type symbolizeDataContext struct {
addr uintptr
heap uintptr
start uintptr
size uintptr
name *byte
file *byte
line uintptr
res uintptr
}
func raceSymbolizeData(ctx *symbolizeDataContext) {
if _, x, n := findObject(unsafe.Pointer(ctx.addr)); x != nil {
ctx.heap = 1
ctx.start = uintptr(x)
ctx.size = n
ctx.res = 1
}
}
// Race runtime functions called via runtime·racecall.
//go:linkname __tsan_init __tsan_init
var __tsan_init byte
@@ -98,6 +137,12 @@ var __tsan_init byte
//go:linkname __tsan_fini __tsan_fini
var __tsan_fini byte
//go:linkname __tsan_proc_create __tsan_proc_create
var __tsan_proc_create byte
//go:linkname __tsan_proc_destroy __tsan_proc_destroy
var __tsan_proc_destroy byte
//go:linkname __tsan_map_shadow __tsan_map_shadow
var __tsan_map_shadow byte
@@ -113,6 +158,9 @@ var __tsan_go_end byte
//go:linkname __tsan_malloc __tsan_malloc
var __tsan_malloc byte
//go:linkname __tsan_free __tsan_free
var __tsan_free byte
//go:linkname __tsan_acquire __tsan_acquire
var __tsan_acquire byte
@@ -131,11 +179,14 @@ var __tsan_go_ignore_sync_end byte
// Mimic what cmd/cgo would do.
//go:cgo_import_static __tsan_init
//go:cgo_import_static __tsan_fini
//go:cgo_import_static __tsan_proc_create
//go:cgo_import_static __tsan_proc_destroy
//go:cgo_import_static __tsan_map_shadow
//go:cgo_import_static __tsan_finalizer_goroutine
//go:cgo_import_static __tsan_go_start
//go:cgo_import_static __tsan_go_end
//go:cgo_import_static __tsan_malloc
//go:cgo_import_static __tsan_free
//go:cgo_import_static __tsan_acquire
//go:cgo_import_static __tsan_release
//go:cgo_import_static __tsan_release_merge
@@ -175,7 +226,7 @@ func racefuncenter(uintptr)
func racefuncexit()
func racereadrangepc1(uintptr, uintptr, uintptr)
func racewriterangepc1(uintptr, uintptr, uintptr)
func racesymbolizethunk(uintptr)
func racecallbackthunk(uintptr)
// racecall allows calling an arbitrary function f from C race runtime
// with up to 4 uintptr arguments.
@@ -189,14 +240,13 @@ func isvalidaddr(addr unsafe.Pointer) bool {
}
//go:nosplit
func raceinit() uintptr {
func raceinit() (gctx, pctx uintptr) {
// cgo is required to initialize libc, which is used by race runtime
if !iscgo {
throw("raceinit: race build must use cgo")
}
var racectx uintptr
racecall(&__tsan_init, uintptr(unsafe.Pointer(&racectx)), funcPC(racesymbolizethunk), 0, 0)
racecall(&__tsan_init, uintptr(unsafe.Pointer(&gctx)), uintptr(unsafe.Pointer(&pctx)), funcPC(racecallbackthunk), 0)
// Round data segment to page boundaries, because it's used in mmap().
start := ^uintptr(0)
@@ -230,7 +280,7 @@ func raceinit() uintptr {
racedatastart = start
racedataend = start + size
return racectx
return
}
//go:nosplit
@@ -238,6 +288,18 @@ func racefini() {
racecall(&__tsan_fini, 0, 0, 0, 0)
}
//go:nosplit
func raceproccreate() uintptr {
var ctx uintptr
racecall(&__tsan_proc_create, uintptr(unsafe.Pointer(&ctx)), 0, 0, 0)
return ctx
}
//go:nosplit
func raceprocdestroy(ctx uintptr) {
racecall(&__tsan_proc_destroy, ctx, 0, 0, 0)
}
//go:nosplit
func racemapshadow(addr unsafe.Pointer, size uintptr) {
if racearenastart == 0 {
@@ -251,7 +313,12 @@ func racemapshadow(addr unsafe.Pointer, size uintptr) {
//go:nosplit
func racemalloc(p unsafe.Pointer, sz uintptr) {
racecall(&__tsan_malloc, uintptr(p), sz, 0, 0)
racecall(&__tsan_malloc, 0, 0, uintptr(p), sz)
}
//go:nosplit
func racefree(p unsafe.Pointer, sz uintptr) {
racecall(&__tsan_free, uintptr(p), sz, 0, 0)
}
//go:nosplit
@@ -323,11 +390,7 @@ func raceacquireg(gp *g, addr unsafe.Pointer) {
//go:nosplit
func racerelease(addr unsafe.Pointer) {
_g_ := getg()
if _g_.raceignore != 0 || !isvalidaddr(addr) {
return
}
racereleaseg(_g_, addr)
racereleaseg(getg(), addr)
}
//go:nosplit
@@ -4,4 +4,4 @@ the LLVM project (http://llvm.org/git/compiler-rt.git).
To update the .syso files use golang.org/x/build/cmd/racebuild.
Current runtime is built on rev 389d49d4943780efbfcd2a434f4462b6d0f23c44.
Current runtime is built on rev 9d79ea3416bfbe3acac50e47802ee9621bf53254.
@@ -93,13 +93,13 @@ func racer(x *int, done chan bool) {
}
`, `==================
WARNING: DATA RACE
Write by goroutine [0-9]:
Write at 0x[0-9,a-f]+ by goroutine [0-9]:
main\.store\(\)
.+/main\.go:12 \+0x[0-9,a-f]+
main\.racer\(\)
.+/main\.go:19 \+0x[0-9,a-f]+
Previous write by main goroutine:
Previous write at 0x[0-9,a-f]+ by main goroutine:
main\.store\(\)
.+/main\.go:12 \+0x[0-9,a-f]+
main\.main\(\)
@@ -17,10 +17,13 @@ import (
"fmt"
"io"
"log"
"math/rand"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"testing"
)
@@ -195,3 +198,26 @@ func TestIssue9137(t *testing.T) {
t.Errorf("mangled a: %q %q", a, a[:1])
}
}
func BenchmarkSyncLeak(b *testing.B) {
const (
G = 1000
S = 1000
H = 10
)
var wg sync.WaitGroup
wg.Add(G)
for g := 0; g < G; g++ {
go func() {
defer wg.Done()
hold := make([][]uint32, H)
for i := 0; i < b.N; i++ {
a := make([]uint32, S)
atomic.AddUint32(&a[rand.Intn(len(a))], 1)
hold[rand.Intn(len(hold))] = a
}
_ = hold
}()
}
wg.Wait()
}
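
The leak only shows up under the race detector; an invocation along
these lines (exact package path omitted) runs the benchmark:

	go test -race -bench=SyncLeak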
@@ -18,8 +18,10 @@ const raceenabled = false
func raceReadObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) { throw("race") }
func raceWriteObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) { throw("race") }
func raceinit() uintptr { throw("race"); return 0 }
func raceinit() (uintptr, uintptr) { throw("race"); return 0, 0 }
func racefini() { throw("race") }
func raceproccreate() uintptr { throw("race"); return 0 }
func raceprocdestroy(ctx uintptr) { throw("race") }
func racemapshadow(addr unsafe.Pointer, size uintptr) { throw("race") }
func racewritepc(addr unsafe.Pointer, callerpc, pc uintptr) { throw("race") }
func racereadpc(addr unsafe.Pointer, callerpc, pc uintptr) { throw("race") }
@@ -33,5 +35,6 @@ func racereleasemerge(addr unsafe.Pointer) { th
func racereleasemergeg(gp *g, addr unsafe.Pointer) { throw("race") }
func racefingo() { throw("race") }
func racemalloc(p unsafe.Pointer, sz uintptr) { throw("race") }
func racefree(p unsafe.Pointer, sz uintptr) { throw("race") }
func racegostart(pc uintptr) uintptr { throw("race"); return 0 }
func racegoend() { throw("race") }
@@ -384,7 +384,24 @@ call:
// C->Go callback thunk that allows C code to call runtime·racecallback.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
// RARG0 contains command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT runtime·racecallbackthunk(SB), NOSPLIT, $56-8
// Handle command raceGetProcCmd (0) here.
// First, the code below assumes that we are on curg, while raceGetProcCmd
// can be executed on g0. Second, it is called frequently, so it will
// benefit from this fast path.
CMPQ RARG0, $0
JNE rest
get_tls(RARG0)
MOVQ g(RARG0), RARG0
MOVQ g_m(RARG0), RARG0
MOVQ m_p(RARG0), RARG0
MOVQ p_racectx(RARG0), RARG0
MOVQ RARG0, (RARG1)
RET
rest:
// Save callee-saved registers (Go code won't respect that).
// This is a superset of darwin/linux/windows registers.
PUSHQ BX
@@ -401,8 +418,10 @@ TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
MOVQ g_m(R13), R13
MOVQ m_g0(R13), R14
MOVQ R14, g(R12) // g = m->g0
PUSHQ RARG1 // func arg
PUSHQ RARG0 // func arg
CALL runtime·racesymbolize(SB)
CALL runtime·racecallback(SB)
POPQ R12
POPQ R12
// All registers are smashed after Go code, reload.
get_tls(R12)
......
@@ -451,6 +451,7 @@ type p struct {
syscalltick uint32 // incremented on every system call
m muintptr // back-link to associated m (nil if idle)
mcache *mcache
racectx uintptr
deferpool [5][]*_defer // pool of available defer structs of different sizes (see panic.go)
deferpoolbuf [5][32]*_defer