Commit 3f22adec authored by Austin Clements's avatar Austin Clements

runtime: fix sigprof stack barrier locking

f90b48e0 intended to require the stack barrier lock in all cases of
sigprof that walked the user stack, but got it wrong. In particular,
if sp < gp.stack.lo || gp.stack.hi < sp, tracebackUser would be true,
but we wouldn't acquire the stack lock. If it then turned out that we
were in a cgo call, it would walk the stack without the lock.

In fact, the whole structure of stack locking is sigprof is somewhat
wrong because it assumes the G to lock is gp.m.curg, but all three
gentraceback calls start from potentially different Gs.

To fix this, we lower the gcTryLockStackBarriers calls much closer to
the gentraceback calls. There are now three separate trylock calls,
each clearly associated with a gentraceback and the locked G clearly
matches the G from which the gentraceback starts. This actually brings
the sigprof logic closer to what it originally was before stack
barrier locking.

This depends on "runtime: increase assumed stack size in
externalthreadhandler" because it very slightly increases the stack
used by sigprof; without this other commit, this is enough to blow the
profiler thread's assumed stack size.

Fixes #12528 (hopefully for real this time!).

For the 1.5 branch, though it will require some backporting. On the
1.5 branch, this will *not* require the "runtime: increase assumed
stack size in externalthreadhandler" commit: there's no pcvalue cache,
so the used stack is smaller.

Change-Id: Id2f6446ac276848f6fc158bee550cccd03186b83
Reviewed-on: https://go-review.googlesource.com/18328
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarRuss Cox <rsc@golang.org>
parent fdf9b3c9
......@@ -2989,49 +2989,50 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
// To recap, there are no constraints on the assembly being used for the
// transition. We simply require that g and SP match and that the PC is not
// in gogo.
traceback, tracebackUser := true, true
haveStackLock := false
traceback := true
if gp == nil || sp < gp.stack.lo || gp.stack.hi < sp || setsSP(pc) {
traceback = false
} else if gp.m.curg != nil {
// The user stack is safe to scan only if we can
// acquire the stack barrier lock.
if gcTryLockStackBarriers(gp.m.curg) {
haveStackLock = true
} else {
// Stack barriers are being inserted or
// removed, so we can't get a consistent
// traceback of the user stack right now.
tracebackUser = false
if gp == gp.m.curg {
// We're on the user stack, so don't
// do any traceback.
traceback = false
}
}
}
var stk [maxCPUProfStack]uintptr
var haveStackLock *g
n := 0
if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 && tracebackUser {
if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
// Cgo, we can't unwind and symbolize arbitrary C code,
// so instead collect Go stack that leads to the cgo call.
// This is especially important on windows, since all syscalls are cgo calls.
n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
if gcTryLockStackBarriers(mp.curg) {
haveStackLock = mp.curg
n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
}
} else if traceback {
var flags uint = _TraceTrap
if tracebackUser {
if gp.m.curg != nil && gcTryLockStackBarriers(gp.m.curg) {
// It's safe to traceback the user stack.
haveStackLock = gp.m.curg
flags |= _TraceJumpStack
}
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, flags)
// Traceback is safe if we're on the system stack (if
// necessary, flags will stop it before switching to
// the user stack), or if we locked the user stack.
if gp != gp.m.curg || haveStackLock != nil {
n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, flags)
}
}
if haveStackLock != nil {
gcUnlockStackBarriers(haveStackLock)
}
if n <= 0 {
// Normal traceback is impossible or has failed.
// See if it falls into several common cases.
n = 0
if GOOS == "windows" && n == 0 && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 && tracebackUser {
if GOOS == "windows" && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
if gcTryLockStackBarriers(mp.libcallg.ptr()) {
n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), 0, &stk[0], len(stk), nil, nil, 0)
gcUnlockStackBarriers(mp.libcallg.ptr())
}
}
if n == 0 {
// If all of the above has failed, account it against abstract "System" or "GC".
......@@ -3048,9 +3049,6 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
}
}
}
if haveStackLock {
gcUnlockStackBarriers(gp.m.curg)
}
if prof.hz != 0 {
// Simple cas-lock to coordinate with setcpuprofilerate.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment