Commit 5bfed7c6 authored by Russ Cox

runtime: log all thread stack traces during GODEBUG=crash on Linux and OS X

Normally, a panic/throw only shows the thread stack for the current thread
and all paused goroutines. Goroutines running on other threads, or other threads
running on their system stacks, are opaque. When GODEBUG=crash is set, change
that by relaying a SIGQUIT to every thread so that each one dumps its own stack.
If this works out reasonably well, we might make the SIGQUIT relay part of
the standard panic/throw death, perhaps eliding idle m's.

Change-Id: If7dd354f7f3a6e326d17c254afcf4f7681af2f8b
Reviewed-on: https://go-review.googlesource.com/2811
Reviewed-by: Rick Hudson <rlh@golang.org>
parent 094a054b
...@@ -3,6 +3,7 @@ package runtime ...@@ -3,6 +3,7 @@ package runtime
const ( const (
// These values are referred to in the source code // These values are referred to in the source code
// but really don't matter. Even so, use the standard numbers. // but really don't matter. Even so, use the standard numbers.
_SIGQUIT = 3
_SIGSEGV = 11 _SIGSEGV = 11
_SIGPROF = 27 _SIGPROF = 27
) )
......
...@@ -3,6 +3,7 @@ package runtime ...@@ -3,6 +3,7 @@ package runtime
const ( const (
// These values are referred to in the source code // These values are referred to in the source code
// but really don't matter. Even so, use the standard numbers. // but really don't matter. Even so, use the standard numbers.
_SIGQUIT = 3
_SIGSEGV = 11 _SIGSEGV = 11
_SIGPROF = 27 _SIGPROF = 27
) )
......
...@@ -3,6 +3,7 @@ package runtime ...@@ -3,6 +3,7 @@ package runtime
const ( const (
// These values are referred to in the source code // These values are referred to in the source code
// but really don't matter. Even so, use the standard numbers. // but really don't matter. Even so, use the standard numbers.
_SIGQUIT = 3
_SIGSEGV = 11 _SIGSEGV = 11
_SIGPROF = 27 _SIGPROF = 27
) )
......
...@@ -41,7 +41,6 @@ const ( ...@@ -41,7 +41,6 @@ const (
DUPLICATE_SAME_ACCESS = C.DUPLICATE_SAME_ACCESS DUPLICATE_SAME_ACCESS = C.DUPLICATE_SAME_ACCESS
THREAD_PRIORITY_HIGHEST = C.THREAD_PRIORITY_HIGHEST THREAD_PRIORITY_HIGHEST = C.THREAD_PRIORITY_HIGHEST
SIGPROF = 0 // dummy value for badsignal
SIGINT = C.SIGINT SIGINT = C.SIGINT
CTRL_C_EVENT = C.CTRL_C_EVENT CTRL_C_EVENT = C.CTRL_C_EVENT
CTRL_BREAK_EVENT = C.CTRL_BREAK_EVENT CTRL_BREAK_EVENT = C.CTRL_BREAK_EVENT
......
...@@ -15,7 +15,6 @@ const ( ...@@ -15,7 +15,6 @@ const (
_DUPLICATE_SAME_ACCESS = 0x2 _DUPLICATE_SAME_ACCESS = 0x2
_THREAD_PRIORITY_HIGHEST = 0x2 _THREAD_PRIORITY_HIGHEST = 0x2
_SIGPROF = 0 // dummy value for badsignal
_SIGINT = 0x2 _SIGINT = 0x2
_CTRL_C_EVENT = 0x0 _CTRL_C_EVENT = 0x0
_CTRL_BREAK_EVENT = 0x1 _CTRL_BREAK_EVENT = 0x1
......
...@@ -15,7 +15,6 @@ const ( ...@@ -15,7 +15,6 @@ const (
_DUPLICATE_SAME_ACCESS = 0x2 _DUPLICATE_SAME_ACCESS = 0x2
_THREAD_PRIORITY_HIGHEST = 0x2 _THREAD_PRIORITY_HIGHEST = 0x2
_SIGPROF = 0 // dummy value for badsignal
_SIGINT = 0x2 _SIGINT = 0x2
_CTRL_C_EVENT = 0x0 _CTRL_C_EVENT = 0x0
_CTRL_BREAK_EVENT = 0x1 _CTRL_BREAK_EVENT = 0x1
......
...@@ -575,6 +575,14 @@ func setBadSignalMsg() { ...@@ -575,6 +575,14 @@ func setBadSignalMsg() {
} }
} }
// Placeholder signal numbers. Both are 0; as the per-constant notes say,
// they are dummy values provided for the references made by badsignal
// and sighandler rather than real signal numbers.
const (
_SIGPROF = 0 // dummy value for badsignal
_SIGQUIT = 0 // dummy value for sighandler
)
// raiseproc is an empty stub: it accepts a signal number and does nothing
// with it, so no signal is delivered.
// NOTE(review): presumably present only so that callers of raiseproc
// resolve on this platform — confirm against the call sites.
func raiseproc(sig int32) {
}
func crash() { func crash() {
// TODO: This routine should do whatever is needed // TODO: This routine should do whatever is needed
// to make the Windows program abort/crash as it // to make the Windows program abort/crash as it
......
...@@ -69,6 +69,6 @@ const ( ...@@ -69,6 +69,6 @@ const (
_SIGINTDIV = 4 _SIGINTDIV = 4
_SIGFLOAT = 5 _SIGFLOAT = 5
_SIGTRAP = 6 _SIGTRAP = 6
// dummy value defined for badsignal _SIGPROF = 0 // dummy value defined for badsignal
_SIGPROF = 0 _SIGQUIT = 0 // dummy value defined for sighandler
) )
...@@ -418,6 +418,10 @@ func raise(sig int32) /* int32 */ { ...@@ -418,6 +418,10 @@ func raise(sig int32) /* int32 */ {
sysvicall1(libc_raise, uintptr(sig)) sysvicall1(libc_raise, uintptr(sig))
} }
// raiseproc hands sig to libc's raise through sysvicall1, which is the same
// call the neighbouring raise function makes. The result of the call is
// discarded.
// NOTE(review): the name suggests a process-wide signal (getpid + kill), but
// this goes through libc_raise exactly like raise does — confirm whether
// that is intended here.
func raiseproc(sig int32) /* int32 */ {
sysvicall1(libc_raise, uintptr(sig))
}
//go:nosplit //go:nosplit
func read(fd int32, buf unsafe.Pointer, nbyte int32) int32 { func read(fd int32, buf unsafe.Pointer, nbyte int32) int32 {
return int32(sysvicall3(libc_read, uintptr(fd), uintptr(buf), uintptr(nbyte))) return int32(sysvicall3(libc_read, uintptr(fd), uintptr(buf), uintptr(nbyte)))
......
...@@ -34,3 +34,4 @@ func sigtramp() ...@@ -34,3 +34,4 @@ func sigtramp()
func setitimer(mode int32, new, old *itimerval) func setitimer(mode int32, new, old *itimerval)
func raise(int32) func raise(int32)
func raiseproc(int32)
...@@ -28,6 +28,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds ...@@ -28,6 +28,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds
func getrlimit(kind int32, limit unsafe.Pointer) int32 func getrlimit(kind int32, limit unsafe.Pointer) int32
func raise(sig int32) func raise(sig int32)
func raiseproc(sig int32)
//go:noescape //go:noescape
func sys_umtx_sleep(addr *uint32, val, timeout int32) int32 func sys_umtx_sleep(addr *uint32, val, timeout int32) int32
......
...@@ -27,6 +27,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds ...@@ -27,6 +27,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds
//go:noescape //go:noescape
func getrlimit(kind int32, limit unsafe.Pointer) int32 func getrlimit(kind int32, limit unsafe.Pointer) int32
func raise(sig int32) func raise(sig int32)
func raiseproc(sig int32)
//go:noescape //go:noescape
func sys_umtx_op(addr *uint32, mode int32, val uint32, ptr2, ts *timespec) int32 func sys_umtx_op(addr *uint32, mode int32, val uint32, ptr2, ts *timespec) int32
......
...@@ -27,6 +27,7 @@ func rtsigprocmask(sig uint32, new, old *sigset, size int32) ...@@ -27,6 +27,7 @@ func rtsigprocmask(sig uint32, new, old *sigset, size int32)
//go:noescape //go:noescape
func getrlimit(kind int32, limit unsafe.Pointer) int32 func getrlimit(kind int32, limit unsafe.Pointer) int32
func raise(sig uint32) func raise(sig uint32)
func raiseproc(sig uint32)
//go:noescape //go:noescape
func sched_getaffinity(pid, len uintptr, buf *uintptr) int32 func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
......
...@@ -49,3 +49,6 @@ func sigpanic() { ...@@ -49,3 +49,6 @@ func sigpanic() {
g.sig = _SIGSEGV g.sig = _SIGSEGV
panicmem() panicmem()
} }
// raiseproc is an empty stub: it accepts a signal number and does nothing
// with it, so no signal is delivered.
// NOTE(review): presumably present only so that callers of raiseproc
// resolve on this platform — confirm against the call sites.
func raiseproc(sig int32) {
}
...@@ -24,6 +24,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds ...@@ -24,6 +24,7 @@ func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, nds
func lwp_tramp() func lwp_tramp()
func raise(sig int32) func raise(sig int32)
func raiseproc(sig int32)
//go:noescape //go:noescape
func getcontext(ctxt unsafe.Pointer) func getcontext(ctxt unsafe.Pointer)
......
...@@ -20,6 +20,7 @@ func sigprocmask(mode int32, new uint32) uint32 ...@@ -20,6 +20,7 @@ func sigprocmask(mode int32, new uint32) uint32
func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32 func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
func raise(sig int32) func raise(sig int32)
func raiseproc(sig int32)
//go:noescape //go:noescape
func tfork(param *tforkt, psize uintptr, mm *m, gg *g, fn uintptr) int32 func tfork(param *tforkt, psize uintptr, mm *m, gg *g, fn uintptr) int32
......
...@@ -7,7 +7,9 @@ ...@@ -7,7 +7,9 @@
package runtime package runtime
import "unsafe" import (
"unsafe"
)
func dumpregs(c *sigctxt) { func dumpregs(c *sigctxt) {
print("rax ", hex(c.rax()), "\n") print("rax ", hex(c.rax()), "\n")
...@@ -33,6 +35,8 @@ func dumpregs(c *sigctxt) { ...@@ -33,6 +35,8 @@ func dumpregs(c *sigctxt) {
print("gs ", hex(c.gs()), "\n") print("gs ", hex(c.gs()), "\n")
} }
var crashing int32
func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
_g_ := getg() _g_ := getg()
c := &sigctxt{info, ctxt} c := &sigctxt{info, ctxt}
...@@ -131,7 +135,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { ...@@ -131,7 +135,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
_g_.m.throwing = 1 _g_.m.throwing = 1
_g_.m.caughtsig = gp _g_.m.caughtsig = gp
startpanic()
if crashing == 0 {
startpanic()
}
if sig < uint32(len(sigtable)) { if sig < uint32(len(sigtable)) {
print(sigtable[sig].name, "\n") print(sigtable[sig].name, "\n")
...@@ -139,7 +146,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { ...@@ -139,7 +146,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
print("Signal ", sig, "\n") print("Signal ", sig, "\n")
} }
print("PC=", hex(c.rip()), "\n") print("PC=", hex(c.rip()), " m=", _g_.m.id, "\n")
if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 { if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
print("signal arrived during cgo execution\n") print("signal arrived during cgo execution\n")
gp = _g_.m.lockedg gp = _g_.m.lockedg
...@@ -150,12 +157,39 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) { ...@@ -150,12 +157,39 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
if gotraceback(&docrash) > 0 { if gotraceback(&docrash) > 0 {
goroutineheader(gp) goroutineheader(gp)
tracebacktrap(uintptr(c.rip()), uintptr(c.rsp()), 0, gp) tracebacktrap(uintptr(c.rip()), uintptr(c.rsp()), 0, gp)
tracebackothers(gp) if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
print("\n") // tracebackothers on original m skipped this one; trace it now.
goroutineheader(_g_.m.curg)
traceback(^uintptr(0), ^uintptr(0), 0, gp)
} else if crashing == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c) dumpregs(c)
} }
if docrash { if docrash {
// TODO(rsc): Implement raiseproc on other systems
// and then add to this switch.
switch GOOS {
case "darwin", "linux":
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
}
crash() crash()
} }
......
...@@ -50,7 +50,12 @@ TEXT runtime·write(SB),NOSPLIT,$0 ...@@ -50,7 +50,12 @@ TEXT runtime·write(SB),NOSPLIT,$0
MOVL AX, ret+12(FP) MOVL AX, ret+12(FP)
RET RET
TEXT runtime·raise(SB),NOSPLIT,$16 TEXT runtime·raise(SB),NOSPLIT,$0
// Ideally we'd send the signal to the current thread,
// not the whole process, but that's too hard on OS X.
JMP runtime·raiseproc(SB)
TEXT runtime·raiseproc(SB),NOSPLIT,$16
MOVL $20, AX // getpid MOVL $20, AX // getpid
INT $0x80 INT $0x80
MOVL AX, 4(SP) // pid MOVL AX, 4(SP) // pid
......
...@@ -66,7 +66,12 @@ TEXT runtime·write(SB),NOSPLIT,$0 ...@@ -66,7 +66,12 @@ TEXT runtime·write(SB),NOSPLIT,$0
MOVL AX, ret+24(FP) MOVL AX, ret+24(FP)
RET RET
TEXT runtime·raise(SB),NOSPLIT,$24 TEXT runtime·raise(SB),NOSPLIT,$0
// Ideally we'd send the signal to the current thread,
// not the whole process, but that's too hard on OS X.
JMP runtime·raiseproc(SB)
TEXT runtime·raiseproc(SB),NOSPLIT,$24
MOVL $(0x2000000+20), AX // getpid MOVL $(0x2000000+20), AX // getpid
SYSCALL SYSCALL
MOVQ AX, DI // arg 1 - pid MOVQ AX, DI // arg 1 - pid
......
...@@ -93,6 +93,15 @@ TEXT runtime·raise(SB),NOSPLIT,$12 ...@@ -93,6 +93,15 @@ TEXT runtime·raise(SB),NOSPLIT,$12
CALL *runtime·_vdso(SB) CALL *runtime·_vdso(SB)
RET RET
// raiseproc sends the signal in sig+0(FP) to the current process.
// It makes two system calls through runtime·_vdso: getpid (20) to learn
// the process id, then kill (37) with that pid and the signal number.
// The result of kill is not checked.
TEXT runtime·raiseproc(SB),NOSPLIT,$12
MOVL $20, AX // syscall - getpid
CALL *runtime·_vdso(SB)
MOVL AX, BX // arg 1 pid (getpid result comes back in AX)
MOVL sig+0(FP), CX // arg 2 signal
MOVL $37, AX // syscall - kill
CALL *runtime·_vdso(SB)
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0-12 TEXT runtime·setitimer(SB),NOSPLIT,$0-12
MOVL $104, AX // syscall - setitimer MOVL $104, AX // syscall - setitimer
MOVL mode+0(FP), BX MOVL mode+0(FP), BX
......
...@@ -91,6 +91,15 @@ TEXT runtime·raise(SB),NOSPLIT,$0 ...@@ -91,6 +91,15 @@ TEXT runtime·raise(SB),NOSPLIT,$0
SYSCALL SYSCALL
RET RET
// raiseproc sends the signal in sig+0(FP) to the current process.
// It issues getpid (39) to learn the process id, then kill (62) with
// that pid and the signal number. The result of kill is not checked.
TEXT runtime·raiseproc(SB),NOSPLIT,$0
MOVL $39, AX // syscall - getpid
SYSCALL
MOVL AX, DI // arg 1 pid (getpid result comes back in AX)
MOVL sig+0(FP), SI // arg 2 signal
MOVL $62, AX // syscall - kill
SYSCALL
RET
TEXT runtime·setitimer(SB),NOSPLIT,$0-24 TEXT runtime·setitimer(SB),NOSPLIT,$0-24
MOVL mode+0(FP), DI MOVL mode+0(FP), DI
MOVQ new+8(FP), SI MOVQ new+8(FP), SI
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#define SYS_write (SYS_BASE + 4) #define SYS_write (SYS_BASE + 4)
#define SYS_open (SYS_BASE + 5) #define SYS_open (SYS_BASE + 5)
#define SYS_close (SYS_BASE + 6) #define SYS_close (SYS_BASE + 6)
#define SYS_getpid (SYS_BASE + 20)
#define SYS_kill (SYS_BASE + 37)
#define SYS_gettimeofday (SYS_BASE + 78) #define SYS_gettimeofday (SYS_BASE + 78)
#define SYS_clone (SYS_BASE + 120) #define SYS_clone (SYS_BASE + 120)
#define SYS_rt_sigreturn (SYS_BASE + 173) #define SYS_rt_sigreturn (SYS_BASE + 173)
...@@ -113,6 +115,15 @@ TEXT runtime·raise(SB),NOSPLIT,$-4 ...@@ -113,6 +115,15 @@ TEXT runtime·raise(SB),NOSPLIT,$-4
SWI $0 SWI $0
RET RET
// raiseproc sends the signal in sig+0(FP) to the current process.
// It issues SYS_getpid, leaves the returned pid in R0 as the first
// argument, loads the signal into R1, and issues SYS_kill.
// The result of kill is not checked.
TEXT runtime·raiseproc(SB),NOSPLIT,$-4
MOVW $SYS_getpid, R7
SWI $0
// arg 1 pid already in R0 from getpid
MOVW sig+0(FP), R1 // arg 2 - signal
MOVW $SYS_kill, R7
SWI $0
RET
TEXT runtime·mmap(SB),NOSPLIT,$0 TEXT runtime·mmap(SB),NOSPLIT,$0
MOVW 0(FP), R0 MOVW 0(FP), R0
MOVW 4(FP), R1 MOVW 4(FP), R1
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#define SYS_write 4 #define SYS_write 4
#define SYS_open 5 #define SYS_open 5
#define SYS_close 6 #define SYS_close 6
#define SYS_getpid 20
#define SYS_kill 37
#define SYS_fcntl 55 #define SYS_fcntl 55
#define SYS_gettimeofday 78 #define SYS_gettimeofday 78
#define SYS_select 82 // always return -ENOSYS #define SYS_select 82 // always return -ENOSYS
...@@ -118,6 +120,13 @@ TEXT runtime·raise(SB),NOSPLIT,$-8 ...@@ -118,6 +120,13 @@ TEXT runtime·raise(SB),NOSPLIT,$-8
SYSCALL $SYS_tkill SYSCALL $SYS_tkill
RETURN RETURN
// raiseproc sends the signal in sig+0(FP) to the current process.
// It issues SYS_getpid, then SYS_kill with the pid in R3 and the
// signal number in R4. The result of kill is not checked.
TEXT runtime·raiseproc(SB),NOSPLIT,$-8
SYSCALL $SYS_getpid
// Self-move: the register is unchanged. Presumably kept to mark that the
// getpid result in R3 is reused as kill's first argument — TODO confirm.
MOVW R3, R3 // arg 1 pid
MOVW sig+0(FP), R4 // arg 2
SYSCALL $SYS_kill
RETURN
TEXT runtime·setitimer(SB),NOSPLIT,$-8-24 TEXT runtime·setitimer(SB),NOSPLIT,$-8-24
MOVW mode+0(FP), R3 MOVW mode+0(FP), R3
MOVD new+8(FP), R4 MOVD new+8(FP), R4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment