runtime/traceback: support tracking goroutine ancestor tracebacks with…

runtime/traceback: support tracking goroutine ancestor tracebacks with GODEBUG="tracebackancestors=N" Currently, collecting a stack trace via runtime.Stack captures the stack for the immediately running goroutines. This change extends those tracebacks to include the tracebacks of their ancestors. This is done with a low memory cost and only utilized when debug option tracebackancestors is set to a value greater than 0. Resolves #22289 Change-Id: I7edacc62b2ee3bd278600c4a21052c351f313f3a Reviewed-on: https://go-review.googlesource.com/70993 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>

runtime/traceback: support tracking goroutine ancestor tracebacks with…
runtime/traceback: support tracking goroutine ancestor tracebacks with GODEBUG="tracebackancestors=N" Currently, collecting a stack trace via runtime.Stack captures the stack for the immediately running goroutines. This change extends those tracebacks to include the tracebacks of their ancestors. This is done with a low memory cost and only utilized when debug option tracebackancestors is set to a value greater than 0. Resolves #22289 Change-Id: I7edacc62b2ee3bd278600c4a21052c351f313f3a Reviewed-on: https://go-review.googlesource.com/70993 Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com>
d9b006a7 · Eric Daniels · Austin Clements · 115b1cd1 · d9b006a7 · d9b006a7
Commit d9b006a7 authored Apr 04, 2018 by Eric Daniels Committed by Austin Clements Apr 13, 2018
8 changed files
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -118,6 +118,12 @@ It is a comma-separated list of name=val pairs setting these named variables:
 	schedtrace: setting schedtrace=X causes the scheduler to emit a single line to standard
 	error every X milliseconds, summarizing the scheduler state.

+	tracebackancestors: setting tracebackancestors=N extends tracebacks with the stacks at
+	which goroutines were created, where N limits the number of ancestor goroutines to
+	report. This also extends the information returned by runtime.Stack. Ancestor's goroutine
+	IDs will refer to the ID of the goroutine at the time of creation; it's possible for this
+	ID to be reused for another goroutine. Setting N to 0 will report no ancestry information.
+
 The net and net/http packages also refer to debugging variables in GODEBUG.
 See the documentation for those packages for details.


--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3228,16 +3228,17 @@ func malg(stacksize int32) *g {
 //go:nosplit
 func newproc(siz int32, fn *funcval) {
 	argp := add(unsafe.Pointer(&fn), sys.PtrSize)
+	gp := getg()
 	pc := getcallerpc()
 	systemstack(func() {
-		newproc1(fn, (*uint8)(argp), siz, pc)
+		newproc1(fn, (*uint8)(argp), siz, gp, pc)
 	})
 }

 // Create a new g running fn with narg bytes of arguments starting
 // at argp. callerpc is the address of the go statement that created
 // this. The new g is put on the queue of g's waiting to run.
-func newproc1(fn *funcval, argp *uint8, narg int32, callerpc uintptr) {
+func newproc1(fn *funcval, argp *uint8, narg int32, callergp *g, callerpc uintptr) {
 	_g_ := getg()

 	if fn == nil {
@@ -3305,6 +3306,7 @@ func newproc1(fn *funcval, argp *uint8, narg int32, callerpc uintptr) {
 	newg.sched.g = guintptr(unsafe.Pointer(newg))
 	gostartcallfn(&newg.sched, fn)
 	newg.gopc = callerpc
+	newg.ancestors = saveAncestors(callergp)
 	newg.startpc = fn.fn
 	if _g_.m.curg != nil {
 		newg.labels = _g_.m.curg.labels
@@ -3342,6 +3344,40 @@ func newproc1(fn *funcval, argp *uint8, narg int32, callerpc uintptr) {
 	}
 }

+// saveAncestors copies previous ancestors of the given caller g and
+// includes infor for the current caller into a new set of tracebacks for
+// a g being created.
+func saveAncestors(callergp *g) *[]ancestorInfo {
+	// Copy all prior info, except for the root goroutine (goid 0).
+	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
+		return nil
+	}
+	var callerAncestors []ancestorInfo
+	if callergp.ancestors != nil {
+		callerAncestors = *callergp.ancestors
+	}
+	n := int32(len(callerAncestors)) + 1
+	if n > debug.tracebackancestors {
+		n = debug.tracebackancestors
+	}
+	ancestors := make([]ancestorInfo, n)
+	copy(ancestors[1:], callerAncestors)
+
+	var pcs [_TracebackMaxFrames]uintptr
+	npcs := gcallers(callergp, 0, pcs[:])
+	ipcs := make([]uintptr, npcs)
+	copy(ipcs, pcs[:])
+	ancestors[0] = ancestorInfo{
+		pcs:  ipcs,
+		goid: callergp.goid,
+		gopc: callergp.gopc,
+	}
+
+	ancestorsp := new([]ancestorInfo)
+	*ancestorsp = ancestors
+	return ancestorsp
+}
+
 // Put on gfree list.
 // If local list is too long, transfer a batch to the global list.
 func gfput(_p_ *p, gp *g) {

--- a/src/runtime/runtime1.go
+++ b/src/runtime/runtime1.go
@@ -314,20 +314,21 @@ type dbgVar struct {
 // existing int var for that value, which may
 // already have an initial value.
 var debug struct {
-	allocfreetrace   int32
-	cgocheck         int32
-	efence           int32
-	gccheckmark      int32
-	gcpacertrace     int32
-	gcshrinkstackoff int32
-	gcrescanstacks   int32
-	gcstoptheworld   int32
-	gctrace          int32
-	invalidptr       int32
-	sbrk             int32
-	scavenge         int32
-	scheddetail      int32
-	schedtrace       int32
+	allocfreetrace     int32
+	cgocheck           int32
+	efence             int32
+	gccheckmark        int32
+	gcpacertrace       int32
+	gcshrinkstackoff   int32
+	gcrescanstacks     int32
+	gcstoptheworld     int32
+	gctrace            int32
+	invalidptr         int32
+	sbrk               int32
+	scavenge           int32
+	scheddetail        int32
+	schedtrace         int32
+	tracebackancestors int32
 }

 var dbgvars = []dbgVar{
@@ -345,6 +346,7 @@ var dbgvars = []dbgVar{
 	{"scavenge", &debug.scavenge},
 	{"scheddetail", &debug.scheddetail},
 	{"schedtrace", &debug.schedtrace},
+	{"tracebackancestors", &debug.tracebackancestors},
 }

 func parsedebugvars() {

--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -378,8 +378,9 @@ type g struct {
 	sigcode0       uintptr
 	sigcode1       uintptr
 	sigpc          uintptr
-	gopc           uintptr // pc of go statement that created this goroutine
-	startpc        uintptr // pc of goroutine function
+	gopc           uintptr         // pc of go statement that created this goroutine
+	ancestors      *[]ancestorInfo // ancestor information goroutine(s) that created this goroutine (only used if debug.tracebackancestors)
+	startpc        uintptr         // pc of goroutine function
 	racectx        uintptr
 	waiting        *sudog         // sudog structures this g is waiting on (that have a valid elem ptr); in lock order
 	cgoCtxt        []uintptr      // cgo traceback context
@@ -743,6 +744,13 @@ type stkframe struct {
 	argmap   *bitvector // force use of this argmap
 }

+// ancestorInfo records details of where a goroutine was started.
+type ancestorInfo struct {
+	pcs  []uintptr // pcs from the stack of this goroutine
+	goid int64     // goroutine id of this goroutine; original goroutine possibly dead
+	gopc uintptr   // pc of go statement that created this goroutine
+}
+
 const (
 	_TraceRuntimeFrames = 1 << iota // include frames for internal runtime functions.
 	_TraceTrap                      // the initial PC, SP are from a trap, not a return PC from a call

--- a/src/runtime/sizeof_test.go
+++ b/src/runtime/sizeof_test.go
@@ -23,7 +23,7 @@ func TestSizeof(t *testing.T) {
 		_32bit uintptr     // size on 32bit platforms
 		_64bit uintptr     // size on 64bit platforms
 	}{
-		{runtime.G{}, 216, 376}, // g, but exported for testing
+		{runtime.G{}, 220, 384}, // g, but exported for testing
 	}

 	for _, tt := range tests {

--- a/src/runtime/stack_test.go
+++ b/src/runtime/stack_test.go
@@ -8,6 +8,7 @@ import (
 	"bytes"
 	"fmt"
 	"reflect"
+	"regexp"
 	. "runtime"
 	"strings"
 	"sync"
@@ -696,3 +697,42 @@ func TestTracebackSystemstack(t *testing.T) {
 		t.Fatalf("expected 5 calls to TracebackSystemstack and 1 call to TestTracebackSystemstack, got:%s", tb.String())
 	}
 }
+
+func TestTracebackAncestors(t *testing.T) {
+	goroutineRegex := regexp.MustCompile(`goroutine [0-9]+ \[`)
+	for _, tracebackDepth := range []int{0, 1, 5, 50} {
+		output := runTestProg(t, "testprog", "TracebackAncestors", fmt.Sprintf("GODEBUG=tracebackancestors=%d", tracebackDepth))
+
+		numGoroutines := 3
+		numFrames := 2
+		ancestorsExpected := numGoroutines
+		if numGoroutines > tracebackDepth {
+			ancestorsExpected = tracebackDepth
+		}
+
+		matches := goroutineRegex.FindAllStringSubmatch(output, -1)
+		if len(matches) != 2 {
+			t.Fatalf("want 2 goroutines, got:\n%s", output)
+		}
+
+		// Check functions in the traceback.
+		fns := []string{"main.recurseThenCallGo", "main.main", "main.printStack", "main.TracebackAncestors"}
+		for _, fn := range fns {
+			if !strings.Contains(output, "\n"+fn+"(") {
+				t.Fatalf("expected %q function in traceback:\n%s", fn, output)
+			}
+		}
+
+		if want, count := "originating from goroutine", ancestorsExpected; strings.Count(output, want) != count {
+			t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output)
+		}
+
+		if want, count := "main.recurseThenCallGo(...)", ancestorsExpected*(numFrames+1); strings.Count(output, want) != count {
+			t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output)
+		}
+
+		if want, count := "main.recurseThenCallGo(0x", 1; strings.Count(output, want) != count {
+			t.Errorf("output does not contain %d instances of %q:\n%s", count, want, output)
+		}
+	}
+}
--- a/src/runtime/testdata/testprog/traceback_ancestors.go
+++ b/src/runtime/testdata/testprog/traceback_ancestors.go
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"fmt"
+	"runtime"
+)
+
+func init() {
+	register("TracebackAncestors", TracebackAncestors)
+}
+
+const numGoroutines = 3
+const numFrames = 2
+
+func TracebackAncestors() {
+	w := make(chan struct{})
+	recurseThenCallGo(w, numGoroutines, numFrames)
+	<-w
+	printStack()
+	close(w)
+}
+
+func printStack() {
+	buf := make([]byte, 1024)
+	for {
+		n := runtime.Stack(buf, true)
+		if n < len(buf) {
+			fmt.Print(string(buf[:n]))
+			return
+		}
+		buf = make([]byte, 2*len(buf))
+	}
+}
+
+func recurseThenCallGo(w chan struct{}, frames int, goroutines int) {
+	if frames == 0 {
+		// Signal to TracebackAncestors that we are done recursing and starting goroutines.
+		w <- struct{}{}
+		<-w
+		return
+	}
+	if goroutines == 0 {
+		// Start the next goroutine now that there are no more recursions left
+		// for this current goroutine.
+		go recurseThenCallGo(w, frames-1, numFrames)
+		return
+	}
+	recurseThenCallGo(w, frames, goroutines-1)
+}
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -627,18 +627,22 @@ func printcreatedby(gp *g) {
 	pc := gp.gopc
 	f := findfunc(pc)
 	if f.valid() && showframe(f, gp, false, false) && gp.goid != 1 {
-		print("created by ", funcname(f), "\n")
-		tracepc := pc // back up to CALL instruction for funcline.
-		if pc > f.entry {
-			tracepc -= sys.PCQuantum
-		}
-		file, line := funcline(f, tracepc)
-		print("\t", file, ":", line)
-		if pc > f.entry {
-			print(" +", hex(pc-f.entry))
-		}
-		print("\n")
+		printcreatedby1(f, pc)
+	}
+}
+
+func printcreatedby1(f funcInfo, pc uintptr) {
+	print("created by ", funcname(f), "\n")
+	tracepc := pc // back up to CALL instruction for funcline.
+	if pc > f.entry {
+		tracepc -= sys.PCQuantum
 	}
+	file, line := funcline(f, tracepc)
+	print("\t", file, ":", line)
+	if pc > f.entry {
+		print(" +", hex(pc-f.entry))
+	}
+	print("\n")
 }

 func traceback(pc, sp, lr uintptr, gp *g) {
@@ -689,6 +693,71 @@ func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
 		print("...additional frames elided...\n")
 	}
 	printcreatedby(gp)
+
+	if gp.ancestors == nil {
+		return
+	}
+	for _, ancestor := range *gp.ancestors {
+		printAncestorTraceback(ancestor)
+	}
+}
+
+// printAncestorTraceback prints the traceback of the given ancestor.
+// TODO: Unify this with gentraceback and CallersFrames.
+func printAncestorTraceback(ancestor ancestorInfo) {
+	print("[originating from goroutine ", ancestor.goid, "]:\n")
+	elideWrapper := false
+	for fidx, pc := range ancestor.pcs {
+		f := findfunc(pc) // f previously validated
+		if showfuncinfo(f, fidx == 0, elideWrapper && fidx != 0) {
+			elideWrapper = printAncestorTracebackFuncInfo(f, pc)
+		}
+	}
+	if len(ancestor.pcs) == _TracebackMaxFrames {
+		print("...additional frames elided...\n")
+	}
+	// Show what created goroutine, except main goroutine (goid 1).
+	f := findfunc(ancestor.gopc)
+	if f.valid() && showfuncinfo(f, false, false) && ancestor.goid != 1 {
+		printcreatedby1(f, ancestor.gopc)
+	}
+}
+
+// printAncestorTraceback prints the given function info at a given pc
+// within an ancestor traceback. The precision of this info is reduced
+// due to only have access to the pcs at the time of the caller
+// goroutine being created.
+func printAncestorTracebackFuncInfo(f funcInfo, pc uintptr) bool {
+	tracepc := pc // back up to CALL instruction for funcline.
+	if pc > f.entry {
+		tracepc -= sys.PCQuantum
+	}
+	file, line := funcline(f, tracepc)
+	inldata := funcdata(f, _FUNCDATA_InlTree)
+	if inldata != nil {
+		inltree := (*[1 << 20]inlinedCall)(inldata)
+		ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, nil)
+		for ix != -1 {
+			name := funcnameFromNameoff(f, inltree[ix].func_)
+			print(name, "(...)\n")
+			print("\t", file, ":", line, "\n")
+
+			file = funcfile(f, inltree[ix].file)
+			line = inltree[ix].line
+			ix = inltree[ix].parent
+		}
+	}
+	name := funcname(f)
+	if name == "runtime.gopanic" {
+		name = "panic"
+	}
+	print(name, "(...)\n")
+	print("\t", file, ":", line)
+	if pc > f.entry {
+		print(" +", hex(pc-f.entry))
+	}
+	print("\n")
+	return elideWrapperCalling(name)
 }

 func callers(skip int, pcbuf []uintptr) int {
@@ -711,6 +780,10 @@ func showframe(f funcInfo, gp *g, firstFrame, elideWrapper bool) bool {
 	if g.m.throwing > 0 && gp != nil && (gp == g.m.curg || gp == g.m.caughtsig.ptr()) {
 		return true
 	}
+	return showfuncinfo(f, firstFrame, elideWrapper)
+}
+
+func showfuncinfo(f funcInfo, firstFrame, elideWrapper bool) bool {
 	level, _, _ := gotraceback()
 	if level > 1 {
 		// Show all frames.