Commit cbafcc55 authored by Keith Randall's avatar Keith Randall Committed by Keith Randall

cmd/compile,runtime: implement stack objects

Rework how the compiler+runtime handles stack-allocated variables
whose address is taken.

Direct references to such variables work as before. References through
pointers, however, use a new mechanism. The new mechanism is more
precise than the old "ambiguously live" mechanism. It computes liveness
at runtime based on the actual references among objects on the stack.

Each function records all of its address-taken objects in a FUNCDATA.
These are called "stack objects". The runtime then uses that
information while scanning a stack to find all of the stack objects on
a stack. It then does a mark phase on the stack objects, using all the
pointers found on the stack (and ancillary structures, like defer
records) as the root set. Only stack objects which are found to be
live during this mark phase will be scanned and thus retain any heap
objects they point to.

A subsequent CL will remove all the "ambiguously live" logic from
the compiler, so that the stack object tracing will be required.
For this CL, the stack tracing is all redundant with the current
ambiguously live logic.

Update #22350

Change-Id: Ide19f1f71a5b6ec8c4d54f8f66f0e9a98344772f
Reviewed-on: https://go-review.googlesource.com/c/134155Reviewed-by: 's avatarAustin Clements <austin@google.com>
parent 43349661
......@@ -281,7 +281,7 @@ func dumpglobls() {
funcsyms = nil
}
// addGCLocals adds gcargs and gclocals symbols to Ctxt.Data.
// addGCLocals adds gcargs, gclocals, gcregs, and stack object symbols to Ctxt.Data.
// It takes care not to add any duplicates.
// Though the object file format handles duplicates efficiently,
// storing only a single copy of the data,
......@@ -299,6 +299,9 @@ func addGCLocals() {
Ctxt.Data = append(Ctxt.Data, gcsym)
seen[gcsym.Name] = true
}
if x := s.Func.StackObjects; x != nil {
ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.LOCAL)
}
}
}
......
......@@ -233,6 +233,26 @@ func compile(fn *Node) {
// Set up the function's LSym early to avoid data races with the assemblers.
fn.Func.initLSym()
// Make sure type syms are declared for all types that might
// be types of stack objects. We need to do this here
// because symbols must be allocated before the parallel
// phase of the compiler.
if fn.Func.lsym != nil { // not func _(){}
for _, n := range fn.Func.Dcl {
switch n.Class() {
case PPARAM, PPARAMOUT, PAUTO:
if livenessShouldTrack(n) && n.Addrtaken() {
dtypesym(n.Type)
// Also make sure we allocate a linker symbol
// for the stack object data, for the same reason.
if fn.Func.lsym.Func.StackObjects == nil {
fn.Func.lsym.Func.StackObjects = lookup(fmt.Sprintf("%s.stkobj", fn.funcname())).Linksym()
}
}
}
}
}
if compilenow() {
compileSSA(fn, 0)
} else {
......
......@@ -16,6 +16,7 @@ import (
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/objabi"
"cmd/internal/src"
"cmd/internal/sys"
)
......@@ -4933,6 +4934,51 @@ func (s *SSAGenState) DebugFriendlySetPosFrom(v *ssa.Value) {
}
}
// byXoffset implements sort.Interface for []*Node using Xoffset as the ordering.
type byXoffset []*Node
func (s byXoffset) Len() int { return len(s) }
func (s byXoffset) Less(i, j int) bool { return s[i].Xoffset < s[j].Xoffset }
func (s byXoffset) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func emitStackObjects(e *ssafn, pp *Progs) {
var vars []*Node
for _, n := range e.curfn.Func.Dcl {
if livenessShouldTrack(n) && n.Addrtaken() {
vars = append(vars, n)
}
}
if len(vars) == 0 {
return
}
// Sort variables from lowest to highest address.
sort.Sort(byXoffset(vars))
// Populate the stack object data.
// Format must match runtime/stack.go:stackObjectRecord.
x := e.curfn.Func.lsym.Func.StackObjects
off := 0
off = duintptr(x, off, uint64(len(vars)))
for _, v := range vars {
// Note: arguments and return values have non-negative Xoffset,
// in which case the offset is relative to argp.
// Locals have a negative Xoffset, in which case the offset is relative to varp.
off = duintptr(x, off, uint64(v.Xoffset))
if !typesym(v.Type).Siggen() {
Fatalf("stack object's type symbol not generated for type %s", v.Type)
}
off = dsymptr(x, off, dtypesym(v.Type), 0)
}
// Emit a funcdata pointing at the stack object data.
p := pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_StackObjects)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = x
}
// genssa appends entries to pp for each instruction in f.
func genssa(f *ssa.Func, pp *Progs) {
var s SSAGenState
......@@ -4940,6 +4986,7 @@ func genssa(f *ssa.Func, pp *Progs) {
e := f.Frontend().(*ssafn)
s.livenessMap = liveness(e, f)
emitStackObjects(e, pp)
// Remember where each block starts.
s.bstart = make([]*obj.Prog, f.NumBlocks())
......@@ -5054,7 +5101,6 @@ func genssa(f *ssa.Func, pp *Progs) {
}
}
}
// Emit control flow instructions for block
var next *ssa.Block
if i < len(f.Blocks)-1 && Debug['N'] == 0 {
......
......@@ -402,9 +402,10 @@ type FuncInfo struct {
dwarfAbsFnSym *LSym
dwarfIsStmtSym *LSym
GCArgs LSym
GCLocals LSym
GCRegs LSym
GCArgs LSym
GCLocals LSym
GCRegs LSym
StackObjects *LSym
}
// Attribute is a set of symbol attributes.
......
......@@ -18,6 +18,7 @@ const (
FUNCDATA_LocalsPointerMaps = 1
FUNCDATA_InlTree = 2
FUNCDATA_RegPointerMaps = 3
FUNCDATA_StackObjects = 4
// ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and
......
......@@ -5988,7 +5988,8 @@ func TestFuncLayout(t *testing.T) {
func verifyGCBits(t *testing.T, typ Type, bits []byte) {
heapBits := GCBits(New(typ).Interface())
if !bytes.Equal(heapBits, bits) {
t.Errorf("heapBits incorrect for %v\nhave %v\nwant %v", typ, heapBits, bits)
_, _, line, _ := runtime.Caller(1)
t.Errorf("line %d: heapBits incorrect for %v\nhave %v\nwant %v", line, typ, heapBits, bits)
}
}
......
......@@ -16,6 +16,7 @@
#define FUNCDATA_LocalsPointerMaps 1
#define FUNCDATA_InlTree 2
#define FUNCDATA_RegPointerMaps 3
#define FUNCDATA_StackObjects 4
// Pseudo-assembly statements.
......
......@@ -1911,6 +1911,20 @@ Run:
return totalBits
}
// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
s := mheap_.allocManual((ptrdata/(8*sys.PtrSize)+pageSize-1)/pageSize, &memstats.gc_sys)
runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
return s
}
func dematerializeGCProg(s *mspan) {
mheap_.freeManual(s, &memstats.gc_sys)
}
func dumpGCProg(p *byte) {
nptr := 0
for {
......@@ -2037,7 +2051,12 @@ func getgcmask(ep interface{}) (mask []byte) {
_g_ := getg()
gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
if frame.fn.valid() {
locals, _ := getStackMap(&frame, nil, false)
// TODO: once stack objects are enabled (and their pointers
// are no longer described by the stack pointermap directly),
// tests using this will probably need fixing. We might need
// to loop through the stackobjects and if we're inside one,
// use the pointermap from that object.
locals, _, _ := getStackMap(&frame, nil, false)
if locals.n == 0 {
return
}
......
......@@ -169,7 +169,7 @@ func markroot(gcw *gcWork, i uint32) {
case i == fixedRootFinalizers:
for fb := allfin; fb != nil; fb = fb.alllink {
cnt := uintptr(atomic.Load(&fb.cnt))
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw, nil)
}
case i == fixedRootFreeGStacks:
......@@ -248,7 +248,7 @@ func markrootBlock(b0, n0 uintptr, ptrmask0 *uint8, gcw *gcWork, shard int) {
}
// Scan this shard.
scanblock(b, n, ptrmask, gcw)
scanblock(b, n, ptrmask, gcw, nil)
}
// markrootFreeGStacks frees stacks of dead Gs.
......@@ -349,7 +349,7 @@ func markrootSpans(gcw *gcWork, shard int) {
scanobject(p, gcw)
// The special itself is a root.
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
}
unlock(&s.speciallock)
......@@ -689,42 +689,136 @@ func scanstack(gp *g, gcw *gcWork) {
// Shrink the stack if not much of it is being used.
shrinkstack(gp)
var state stackScanState
state.stack = gp.stack
if stackTraceDebug {
println("stack trace goroutine", gp.goid)
}
// Scan the saved context register. This is effectively a live
// register that gets moved back and forth between the
// register and sched.ctxt without a write barrier.
if gp.sched.ctxt != nil {
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&gp.sched.ctxt)), sys.PtrSize, &oneptrmask[0], gcw, &state)
}
// Scan the stack.
var cache pcvalueCache
// Scan the stack. Accumulate a list of stack objects.
scanframe := func(frame *stkframe, unused unsafe.Pointer) bool {
scanframeworker(frame, &cache, gcw)
scanframeworker(frame, &state, gcw)
return true
}
gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
tracebackdefers(gp, scanframe, nil)
// Find and scan all reachable stack objects.
state.buildIndex()
for {
p := state.getPtr()
if p == 0 {
break
}
obj := state.findObject(p)
if obj == nil {
continue
}
t := obj.typ
if t == nil {
// We've already scanned this object.
continue
}
obj.setType(nil) // Don't scan it again.
if stackTraceDebug {
println(" live stkobj at", hex(state.stack.lo+uintptr(obj.off)), "of type", t.string())
}
gcdata := t.gcdata
var s *mspan
if t.kind&kindGCProg != 0 {
// This path is pretty unlikely, an object large enough
// to have a GC program allocated on the stack.
// We need some space to unpack the program into a straight
// bitmask, which we allocate/free here.
// TODO: it would be nice if there were a way to run a GC
// program without having to store all its bits. We'd have
// to change from a Lempel-Ziv style program to something else.
// Or we can forbid putting objects on stacks if they require
// a gc program (see issue 27447).
s = materializeGCProg(t.ptrdata, gcdata)
gcdata = (*byte)(unsafe.Pointer(s.startAddr))
}
scanblock(state.stack.lo+uintptr(obj.off), t.ptrdata, gcdata, gcw, &state)
if s != nil {
dematerializeGCProg(s)
}
}
// Deallocate object buffers.
// (Pointer buffers were all deallocated in the loop above.)
for state.head != nil {
x := state.head
state.head = x.next
if stackTraceDebug {
for _, obj := range x.obj[:x.nobj] {
if obj.typ == nil { // reachable
continue
}
println(" dead stkobj at", hex(gp.stack.lo+uintptr(obj.off)), "of type", obj.typ.string())
// Note: not necessarily really dead - only reachable-from-ptr dead.
}
}
x.nobj = 0
putempty((*workbuf)(unsafe.Pointer(x)))
}
if state.buf != nil || state.freeBuf != nil {
throw("remaining pointer buffers")
}
gp.gcscanvalid = true
}
// Scan a stack frame: local variables and function arguments/results.
//go:nowritebarrier
func scanframeworker(frame *stkframe, cache *pcvalueCache, gcw *gcWork) {
func scanframeworker(frame *stkframe, state *stackScanState, gcw *gcWork) {
if _DebugGC > 1 && frame.continpc != 0 {
print("scanframe ", funcname(frame.fn), "\n")
}
locals, args := getStackMap(frame, cache, false)
locals, args, objs := getStackMap(frame, &state.cache, false)
// Scan local variables if stack frame has been allocated.
if locals.n > 0 {
size := uintptr(locals.n) * sys.PtrSize
scanblock(frame.varp-size, size, locals.bytedata, gcw)
scanblock(frame.varp-size, size, locals.bytedata, gcw, state)
}
// Scan arguments.
if args.n > 0 {
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw)
scanblock(frame.argp, uintptr(args.n)*sys.PtrSize, args.bytedata, gcw, state)
}
// Add all stack objects to the stack object list.
if frame.varp != 0 {
// varp is 0 for defers, where there are no locals.
// In that case, there can't be a pointer to its args, either.
// (And all args would be scanned above anyway.)
for _, obj := range objs {
off := obj.off
base := frame.varp // locals base pointer
if off >= 0 {
base = frame.argp // arguments and return values base pointer
}
ptr := base + uintptr(off)
if ptr < frame.sp {
// object hasn't been allocated in the frame yet.
continue
}
if stackTraceDebug {
println("stkobj at", hex(ptr), "of type", obj.typ.string())
}
state.addObject(ptr, obj.typ)
}
}
}
......@@ -939,8 +1033,9 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//
// If stk != nil, possible stack pointers are also reported to stk.putPtr.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork, stk *stackScanState) {
// Use local copies of original parameters, so that a stack trace
// due to one of the throws below shows the original block
// base and extent.
......@@ -957,10 +1052,12 @@ func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
for j := 0; j < 8 && i < n; j++ {
if bits&1 != 0 {
// Same work as in scanobject; see comments there.
obj := *(*uintptr)(unsafe.Pointer(b + i))
if obj != 0 {
if obj, span, objIndex := findObject(obj, b, i); obj != 0 {
p := *(*uintptr)(unsafe.Pointer(b + i))
if p != 0 {
if obj, span, objIndex := findObject(p, b, i); obj != 0 {
greyobject(obj, b, i, span, gcw, objIndex)
} else if stk != nil && p >= stk.stack.lo && p < stk.stack.hi {
stk.putPtr(p)
}
}
}
......
This diff is collapsed.
......@@ -1437,7 +1437,7 @@ func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *p
scanobject(base, gcw)
// Mark the finalizer itself, since the
// special isn't part of the GC'd heap.
scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw)
scanblock(uintptr(unsafe.Pointer(&s.fn)), sys.PtrSize, &oneptrmask[0], gcw, nil)
releasem(mp)
}
return true
......
......@@ -625,7 +625,7 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
return true
}
locals, args := getStackMap(frame, &adjinfo.cache, true)
locals, args, objs := getStackMap(frame, &adjinfo.cache, true)
// Adjust local variables if stack frame has been allocated.
if locals.n > 0 {
......@@ -663,6 +663,42 @@ func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
}
adjustpointers(unsafe.Pointer(frame.argp), &args, adjinfo, funcInfo{})
}
// Adjust pointers in all stack objects (whether they are live or not).
// See comments in mgcmark.go:scanframeworker.
if frame.varp != 0 {
for _, obj := range objs {
off := obj.off
base := frame.varp // locals base pointer
if off >= 0 {
base = frame.argp // arguments and return values base pointer
}
p := base + uintptr(off)
if p < frame.sp {
// Object hasn't been allocated in the frame yet.
// (Happens when the stack bounds check fails and
// we call into morestack.)
continue
}
t := obj.typ
gcdata := t.gcdata
var s *mspan
if t.kind&kindGCProg != 0 {
// See comments in mgcmark.go:scanstack
s = materializeGCProg(t.ptrdata, gcdata)
gcdata = (*byte)(unsafe.Pointer(s.startAddr))
}
for i := uintptr(0); i < t.ptrdata; i += sys.PtrSize {
if *addb(gcdata, i/(8*sys.PtrSize))>>(i/sys.PtrSize&7)&1 != 0 {
adjustpointer(adjinfo, unsafe.Pointer(p+i))
}
}
if s != nil {
dematerializeGCProg(s)
}
}
}
return true
}
......@@ -1136,9 +1172,9 @@ func freeStackSpans() {
unlock(&stackLarge.lock)
}
// getStackMap returns the locals and arguments live pointer maps for
// frame.
func getStackMap(frame *stkframe, cache *pcvalueCache, debug bool) (locals, args bitvector) {
// getStackMap returns the locals and arguments live pointer maps, and
// stack object list for frame.
func getStackMap(frame *stkframe, cache *pcvalueCache, debug bool) (locals, args bitvector, objs []stackObjectRecord) {
targetpc := frame.continpc
if targetpc == 0 {
// Frame is dead. Return empty bitvectors.
......@@ -1235,9 +1271,33 @@ func getStackMap(frame *stkframe, cache *pcvalueCache, debug bool) (locals, args
}
}
}
// stack objects.
p := funcdata(f, _FUNCDATA_StackObjects)
if p != nil {
n := *(*uintptr)(p)
p = add(p, sys.PtrSize)
*(*slice)(unsafe.Pointer(&objs)) = slice{array: noescape(p), len: int(n), cap: int(n)}
// Note: the noescape above is needed to keep
// getStackMap from from "leaking param content:
// frame". That leak propagates up to getgcmask, then
// GCMask, then verifyGCInfo, which converts the stack
// gcinfo tests into heap gcinfo tests :(
}
return
}
// A stackObjectRecord is generated by the compiler for each stack object in a stack frame.
// This record must match the generator code in cmd/compile/internal/gc/ssa.go:emitStackObjects.
type stackObjectRecord struct {
// offset in frame
// if negative, offset from varp
// if non-negative, offset from argp
off int
typ *_type
}
//go:nosplit
func morestackc() {
throw("attempt to execute system stack code on user stack")
......
......@@ -348,6 +348,7 @@ const (
_FUNCDATA_LocalsPointerMaps = 1
_FUNCDATA_InlTree = 2
_FUNCDATA_RegPointerMaps = 3
_FUNCDATA_StackObjects = 4
_ArgsSizeUnknown = -0x80000000
)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment