Commit 4e0c7c3f authored by Philip Hofer's avatar Philip Hofer Committed by David Chase

cmd/compile: de-virtualize interface calls

With this change, code like

    h := sha1.New()
    h.Write(buf)
    sum := h.Sum()

gets compiled into static calls rather than
interface calls, because the compiler is able
to prove that 'h' is really a *sha1.digest.

The InterCall re-write rule hits a few dozen times
during make.bash, and hundreds of times during all.bash.

The most common pattern identified by the compiler
is a constructor like

    func New() Interface { return &impl{...} }

where the constructor gets inlined into the caller,
and the result is used immediately. Examples include
{sha1,md5,crc32,crc64,...}.New, base64.NewEncoder,
base64.NewDecoder, errors.New, net.Pipe, and so on.

Some existing benchmarks that change on darwin/amd64:

Crc64/ISO4KB-8        2.67µs ± 1%    2.66µs ± 0%  -0.36%  (p=0.015 n=10+10)
Crc64/ISO1KB-8         694ns ± 0%     690ns ± 1%  -0.59%  (p=0.001 n=10+10)
Adler32KB-8            473ns ± 1%     471ns ± 0%  -0.39%  (p=0.010 n=10+9)

On architectures like amd64, the reduction in code size
appears to contribute more to benchmark improvements than just
removing the indirect call, since that branch gets predicted
accurately when called in a loop.

Updates #19361

Change-Id: Ia9d30afdd5f6b4d38d38b14b88f308acae8ce7ed
Reviewed-on: https://go-review.googlesource.com/37751
Run-TryBot: Philip Hofer <phofer@umich.edu>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarKeith Randall <khr@golang.org>
parent 26e726c3
......@@ -483,7 +483,11 @@ func Main() {
}
}
// Just before compilation, compile itabs found on
// the right side of OCONVIFACE so that methods
// can be de-virtualized during compilation.
Curfn = nil
peekitabs()
// Phase 8: Compile top level functions.
// Don't use range--walk can add functions to xtop.
......
......@@ -16,6 +16,15 @@ import (
type itabEntry struct {
t, itype *Type
sym *Sym
// symbol of the itab itself;
// filled in lazily after typecheck
lsym *obj.LSym
// symbols of each method in
// the itab, sorted by byte offset;
// filled in at the same time as lsym
entries []*obj.LSym
}
type ptabEntry struct {
......@@ -415,7 +424,6 @@ func imethods(t *Type) []*Sig {
// Generate the method body, so that compiled
// code can refer to it.
isym := methodsym(method, t, 0)
if !isym.Siggen() {
isym.SetSiggen(true)
genwrapper(t, f, isym, 0)
......@@ -1379,6 +1387,78 @@ ok:
return s
}
// for each itabEntry, gather the methods on
// the concrete type that implement the interface
func peekitabs() {
for i := range itabs {
tab := &itabs[i]
methods := genfun(tab.t, tab.itype)
if len(methods) == 0 {
continue
}
tab.lsym = Linksym(tab.sym)
tab.entries = methods
}
}
// for the given concrete type and interface
// type, return the (sorted) set of methods
// on the concrete type that implement the interface
func genfun(t, it *Type) []*obj.LSym {
if t == nil || it == nil {
return nil
}
sigs := imethods(it)
methods := methods(t)
out := make([]*obj.LSym, 0, len(sigs))
if len(sigs) == 0 {
return nil
}
// both sigs and methods are sorted by name,
// so we can find the intersect in a single pass
for _, m := range methods {
if m.name == sigs[0].name {
out = append(out, Linksym(m.isym))
sigs = sigs[1:]
if len(sigs) == 0 {
break
}
}
}
return out
}
// itabsym uses the information gathered in
// peekitabs to de-virtualize interface methods.
// Since this is called by the SSA backend, it shouldn't
// generate additional Nodes, Syms, etc.
func itabsym(it *obj.LSym, offset int64) *obj.LSym {
var syms []*obj.LSym
if it == nil {
return nil
}
for i := range itabs {
e := &itabs[i]
if e.lsym == it {
syms = e.entries
break
}
}
if syms == nil {
return nil
}
// keep this arithmetic in sync with *itab layout
methodnum := int((offset - 3*int64(Widthptr) - 8) / int64(Widthptr))
if methodnum >= len(syms) {
return nil
}
return syms[methodnum]
}
func dumptypestructs() {
// copy types from externdcl list to signatlist
for _, n := range externdcl {
......
......@@ -4931,6 +4931,10 @@ func (e *ssaExport) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
return ssa.LocalSlot{N: n, Type: et, Off: name.Off}
}
func (e *ssaExport) DerefItab(it *obj.LSym, offset int64) *obj.LSym {
return itabsym(it, offset)
}
// namedAuto returns a new AUTO variable with the given name and type.
// These are exposed to the debugger.
func (e *ssaExport) namedAuto(name string, typ ssa.Type) ssa.GCNode {
......
......@@ -1684,7 +1684,6 @@ func structargs(tl *Type, mustname bool) []*Node {
// rcvr - U
// method - M func (t T)(), a TFIELD type struct
// newnam - the eventual mangled name of this function
func genwrapper(rcvr *Type, method *Field, newnam *Sym, iface int) {
if false && Debug['r'] != 0 {
fmt.Printf("genwrapper rcvrtype=%v method=%v newnam=%v\n", rcvr, method, newnam)
......@@ -1720,6 +1719,7 @@ func genwrapper(rcvr *Type, method *Field, newnam *Sym, iface int) {
fn.Func.Nname = newname(newnam)
fn.Func.Nname.Name.Defn = fn
fn.Func.Nname.Name.Param.Ntype = t
fn.Func.Nname.Sym.SetExported(true) // prevent export; see closure.go
declare(fn.Func.Nname, PFUNC)
funchdr(fn)
......@@ -1923,6 +1923,14 @@ func implements(t, iface *Type, m, samename **Field, ptr *int) bool {
}
}
// We're going to emit an OCONVIFACE.
// Call itabname so that (t, iface)
// gets added to itabs early, which allows
// us to de-virtualize calls through this
// type/interface pair later. See peekitabs in reflect.go
if isdirectiface(t0) && !iface.IsEmptyInterface() {
itabname(t0, iface)
}
return true
}
......
......@@ -121,6 +121,12 @@ type Frontend interface {
SplitArray(LocalSlot) LocalSlot // array must be length 1
SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo)
// DerefItab dereferences an itab function
// entry, given the symbol of the itab and
// the byte offset of the function pointer.
// It may return nil.
DerefItab(sym *obj.LSym, offset int64) *obj.LSym
// Line returns a string describing the given position.
Line(src.XPos) string
......
......@@ -97,21 +97,22 @@ func (d DummyFrontend) Warnl(_ src.XPos, msg string, args ...interface{}) { d.t
func (d DummyFrontend) Debug_checknil() bool { return false }
func (d DummyFrontend) Debug_wb() bool { return false }
func (d DummyFrontend) TypeBool() Type { return TypeBool }
func (d DummyFrontend) TypeInt8() Type { return TypeInt8 }
func (d DummyFrontend) TypeInt16() Type { return TypeInt16 }
func (d DummyFrontend) TypeInt32() Type { return TypeInt32 }
func (d DummyFrontend) TypeInt64() Type { return TypeInt64 }
func (d DummyFrontend) TypeUInt8() Type { return TypeUInt8 }
func (d DummyFrontend) TypeUInt16() Type { return TypeUInt16 }
func (d DummyFrontend) TypeUInt32() Type { return TypeUInt32 }
func (d DummyFrontend) TypeUInt64() Type { return TypeUInt64 }
func (d DummyFrontend) TypeFloat32() Type { return TypeFloat32 }
func (d DummyFrontend) TypeFloat64() Type { return TypeFloat64 }
func (d DummyFrontend) TypeInt() Type { return TypeInt64 }
func (d DummyFrontend) TypeUintptr() Type { return TypeUInt64 }
func (d DummyFrontend) TypeString() Type { panic("unimplemented") }
func (d DummyFrontend) TypeBytePtr() Type { return TypeBytePtr }
func (d DummyFrontend) TypeBool() Type { return TypeBool }
func (d DummyFrontend) TypeInt8() Type { return TypeInt8 }
func (d DummyFrontend) TypeInt16() Type { return TypeInt16 }
func (d DummyFrontend) TypeInt32() Type { return TypeInt32 }
func (d DummyFrontend) TypeInt64() Type { return TypeInt64 }
func (d DummyFrontend) TypeUInt8() Type { return TypeUInt8 }
func (d DummyFrontend) TypeUInt16() Type { return TypeUInt16 }
func (d DummyFrontend) TypeUInt32() Type { return TypeUInt32 }
func (d DummyFrontend) TypeUInt64() Type { return TypeUInt64 }
func (d DummyFrontend) TypeFloat32() Type { return TypeFloat32 }
func (d DummyFrontend) TypeFloat64() Type { return TypeFloat64 }
func (d DummyFrontend) TypeInt() Type { return TypeInt64 }
func (d DummyFrontend) TypeUintptr() Type { return TypeUInt64 }
func (d DummyFrontend) TypeString() Type { panic("unimplemented") }
func (d DummyFrontend) TypeBytePtr() Type { return TypeBytePtr }
func (d DummyFrontend) DerefItab(sym *obj.LSym, off int64) *obj.LSym { return nil }
func (d DummyFrontend) CanSSA(t Type) bool {
// There are no un-SSAable types in dummy land.
......
......@@ -1431,3 +1431,10 @@
&& c == config.ctxt.FixedFrameSize() + config.RegSize // offset of return value
&& warnRule(config.Debug_checknil() && v.Pos.Line() > 1, v, "removed nil check")
-> (Invalid)
// De-virtualize interface calls into static calls.
// Note that (ITab (IMake)) doesn't get
// rewritten until after the first opt pass,
// so this rule should trigger reliably.
(InterCall [argsize] (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) && devirt(v, itab, off) != nil ->
(StaticCall [argsize] {devirt(v, itab, off)} mem)
......@@ -5,6 +5,7 @@
package ssa
import (
"cmd/internal/obj"
"crypto/sha1"
"fmt"
"math"
......@@ -384,6 +385,25 @@ func uaddOvf(a, b int64) bool {
return uint64(a)+uint64(b) < uint64(a)
}
// de-virtualize an InterCall
// 'sym' is the symbol for the itab
func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
f := v.Block.Func
ext, ok := sym.(*ExternSymbol)
if !ok {
return nil
}
lsym := f.Config.Frontend().DerefItab(ext.Sym, offset)
if f.pass.debug > 0 {
if lsym != nil {
f.Config.Warnl(v.Pos, "de-virtualizing call")
} else {
f.Config.Warnl(v.Pos, "couldn't de-virtualize call")
}
}
return lsym
}
// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
if p1 == p2 {
......
......@@ -124,6 +124,8 @@ func rewriteValuegeneric(v *Value, config *Config) bool {
return rewriteValuegeneric_OpGreater8U(v, config)
case OpIMake:
return rewriteValuegeneric_OpIMake(v, config)
case OpInterCall:
return rewriteValuegeneric_OpInterCall(v, config)
case OpIsInBounds:
return rewriteValuegeneric_OpIsInBounds(v, config)
case OpIsNonNil:
......@@ -5736,6 +5738,52 @@ func rewriteValuegeneric_OpIMake(v *Value, config *Config) bool {
}
return false
}
func rewriteValuegeneric_OpInterCall(v *Value, config *Config) bool {
b := v.Block
_ = b
// match: (InterCall [argsize] (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem)
// cond: devirt(v, itab, off) != nil
// result: (StaticCall [argsize] {devirt(v, itab, off)} mem)
for {
argsize := v.AuxInt
v_0 := v.Args[0]
if v_0.Op != OpLoad {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpOffPtr {
break
}
off := v_0_0.AuxInt
v_0_0_0 := v_0_0.Args[0]
if v_0_0_0.Op != OpITab {
break
}
v_0_0_0_0 := v_0_0_0.Args[0]
if v_0_0_0_0.Op != OpIMake {
break
}
v_0_0_0_0_0 := v_0_0_0_0.Args[0]
if v_0_0_0_0_0.Op != OpAddr {
break
}
itab := v_0_0_0_0_0.Aux
v_0_0_0_0_0_0 := v_0_0_0_0_0.Args[0]
if v_0_0_0_0_0_0.Op != OpSB {
break
}
mem := v.Args[1]
if !(devirt(v, itab, off) != nil) {
break
}
v.reset(OpStaticCall)
v.AuxInt = argsize
v.Aux = devirt(v, itab, off)
v.AddArg(mem)
return true
}
return false
}
func rewriteValuegeneric_OpIsInBounds(v *Value, config *Config) bool {
b := v.Block
_ = b
......
// errorcheck -0 -d=ssa/opt/debug=3
package main
import (
"crypto/sha1"
"errors"
"fmt"
"sync"
)
func f0() {
v := errors.New("error string")
_ = v.Error() // ERROR "de-virtualizing call$"
}
func f1() {
h := sha1.New()
buf := make([]byte, 4)
h.Write(buf) // ERROR "de-virtualizing call$"
_ = h.Sum(nil) // ERROR "de-virtualizing call$"
}
func f2() {
// trickier case: make sure we see this is *sync.rlocker
// instead of *sync.RWMutex,
// even though they are the same pointers
var m sync.RWMutex
r := m.RLocker()
// deadlock if the type of 'r' is improperly interpreted
// as *sync.RWMutex
r.Lock() // ERROR "de-virtualizing call$"
m.RLock()
r.Unlock() // ERROR "de-virtualizing call$"
m.RUnlock()
}
type multiword struct{ a, b, c int }
func (m multiword) Error() string { return fmt.Sprintf("%d, %d, %d", m.a, m.b, m.c) }
func f3() {
// can't de-virtualize this one yet;
// it passes through a call to iconvT2I
var err error
err = multiword{1, 2, 3}
if err.Error() != "1, 2, 3" {
panic("bad call")
}
// ... but we can do this one
err = &multiword{1, 2, 3}
if err.Error() != "1, 2, 3" { // ERROR "de-virtualizing call$"
panic("bad call")
}
}
func main() {
f0()
f1()
f2()
f3()
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment