Commit 320ddcf8 authored by Keith Randall

cmd/compile: inline atomics from runtime/internal/atomic on amd64

Inline atomic reads and writes on amd64.  There's no reason
to pay the overhead of a call for these.

To keep atomic loads from being reordered, we make them
return a <value,memory> tuple.
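
For reference, this is the expansion pattern the intrinsic uses for loads
(condensed from the gc/ssa.go hunk below; the builder state s, the evaluated
pointer argument addr, and memVar are the surrounding compiler context and
are assumed here):

    load := s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), addr, s.mem())
    s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, load) // new memory state; later loads are ordered after it
    val := s.newValue1(ssa.OpSelect0, Types[TUINT64], load)         // the loaded value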

Change the meaning of resultInArg0 for tuple-generating ops
to mean the first part of the result tuple, not the second.
This means we can always put the store part of the tuple last,
matching how arguments are laid out.  This requires reordering
the outputs of add32carry and sub32carry and their descendants
in various architectures.
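
Purely as an illustration of the new (value, carry) ordering that the dec64
rules below now select (a standalone arithmetic sketch, not compiler code;
the add64 helper is hypothetical):

    package main

    import "fmt"

    // add64 mirrors how Add64 is decomposed on 32-bit targets: add the low
    // halves to get a (value, carry) pair, then fold the carry into the sum
    // of the high halves (Add32carry + Add32withcarry in SSA terms).
    func add64(xhi, xlo, yhi, ylo uint32) (hi, lo uint32) {
    	s := uint64(xlo) + uint64(ylo)
    	lo = uint32(s)           // component 0: the 32-bit value (Select0)
    	carry := uint32(s >> 32) // component 1: the carry bit (Select1)
    	hi = xhi + yhi + carry
    	return
    }

    func main() {
    	fmt.Println(add64(0, 0xFFFFFFFF, 0, 1)) // prints "1 0"
    }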

benchmark                    old ns/op     new ns/op     delta
BenchmarkAtomicLoad64-8      2.09          0.26          -87.56%
BenchmarkAtomicStore64-8     7.54          5.72          -24.14%

TBD (in a different CL): Cas, Or8, ...

Change-Id: I713ea88e7da3026c44ea5bdb56ed094b20bc5207
Reviewed-on: https://go-review.googlesource.com/27641
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 71ab9fa3
@@ -935,7 +935,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 			ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore,
 			ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
 			ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVOload,
-			ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore:
+			ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVOstore,
+			ssa.OpAMD64MOVQatomicload, ssa.OpAMD64MOVLatomicload:
 			if w.Args[0] == v.Args[0] && w.Aux == nil && w.AuxInt >= 0 && w.AuxInt < minZeroPage {
 				if gc.Debug_checknil != 0 && int(v.Line) > 1 {
 					gc.Warnl(v.Line, "removed nil check")
@@ -951,7 +952,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 				return
 			}
 		}
-		if w.Type.IsMemory() {
+		if w.Type.IsMemory() || w.Type.IsTuple() && w.Type.FieldType(1).IsMemory() {
 			if w.Op == ssa.OpVarDef || w.Op == ssa.OpVarKill || w.Op == ssa.OpVarLive {
 				// these ops are OK
 				mem = w
@@ -976,6 +977,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		if gc.Debug_checknil != 0 && v.Line > 1 { // v.Line==1 in generated wrappers
 			gc.Warnl(v.Line, "generated nil check")
 		}
+	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_MEM
+		p.From.Reg = gc.SSARegNum(v.Args[0])
+		gc.AddAux(&p.From, v)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = gc.SSARegNum0(v)
+	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
+		r := gc.SSARegNum0(v)
+		if r != gc.SSARegNum(v.Args[0]) {
+			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
+		}
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = gc.SSARegNum(v.Args[1])
+		gc.AddAux(&p.To, v)
 	default:
 		v.Unimplementedf("genValue not implemented: %s", v.LongString())
 	}
...
@@ -283,7 +283,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Reg = r
 	case ssa.OpARMADDS,
 		ssa.OpARMSUBS:
-		r := gc.SSARegNum1(v)
+		r := gc.SSARegNum0(v)
 		r1 := gc.SSARegNum(v.Args[0])
 		r2 := gc.SSARegNum(v.Args[1])
 		p := gc.Prog(v.Op.Asm())
@@ -356,7 +356,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Offset = v.AuxInt
 		p.Reg = gc.SSARegNum(v.Args[0])
 		p.To.Type = obj.TYPE_REG
-		p.To.Reg = gc.SSARegNum1(v)
+		p.To.Reg = gc.SSARegNum0(v)
 	case ssa.OpARMSRRconst:
 		genshift(arm.AMOVW, 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_RR, v.AuxInt)
 	case ssa.OpARMADDshiftLL,
@@ -373,7 +373,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftLL,
 		ssa.OpARMSUBSshiftLL,
 		ssa.OpARMRSBSshiftLL:
-		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LL, v.AuxInt)
+		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LL, v.AuxInt)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMADDshiftRL,
 		ssa.OpARMADCshiftRL,
@@ -389,7 +389,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftRL,
 		ssa.OpARMSUBSshiftRL,
 		ssa.OpARMRSBSshiftRL:
-		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_LR, v.AuxInt)
+		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_LR, v.AuxInt)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMADDshiftRA,
 		ssa.OpARMADCshiftRA,
@@ -405,7 +405,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftRA,
 		ssa.OpARMSUBSshiftRA,
 		ssa.OpARMRSBSshiftRA:
-		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum1(v), arm.SHIFT_AR, v.AuxInt)
+		p := genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum0(v), arm.SHIFT_AR, v.AuxInt)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMMVNshiftLL:
 		genshift(v.Op.Asm(), 0, gc.SSARegNum(v.Args[0]), gc.SSARegNum(v), arm.SHIFT_LL, v.AuxInt)
@@ -433,7 +433,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftLLreg,
 		ssa.OpARMSUBSshiftLLreg,
 		ssa.OpARMRSBSshiftLLreg:
-		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LL)
+		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LL)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMADDshiftRLreg,
 		ssa.OpARMADCshiftRLreg,
@@ -449,7 +449,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftRLreg,
 		ssa.OpARMSUBSshiftRLreg,
 		ssa.OpARMRSBSshiftRLreg:
-		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_LR)
+		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_LR)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMADDshiftRAreg,
 		ssa.OpARMADCshiftRAreg,
@@ -465,7 +465,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.OpARMADDSshiftRAreg,
 		ssa.OpARMSUBSshiftRAreg,
 		ssa.OpARMRSBSshiftRAreg:
-		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum1(v), arm.SHIFT_AR)
+		p := genregshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v.Args[2]), gc.SSARegNum0(v), arm.SHIFT_AR)
 		p.Scond = arm.C_SBIT
 	case ssa.OpARMHMUL,
 		ssa.OpARMHMULU:
...
@@ -477,7 +477,7 @@ func inlnode(n *Node) *Node {
 		if Debug['m'] > 3 {
 			fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
 		}
-		if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall1(n) { // normal case
+		if n.Left.Func != nil && n.Left.Func.Inl.Len() != 0 && !isIntrinsicCall(n) { // normal case
 			n = mkinlcall(n, n.Left, n.Isddd)
 		} else if n.isMethodCalledAsFunction() && n.Left.Sym.Def != nil {
 			n = mkinlcall(n, n.Left.Sym.Def, n.Isddd)
...
@@ -571,7 +571,14 @@ func (s *state) stmt(n *Node) {
 	case OEMPTY, ODCLCONST, ODCLTYPE, OFALL:
 	// Expression statements
-	case OCALLFUNC, OCALLMETH, OCALLINTER:
+	case OCALLFUNC:
+		if isIntrinsicCall(n) {
+			s.intrinsicCall(n)
+			return
+		}
+		fallthrough
+	case OCALLMETH, OCALLINTER:
 		s.call(n, callNormal)
 		if n.Op == OCALLFUNC && n.Left.Op == ONAME && n.Left.Class == PFUNC &&
 			(compiling_runtime && n.Left.Sym.Name == "throw" ||
@@ -2107,8 +2114,8 @@ func (s *state) expr(n *Node) *ssa.Value {
 		return s.newValue2(ssa.OpStringMake, n.Type, p, l)
 	case OCALLFUNC:
-		if isIntrinsicCall1(n) {
-			return s.intrinsicCall1(n)
+		if isIntrinsicCall(n) {
+			return s.intrinsicCall(n)
 		}
 		fallthrough
@@ -2516,12 +2523,12 @@ const (
 	callGo
 )
-// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic
+// isSSAIntrinsic returns true if n is a call to a recognized intrinsic
 // that can be handled by the SSA backend.
 // SSA uses this, but so does the front end to see if should not
 // inline a function because it is a candidate for intrinsic
 // substitution.
-func isSSAIntrinsic1(s *Sym) bool {
+func isSSAIntrinsic(s *Sym) bool {
 	// The test below is not quite accurate -- in the event that
 	// a function is disabled on a per-function basis, for example
 	// because of hash-keyed binary failure search, SSA might be
@@ -2541,38 +2548,74 @@ func isSSAIntrinsic1(s *Sym) bool {
 			return true
 		}
 	}
+	if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/atomic" {
+		switch s.Name {
+		case "Load", "Load64", "Loadint64", "Loadp", "Loaduint", "Loaduintptr":
+			return true
+		case "Store", "Store64", "StorepNoWB", "Storeuintptr":
+			return true
+		}
+	}
 	return false
 }
-func isIntrinsicCall1(n *Node) bool {
+func isIntrinsicCall(n *Node) bool {
 	if n == nil || n.Left == nil {
 		return false
 	}
-	return isSSAIntrinsic1(n.Left.Sym)
+	return isSSAIntrinsic(n.Left.Sym)
 }
-// intrinsicFirstArg extracts arg from n.List and eval
-func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
-	x := n.List.First()
+// intrinsicArg extracts the ith arg from n.List and returns its value.
+func (s *state) intrinsicArg(n *Node, i int) *ssa.Value {
+	x := n.List.Slice()[i]
 	if x.Op == OAS {
 		x = x.Right
 	}
 	return s.expr(x)
 }
+func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
+	return s.intrinsicArg(n, 0)
+}
-// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
-// into the intrinsic
-func (s *state) intrinsicCall1(n *Node) *ssa.Value {
+// intrinsicCall converts a call to a recognized intrinsic function into the intrinsic SSA operation.
+func (s *state) intrinsicCall(n *Node) (ret *ssa.Value) {
 	var result *ssa.Value
-	switch n.Left.Sym.Name {
-	case "Ctz64":
+	name := n.Left.Sym.Name
+	switch {
+	case name == "Ctz64":
 		result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
-	case "Ctz32":
+		ret = result
+	case name == "Ctz32":
 		result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
-	case "Bswap64":
+		ret = result
+	case name == "Bswap64":
 		result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
-	case "Bswap32":
+		ret = result
+	case name == "Bswap32":
 		result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
+		ret = result
+	case name == "Load" || name == "Loaduint" && s.config.IntSize == 4 || name == "Loaduintptr" && s.config.PtrSize == 4:
+		result = s.newValue2(ssa.OpAtomicLoad32, ssa.MakeTuple(Types[TUINT32], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+		ret = s.newValue1(ssa.OpSelect0, Types[TUINT32], result)
+	case name == "Load64" || name == "Loadint64" || name == "Loaduint" && s.config.IntSize == 8 || name == "Loaduintptr" && s.config.PtrSize == 8:
+		result = s.newValue2(ssa.OpAtomicLoad64, ssa.MakeTuple(Types[TUINT64], ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+		ret = s.newValue1(ssa.OpSelect0, Types[TUINT64], result)
+	case name == "Loadp":
+		result = s.newValue2(ssa.OpAtomicLoadPtr, ssa.MakeTuple(Ptrto(Types[TUINT8]), ssa.TypeMem), s.intrinsicArg(n, 0), s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, ssa.TypeMem, result)
+		ret = s.newValue1(ssa.OpSelect0, Ptrto(Types[TUINT8]), result)
+	case name == "Store" || name == "Storeuintptr" && s.config.PtrSize == 4:
+		result = s.newValue3(ssa.OpAtomicStore32, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+		s.vars[&memVar] = result
+	case name == "Store64" || name == "Storeuintptr" && s.config.PtrSize == 8:
+		result = s.newValue3(ssa.OpAtomicStore64, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+		s.vars[&memVar] = result
+	case name == "StorepNoWB":
+		result = s.newValue3(ssa.OpAtomicStorePtrNoWB, ssa.TypeMem, s.intrinsicArg(n, 0), s.intrinsicArg(n, 1), s.mem())
+		s.vars[&memVar] = result
 	}
 	if result == nil {
 		Fatalf("Unknown special call: %v", n.Left.Sym)
@@ -2580,7 +2623,7 @@ func (s *state) intrinsicCall1(n *Node) *ssa.Value {
 	if ssa.IntrinsicsDebug > 0 {
 		Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
 	}
-	return result
+	return
 }
 // Calls the function n using the specified call type.
...
@@ -29,6 +29,10 @@ func dse(f *Func) {
 			}
 			if v.Type.IsMemory() {
 				stores = append(stores, v)
+				if v.Op == OpSelect1 {
+					// Use the args of the tuple-generating op.
+					v = v.Args[0]
+				}
 				for _, a := range v.Args {
 					if a.Block == b && a.Type.IsMemory() {
 						storeUse.add(a.ID)
...
@@ -106,8 +106,8 @@ func init() {
 		gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
 		gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
 		gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-		gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{0, gp}}
-		gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{0, gp}}
+		gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
+		gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
 		gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
 		gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
 		gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
...
@@ -464,6 +464,19 @@
 (If cond yes no) -> (NE (TESTB cond cond) yes no)
+// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
+(AtomicLoad32 ptr mem) -> (MOVLatomicload ptr mem)
+(AtomicLoad64 ptr mem) -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 8 -> (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) && config.PtrSize == 4 -> (MOVLatomicload ptr mem)
+// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
+// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
+(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeUInt32(),TypeMem)> val ptr mem))
+(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeUInt64(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 8 -> (Select1 (XCHGQ <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) && config.PtrSize == 4 -> (Select1 (XCHGL <MakeTuple(config.Frontend().TypeBytePtr(),TypeMem)> val ptr mem))
 // ***************************
 // Above: lowering rules
 // Below: optimizations
@@ -1626,3 +1639,23 @@
 	(MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
 (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
 	(MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+// Merge ADDQconst and LEAQ into atomic loads.
+(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(MOVQatomicload [off1+off2] {sym} ptr mem)
+(MOVLatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(MOVLatomicload [off1+off2] {sym} ptr mem)
+(MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+	(MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVLatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
+	(MOVLatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+// Merge ADDQconst and LEAQ into atomic stores.
+(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(XCHGQ [off1+off2] {sym} val ptr mem)
+(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+	(XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+	(XCHGL [off1+off2] {sym} val ptr mem)
+(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
+	(XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
@@ -134,6 +134,7 @@ func init() {
 		gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
 		gpstoreidx = regInfo{inputs: []regMask{gpspsb, gpsp, gpsp, 0}}
 		gpstoreconstidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
+		gpstorexchg = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp}}
 		fp01 = regInfo{inputs: nil, outputs: fponly}
 		fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
@@ -509,6 +510,20 @@ func init() {
 		{name: "FlagLT_UGT"}, // signed < and unsigned >
 		{name: "FlagGT_UGT"}, // signed > and unsigned >
 		{name: "FlagGT_ULT"}, // signed > and unsigned <
+		// Atomic loads. These are just normal loads but return <value,memory> tuples
+		// so they can be properly ordered with other loads.
+		// load from arg0+auxint+aux. arg1=mem.
+		{name: "MOVLatomicload", argLength: 2, reg: gpload, asm: "MOVL", aux: "SymOff"},
+		{name: "MOVQatomicload", argLength: 2, reg: gpload, asm: "MOVQ", aux: "SymOff"},
+		// Atomic stores. We use XCHG to get the right memory ordering semantics.
+		// These ops return a tuple of <old memory contents, memory>. The old contents are
+		// ignored for now but they are allocated to a register so that the argument register
+		// is properly clobbered (together with resultInArg0).
+		// store arg0 to arg1+auxint+aux, arg2=mem.
+		// Note: arg0 and arg1 are backwards compared to MOVLstore (to facilitate resultInArg0)!
+		{name: "XCHGL", argLength: 3, reg: gpstorexchg, asm: "XCHGL", aux: "SymOff", resultInArg0: true},
+		{name: "XCHGQ", argLength: 3, reg: gpstorexchg, asm: "XCHGQ", aux: "SymOff", resultInArg0: true},
 	}
 	var AMD64blocks = []blockData{
...
@@ -99,17 +99,17 @@ func init() {
 	var (
 		gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
 		gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{0, gp}}
+		gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
 		gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
 		gp1flags = regInfo{inputs: []regMask{gpg}}
 		gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
 		gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-		gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{0, gp}}
+		gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
 		gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
 		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
 		gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
 		gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{0, gp}}
+		gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
 		gp3flags = regInfo{inputs: []regMask{gp, gp, gp}}
 		gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
 		gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
...
@@ -39,16 +39,16 @@
 		(Add32withcarry <config.fe.TypeInt32()>
 			(Int64Hi x)
 			(Int64Hi y)
-			(Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+			(Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
+		(Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
 (Sub64 x y) ->
 	(Int64Make
 		(Sub32withcarry <config.fe.TypeInt32()>
 			(Int64Hi x)
 			(Int64Hi y)
-			(Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-		(Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+			(Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
+		(Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
 (Mul64 x y) ->
 	(Int64Make
...
@@ -417,10 +417,10 @@ var genericOps = []opData{
 	{name: "Int64Hi", argLength: 1, typ: "UInt32"}, // high 32-bit of arg0
 	{name: "Int64Lo", argLength: 1, typ: "UInt32"}, // low 32-bit of arg0
-	{name: "Add32carry", argLength: 2, commutative: true, typ: "(Flags,UInt32)"}, // arg0 + arg1, returns (carry, value)
+	{name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
 	{name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1)
-	{name: "Sub32carry", argLength: 2, typ: "(Flags,UInt32)"}, // arg0 - arg1, returns (carry, value)
+	{name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
 	{name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1)
 	{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
@@ -440,6 +440,17 @@ var genericOps = []opData{
 	// pseudo-ops for breaking Tuple
 	{name: "Select0", argLength: 1}, // the first component of a tuple
 	{name: "Select1", argLength: 1}, // the second component of a tuple
+	// Atomic operations used for semantically inlining runtime/internal/atomic.
+	// Atomic loads return a new memory so that the loads are properly ordered
+	// with respect to other loads and stores.
+	// TODO: use for sync/atomic at some point.
+	{name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
+	{name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
+	{name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
+	{name: "AtomicStore32", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
+	{name: "AtomicStore64", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
+	{name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem"}, // Store arg1 to arg0. arg2=memory. Returns memory.
 }
 // kind control successors implicit exit
...
@@ -43,7 +43,7 @@ type opData struct {
 	rematerializeable bool
 	argLength int32 // number of arguments, if -1, then this operation has a variable number of arguments
 	commutative bool // this operation is commutative on its first 2 arguments (e.g. addition)
-	resultInArg0 bool // last output of v and v.Args[0] must be allocated to the same register
+	resultInArg0 bool // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
 	clobberFlags bool // this op clobbers flags register
 }
@@ -161,11 +161,11 @@ func genOp() {
 		}
 		if v.resultInArg0 {
 			fmt.Fprintln(w, "resultInArg0: true,")
-			if v.reg.inputs[0] != v.reg.outputs[len(v.reg.outputs)-1] {
-				log.Fatalf("input[0] and last output register must be equal for %s", v.name)
+			if v.reg.inputs[0] != v.reg.outputs[0] {
+				log.Fatalf("input[0] and output[0] must use the same registers for %s", v.name)
 			}
-			if v.commutative && v.reg.inputs[1] != v.reg.outputs[len(v.reg.outputs)-1] {
-				log.Fatalf("input[1] and last output register must be equal for %s", v.name)
+			if v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
+				log.Fatalf("input[1] and output[0] must use the same registers for %s", v.name)
 			}
 		}
 		if v.clobberFlags {
...
This diff is collapsed.
@@ -1204,7 +1204,7 @@ func (s *regAllocState) regalloc(f *Func) {
 			if mask == 0 {
 				continue
 			}
-			if opcodeTable[v.Op].resultInArg0 && out.idx == len(regspec.outputs)-1 {
+			if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
 				if !opcodeTable[v.Op].commutative {
 					// Output must use the same register as input 0.
 					r := register(s.f.getHome(args[0].ID).(*Register).Num)
...
@@ -126,7 +126,7 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
 	_ = b
 	// match: (Add64 x y)
 	// cond:
-	// result: (Int64Make (Add32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select0 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select1 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
+	// result: (Int64Make (Add32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select1 <TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <config.fe.TypeUInt32()> (Add32carry (Int64Lo x) (Int64Lo y))))
 	for {
 		x := v.Args[0]
 		y := v.Args[1]
@@ -138,8 +138,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
 		v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
 		v2.AddArg(y)
 		v0.AddArg(v2)
-		v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-		v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+		v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+		v4 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
 		v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
 		v5.AddArg(x)
 		v4.AddArg(v5)
@@ -149,8 +149,8 @@ func rewriteValuedec64_OpAdd64(v *Value, config *Config) bool {
 		v3.AddArg(v4)
 		v0.AddArg(v3)
 		v.AddArg(v0)
-		v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-		v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+		v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+		v8 := b.NewValue0(v.Line, OpAdd32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
 		v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
 		v9.AddArg(x)
 		v8.AddArg(v9)
@@ -2361,7 +2361,7 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
 	_ = b
 	// match: (Sub64 x y)
 	// cond:
-	// result: (Int64Make (Sub32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select0 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select1 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
+	// result: (Int64Make (Sub32withcarry <config.fe.TypeInt32()> (Int64Hi x) (Int64Hi y) (Select1 <TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <config.fe.TypeUInt32()> (Sub32carry (Int64Lo x) (Int64Lo y))))
 	for {
 		x := v.Args[0]
 		y := v.Args[1]
@@ -2373,8 +2373,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
 		v2 := b.NewValue0(v.Line, OpInt64Hi, config.fe.TypeUInt32())
 		v2.AddArg(y)
 		v0.AddArg(v2)
-		v3 := b.NewValue0(v.Line, OpSelect0, TypeFlags)
-		v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+		v3 := b.NewValue0(v.Line, OpSelect1, TypeFlags)
+		v4 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
 		v5 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
 		v5.AddArg(x)
 		v4.AddArg(v5)
@@ -2384,8 +2384,8 @@ func rewriteValuedec64_OpSub64(v *Value, config *Config) bool {
 		v3.AddArg(v4)
 		v0.AddArg(v3)
 		v.AddArg(v0)
-		v7 := b.NewValue0(v.Line, OpSelect1, config.fe.TypeUInt32())
-		v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(TypeFlags, config.fe.TypeUInt32()))
+		v7 := b.NewValue0(v.Line, OpSelect0, config.fe.TypeUInt32())
+		v8 := b.NewValue0(v.Line, OpSub32carry, MakeTuple(config.fe.TypeUInt32(), TypeFlags))
 		v9 := b.NewValue0(v.Line, OpInt64Lo, config.fe.TypeUInt32())
 		v9.AddArg(x)
 		v8.AddArg(v9)
...
@@ -33,7 +33,7 @@ type Type interface {
 	PtrTo() Type // given T, return *T
 	NumFields() int // # of fields of a struct
-	FieldType(i int) Type // type of ith field of the struct
+	FieldType(i int) Type // type of ith field of the struct or ith part of a tuple
 	FieldOff(i int) int64 // offset of ith field of the struct
 	FieldName(i int) string // name of ith field of the struct
@@ -84,31 +84,41 @@ func (t *CompilerType) NumElem() int64 { panic("not implemented") }
 type TupleType struct {
 	first Type
 	second Type
+	// Any tuple with a memory type must put that memory type second.
 }
 func (t *TupleType) Size() int64 { panic("not implemented") }
 func (t *TupleType) Alignment() int64 { panic("not implemented") }
 func (t *TupleType) IsBoolean() bool { return false }
 func (t *TupleType) IsInteger() bool { return false }
 func (t *TupleType) IsSigned() bool { return false }
 func (t *TupleType) IsFloat() bool { return false }
 func (t *TupleType) IsComplex() bool { return false }
 func (t *TupleType) IsPtrShaped() bool { return false }
 func (t *TupleType) IsString() bool { return false }
 func (t *TupleType) IsSlice() bool { return false }
 func (t *TupleType) IsArray() bool { return false }
 func (t *TupleType) IsStruct() bool { return false }
 func (t *TupleType) IsInterface() bool { return false }
 func (t *TupleType) IsMemory() bool { return false }
 func (t *TupleType) IsFlags() bool { return false }
 func (t *TupleType) IsVoid() bool { return false }
 func (t *TupleType) IsTuple() bool { return true }
 func (t *TupleType) String() string { return t.first.String() + "," + t.second.String() }
 func (t *TupleType) SimpleString() string { return "Tuple" }
 func (t *TupleType) ElemType() Type { panic("not implemented") }
 func (t *TupleType) PtrTo() Type { panic("not implemented") }
 func (t *TupleType) NumFields() int { panic("not implemented") }
-func (t *TupleType) FieldType(i int) Type { panic("not implemented") }
+func (t *TupleType) FieldType(i int) Type {
+	switch i {
+	case 0:
+		return t.first
+	case 1:
+		return t.second
+	default:
+		panic("bad tuple index")
+	}
+}
 func (t *TupleType) FieldOff(i int) int64 { panic("not implemented") }
 func (t *TupleType) FieldName(i int) string { panic("not implemented") }
 func (t *TupleType) NumElem() int64 { panic("not implemented") }
...
@@ -196,17 +196,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
 		// output 0 is carry/borrow, output 1 is the low 32 bits.
-		r := gc.SSARegNum1(v)
+		r := gc.SSARegNum0(v)
 		if r != gc.SSARegNum(v.Args[0]) {
-			v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
 		}
 		opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
 	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
 		// output 0 is carry/borrow, output 1 is the low 32 bits.
-		r := gc.SSARegNum1(v)
+		r := gc.SSARegNum0(v)
 		if r != gc.SSARegNum(v.Args[0]) {
-			v.Fatalf("input[0] and output[1] not in same register %s", v.LongString())
+			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
 		}
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_CONST
...
@@ -2,6 +2,9 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
+// Note: some of these functions are semantically inlined
+// by the compiler (in src/cmd/compile/internal/gc/ssa.go).
+
 #include "textflag.h"
 // bool Cas(int32 *val, int32 old, int32 new)
...
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package atomic_test

import (
	"runtime/internal/atomic"
	"testing"
)

var sink interface{}

func BenchmarkAtomicLoad64(b *testing.B) {
	var x uint64
	sink = &x
	for i := 0; i < b.N; i++ {
		_ = atomic.Load64(&x)
	}
}

func BenchmarkAtomicStore64(b *testing.B) {
	var x uint64
	sink = &x
	for i := 0; i < b.N; i++ {
		atomic.Store64(&x, 0)
	}
}