Commit d99cee79 authored by Cherry Zhang's avatar Cherry Zhang

[dev.ssa] cmd/compile, etc.: more ARM64 optimizations, and enable SSA by default

Add more ARM64 optimizations:
- use hardware zero register when it is possible.
- use shifted ops.
  The assembler supports shifted ops but not documented, nor knows
  how to print it. This CL adds them.
- enable fast division.
  This was disabled because it makes the old backend generate slower
  code. But with SSA it generates faster code.

Turn on SSA by default, also adjust tests.

Change-Id: I7794479954c83bb65008dcb457bc1e21d7496da6
Reviewed-on: https://go-review.googlesource.com/26950
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarDavid Chase <drchase@google.com>
parent 94c8e59a
......@@ -17,6 +17,7 @@ import (
func setArch(goarch string) (*arch.Arch, *obj.Link) {
os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
os.Setenv("GOARCH", goarch)
architecture := arch.Set(goarch)
if architecture == nil {
panic("asm: unrecognized architecture " + goarch)
......
......@@ -71,6 +71,7 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
arm64.ACMPW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
arm64.AADC & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
arm64.AROR & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
arm64.ARORW & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
arm64.AADDS & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
arm64.ACSET & obj.AMask: {Flags: gc.SizeQ | gc.RightWrite},
arm64.ACSEL & obj.AMask: {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},
......
......@@ -148,6 +148,24 @@ func storeByType(t ssa.Type) obj.As {
panic("bad store type")
}
// makeshift encodes a register shifted by a constant, used as an Offset in Prog
func makeshift(reg int16, typ int64, s int64) int64 {
return int64(reg&31)<<16 | typ | (s&63)<<10
}
// genshift generates a Prog for r = r0 op (r1 shifted by s)
func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
p := gc.Prog(as)
p.From.Type = obj.TYPE_SHIFT
p.From.Offset = makeshift(r1, typ, s)
p.Reg = r0
if r != 0 {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
}
return p
}
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
s.SetLineno(v.Line)
switch v.Op {
......@@ -284,6 +302,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARM64ADDshiftLL,
ssa.OpARM64SUBshiftLL,
ssa.OpARM64ANDshiftLL,
ssa.OpARM64ORshiftLL,
ssa.OpARM64XORshiftLL,
ssa.OpARM64BICshiftLL:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
case ssa.OpARM64ADDshiftRL,
ssa.OpARM64SUBshiftRL,
ssa.OpARM64ANDshiftRL,
ssa.OpARM64ORshiftRL,
ssa.OpARM64XORshiftRL,
ssa.OpARM64BICshiftRL:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
case ssa.OpARM64ADDshiftRA,
ssa.OpARM64SUBshiftRA,
ssa.OpARM64ANDshiftRA,
ssa.OpARM64ORshiftRA,
ssa.OpARM64XORshiftRA,
ssa.OpARM64BICshiftRA:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
case ssa.OpARM64MOVDconst:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
......@@ -315,6 +354,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.Reg = gc.SSARegNum(v.Args[0])
case ssa.OpARM64CMPshiftLL:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
case ssa.OpARM64CMPshiftRL:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
case ssa.OpARM64CMPshiftRA:
genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
case ssa.OpARM64MOVDaddr:
p := gc.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_ADDR
......@@ -372,6 +417,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = gc.SSARegNum(v.Args[0])
gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBstorezero,
ssa.OpARM64MOVHstorezero,
ssa.OpARM64MOVWstorezero,
ssa.OpARM64MOVDstorezero:
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Reg = gc.SSARegNum(v.Args[0])
gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBreg,
ssa.OpARM64MOVBUreg,
ssa.OpARM64MOVHreg,
......@@ -433,12 +488,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARM64CSELULT:
case ssa.OpARM64CSELULT,
ssa.OpARM64CSELULT0:
r1 := int16(arm64.REGZERO)
if v.Op == ssa.OpARM64CSELULT {
r1 = gc.SSARegNum(v.Args[1])
}
p := gc.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
p.From.Reg = arm64.COND_LO
p.Reg = gc.SSARegNum(v.Args[0])
p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: gc.SSARegNum(v.Args[1])}
p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARM64DUFFZERO:
......
......@@ -40,7 +40,7 @@ func shouldssa(fn *Node) bool {
if os.Getenv("SSATEST") == "" {
return false
}
case "amd64", "amd64p32", "arm", "386":
case "amd64", "amd64p32", "arm", "386", "arm64":
// Generally available.
}
if !ssaEnabled {
......
......@@ -3336,6 +3336,7 @@ func samecheap(a *Node, b *Node) bool {
// The result of walkrotate MUST be assigned back to n, e.g.
// n.Left = walkrotate(n.Left)
func walkrotate(n *Node) *Node {
//TODO: enable LROT on ARM64 once the old backend is gone
if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
return n
}
......@@ -3529,16 +3530,6 @@ func walkdiv(n *Node, init *Nodes) *Node {
goto ret
}
// TODO(zhongwei) Test shows that TUINT8, TINT8, TUINT16 and TINT16's "quick division" method
// on current arm64 backend is slower than hardware div instruction on ARM64 due to unnecessary
// data movement between registers. It could be enabled when generated code is good enough.
if Thearch.LinkArch.Family == sys.ARM64 {
switch Simtype[nl.Type.Etype] {
case TUINT8, TINT8, TUINT16, TINT16:
return n
}
}
switch Simtype[nl.Type.Etype] {
default:
return n
......
......@@ -133,11 +133,11 @@ func init() {
)
// Common regInfo
var (
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
gp1flags = regInfo{inputs: []regMask{gpg}}
//gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
gp1flags = regInfo{inputs: []regMask{gpg}}
gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
......@@ -145,8 +145,9 @@ func init() {
//gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
//gp3flags = regInfo{inputs: []regMask{gp, gp, gp}}
//gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
//gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
//gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
......@@ -228,6 +229,29 @@ func init() {
{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"}, // arg0 compare to arg1, float32
{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"}, // arg0 compare to arg1, float64
// shifted ops
{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1<<auxInt
{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1>>auxInt, unsigned shift
{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"}, // arg0 + arg1>>auxInt, signed shift
{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1<<auxInt
{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1>>auxInt, unsigned shift
{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"}, // arg0 - arg1>>auxInt, signed shift
{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1<<auxInt)
{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1>>auxInt), unsigned shift
{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"}, // arg0 & (arg1>>auxInt), signed shift
{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1<<auxInt
{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1>>auxInt, unsigned shift
{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"}, // arg0 | arg1>>auxInt, signed shift
{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1<<auxInt
{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1>>auxInt, unsigned shift
{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"}, // arg0 ^ arg1>>auxInt, signed shift
{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1<<auxInt)
{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1>>auxInt), unsigned shift
{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"}, // arg0 &^ (arg1>>auxInt), signed shift
{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt
{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
// moves
{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 32 low bits of auxint
{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
......@@ -252,6 +276,11 @@ func init() {
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + auxInt + aux. ar12=mem.
// conversions
{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte
{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
......@@ -283,7 +312,8 @@ func init() {
{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32
// conditional instructions
{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
// function calls
{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true}, // call static function aux.(*gc.Sym). arg0=mem, auxint=argsize, returns mem
......
This diff is collapsed.
......@@ -714,3 +714,10 @@ const (
AB = obj.AJMP
ABL = obj.ACALL
)
const (
// shift types
SHIFT_LL = 0 << 22
SHIFT_LR = 1 << 22
SHIFT_AR = 2 << 22
)
......@@ -112,13 +112,17 @@ import (
// val = int32(y)
//
// reg<<shift, reg>>shift, reg->shift, reg@>shift
// Shifted register value, for ARM.
// Shifted register value, for ARM and ARM64.
// In this form, reg must be a register and shift can be a register or an integer constant.
// Encoding:
// type = TYPE_SHIFT
// On ARM:
// offset = (reg&15) | shifttype<<5 | count
// shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
// count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
// On ARM64:
// offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
// shifttype = 0, 1, 2 for <<, >>, ->
//
// (reg, reg)
// A destination register pair. When used as the last argument of an instruction,
......
......@@ -286,14 +286,23 @@ func Dconv(p *Prog, a *Addr) string {
case TYPE_SHIFT:
v := int(a.Offset)
op := "<<>>->@>"[((v>>5)&3)<<1:]
if v&(1<<4) != 0 {
str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
} else {
str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
}
if a.Reg != 0 {
str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
ops := "<<>>->@>"
switch goarch := Getgoarch(); goarch {
case "arm":
op := ops[((v>>5)&3)<<1:]
if v&(1<<4) != 0 {
str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
} else {
str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
}
if a.Reg != 0 {
str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
}
case "arm64":
op := ops[((v>>22)&3)<<1:]
str = fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
default:
panic("TYPE_SHIFT is not supported on " + goarch)
}
case TYPE_REGREG:
......
// +build !amd64,!arm,!amd64p32,!386
// +build !amd64,!arm,!amd64p32,!386,!arm64
// errorcheck -0 -l -live -wb=0
// Copyright 2014 The Go Authors. All rights reserved.
......
// +build amd64 arm amd64p32 386
// +build amd64 arm amd64p32 386 arm64
// errorcheck -0 -l -live -wb=0
// Copyright 2014 The Go Authors. All rights reserved.
......
......@@ -2,7 +2,7 @@
// Fails on ppc64x because of incomplete optimization.
// See issues 9058.
// Same reason for mips64x and s390x.
// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386
// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386,!arm64
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
......
// errorcheck -0 -d=nil
// +build amd64 arm amd64p32 386
// +build amd64 arm amd64p32 386 arm64
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment