Commit 9d4b40f5 authored by Cherry Zhang

runtime, cmd/compile: implement and use DUFFCOPY on ARM64

Change-Id: I8984eac30e5df78d4b94f19412135d3cc36969f8
Reviewed-on: https://go-review.googlesource.com/29910
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
parent 7de7d20e
@@ -618,6 +618,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p3 := gc.Prog(arm64.ABLE)
p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p)
case ssa.OpARM64DUFFCOPY:
p := gc.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
p.To.Offset = v.AuxInt
case ssa.OpARM64LoweredMove:
// MOVD.P 8(R16), Rtmp
// MOVD.P Rtmp, 8(R17)
...
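The case above lowers the generic op to a single ADUFFCOPY prog: a call into runtime·duffcopy at the byte offset carried in auxint, so only the needed tail of the routine runs. A minimal sketch of the resulting pseudo-instruction, assuming the offset arithmetic from the rewrite rule below (illustrative text output, not compiler code):

package main

import "fmt"

// emit prints the pseudo-instruction that the codegen case above builds
// as an obj.Prog, for a move of size bytes. The offset formula comes from
// the ARM64.rules change below; the textual syntax is only illustrative.
func emit(size int64) {
    off := 8 * (128 - size/8)
    fmt.Printf("DUFFCOPY\t$%d, runtime·duffcopy(SB)\n", off)
}

func main() {
    emit(32)   // DUFFCOPY $992, ...: enters 4 copy units before the RET
    emit(1024) // DUFFCOPY $0, ...: runs all 128 copy units
}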
@@ -431,8 +431,14 @@
(OffPtr <src.Type> src [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8])
(Move [MakeSizeAndAlign(SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8, 1).Int64()] dst src mem))
// medium move uses a duff device
// 8 and 128 are magic constants, see runtime/mkduff.go
(Move [s] dst src mem)
&& SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128
&& !config.noDuffDevice ->
(DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
// large move uses a loop
(Move [s] dst src mem)
&& SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0 ->
(LoweredMove
...
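Taken together, the rules pick a strategy by size: small moves (at most 24 bytes, handled earlier in the file) stay inline, 8-byte-multiple moves up to 8*128 = 1024 bytes use the Duff's device, and everything else falls through to the LoweredMove loop, which also covers noDuffDevice builds. A sketch of the dispatch, assuming 8-byte-multiple sizes:

package main

import "fmt"

// movePath mirrors the rule conditions above for sizes that are
// multiples of 8 (a sketch; the small-move rules earlier in the file
// are elided).
func movePath(size int64, noDuffDevice bool) string {
    switch {
    case size <= 24:
        return "inline loads/stores"
    case size <= 8*128 && !noDuffDevice:
        return "DUFFCOPY"
    default:
        return "LoweredMove loop"
    }
}

func main() {
    fmt.Println(movePath(64, false))   // DUFFCOPY
    fmt.Println(movePath(2048, false)) // LoweredMove loop
    fmt.Println(movePath(64, true))    // LoweredMove loop
}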
@@ -376,6 +376,25 @@ func init() {
faultOnNilArg0: true,
},
// duffcopy
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
// arg2 = mem
// auxint = offset into duffcopy code to start executing
// returns mem
// R16, R17 changed as side effect
{
name: "DUFFCOPY",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("R17"), buildReg("R16")},
clobbers: buildReg("R16 R17"),
},
faultOnNilArg0: true,
faultOnNilArg1: true,
},
// large move
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
...
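The comment block spells out the op's register contract: source pointer in R16, destination pointer in R17, both advanced past the copied data as a side effect, R27 used as scratch. A Go model of those semantics (assumed equivalent, for illustration only; the real body is the generated assembly below):

package main

import "fmt"

// duffcopyModel copies n 8-byte words and returns how far each pointer
// advanced, mirroring the documented side effect on R16/R17. Each loop
// iteration corresponds to one MOVD.P pair in runtime·duffcopy.
func duffcopyModel(dst, src []uint64, n int) (advanced int64) {
    for i := 0; i < n; i++ {
        dst[i] = src[i]
    }
    return int64(n) * 8 // both R16 and R17 end up n*8 bytes further on
}

func main() {
    src := []uint64{1, 2, 3, 4}
    dst := make([]uint64, 4)
    fmt.Println(duffcopyModel(dst, src, 4), dst) // 32 [1 2 3 4]
}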
@@ -993,6 +993,7 @@ const (
OpARM64GreaterEqualU
OpARM64DUFFZERO
OpARM64LoweredZero
OpARM64DUFFCOPY
OpARM64LoweredMove
OpARM64LoweredGetClosurePtr
OpARM64MOVDconvert
@@ -12323,6 +12324,20 @@ var opcodeTable = [...]opInfo{
clobbers: 65536, // R16
},
},
{
name: "DUFFCOPY",
auxType: auxInt64,
argLen: 3,
faultOnNilArg0: true,
faultOnNilArg1: true,
reg: regInfo{
inputs: []inputInfo{
{0, 131072}, // R17
{1, 65536}, // R16
},
clobbers: 196608, // R16 R17
},
},
{
name: "LoweredMove",
argLen: 4,
...
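The generated table encodes register sets as bitmasks, with bit i standing for machine register i; on arm64, R16 is bit 16 and R17 is bit 17, so the literals above can be checked by hand:

package main

import "fmt"

// Decode the regInfo constants from the generated table above.
// (Quick sanity check, not compiler code.)
func main() {
    fmt.Println(65536 == 1<<16)        // true: {R16}
    fmt.Println(131072 == 1<<17)       // true: {R17}
    fmt.Println(196608 == 1<<16|1<<17) // true: {R16, R17} clobbered
}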
@@ -12470,6 +12470,24 @@ func rewriteValueARM64_OpMove(v *Value, config *Config) bool {
return true
}
// match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice
// result: (DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
for {
s := v.AuxInt
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
if !(SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice) {
break
}
v.reset(OpARM64DUFFCOPY)
v.AuxInt = 8 * (128 - int64(SizeAndAlign(s).Size()/8))
v.AddArg(dst)
v.AddArg(src)
v.AddArg(mem)
return true
}
// match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0
// result: (LoweredMove dst src (ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)]) mem)
for {
...
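The generated matcher reproduces the rule's arithmetic verbatim; a few spot values, assuming legal sizes, i.e. 8-byte multiples in (24, 1024]:

package main

import "fmt"

// Spot-check the auxint formula from the rewrite above: smaller moves
// enter deeper into runtime·duffcopy, the 1024-byte maximum at offset 0.
func main() {
    for _, size := range []int64{32, 512, 1024} {
        fmt.Println(size, 8*(128-size/8)) // 32→992, 512→512, 1024→0
    }
}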
@@ -3852,8 +3852,7 @@ func opbra(ctxt *obj.Link, a obj.As) uint32 {
case AB:
return 0<<31 | 5<<26 /* imm26 */
case obj.ADUFFZERO, obj.ADUFFCOPY, ABL:
return 1<<31 | 5<<26
}
...
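The opbra change makes the assembler encode ADUFFCOPY exactly like BL, the branch-with-link with a 26-bit immediate: bit 31 selects "with link", and 5<<26 selects the imm26 branch family. The constants can be verified against the A64 base opcodes:

package main

import "fmt"

// The A64 BL base opcode is 0x94000000; B, without the link bit,
// is 0x14000000. (Quick check of the constants in opbra above.)
func main() {
    fmt.Printf("%#x\n", uint32(1)<<31|uint32(5)<<26) // 0x94000000
    fmt.Printf("%#x\n", uint32(0)<<31|uint32(5)<<26) // 0x14000000
}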
@@ -135,4 +135,389 @@ TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
MOVD.W ZR, 8(R16)
RET
TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
MOVD.P 8(R16), R27
MOVD.P R27, 8(R17)
RET
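Because the routine is NOSPLIT with a zero-size frame, it has no prologue, so the compiler may branch-link to any copy-unit boundary inside it; each unit is two 4-byte instructions, i.e. 8 bytes of code. A quick check that every offset the rule can produce lands on such a boundary (sketch of the invariant, not compiler code):

package main

import "fmt"

// Every legal move size (8-byte multiples in (24, 1024]) must map to an
// entry offset that is a multiple of 8 and no deeper than the last
// needed unit, 8*(128-4) = 992 bytes in.
func main() {
    ok := true
    for size := int64(32); size <= 8*128; size += 8 {
        off := 8 * (128 - size/8)
        if off%8 != 0 || off < 0 || off > 992 {
            ok = false
            fmt.Println("bad offset:", size, off)
        }
    }
    fmt.Println("all offsets on copy-unit boundaries:", ok)
}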
@@ -161,7 +161,17 @@ func zeroARM64(w io.Writer) {
}

func copyARM64(w io.Writer) {
// R16 (aka REGRT1): ptr to source memory
// R17 (aka REGRT2): ptr to destination memory
// R27 (aka REGTMP): scratch space
// R16 and R17 are updated as a side effect
fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
for i := 0; i < 128; i++ {
fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27")
fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)")
fmt.Fprintln(w)
}
fmt.Fprintln(w, "\tRET")
}
func tagsPPC64x(w io.Writer) {
...
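For reference, a trimmed stand-in for copyARM64 with two units instead of 128 makes the generated shape easy to see (illustrative; the real generator also writes the file header and the sibling routines):

package main

import (
    "fmt"
    "os"
)

// Emit a two-unit version of the duffcopy body in the same style as
// copyARM64 above.
func main() {
    w := os.Stdout
    fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
    for i := 0; i < 2; i++ {
        fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27")
        fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)")
        fmt.Fprintln(w)
    }
    fmt.Fprintln(w, "\tRET")
}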