Commit 9d4b40f5 authored by Cherry Zhang

runtime, cmd/compile: implement and use DUFFCOPY on ARM64

Change-Id: I8984eac30e5df78d4b94f19412135d3cc36969f8
Reviewed-on: https://go-review.googlesource.com/29910
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
parent 7de7d20e
...@@ -618,6 +618,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -618,6 +618,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p3 := gc.Prog(arm64.ABLE) p3 := gc.Prog(arm64.ABLE)
p3.To.Type = obj.TYPE_BRANCH p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p) gc.Patch(p3, p)
case ssa.OpARM64DUFFCOPY:
p := gc.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
p.To.Offset = v.AuxInt
case ssa.OpARM64LoweredMove: case ssa.OpARM64LoweredMove:
// MOVD.P 8(R16), Rtmp // MOVD.P 8(R16), Rtmp
// MOVD.P Rtmp, 8(R17) // MOVD.P Rtmp, 8(R17)
......
...@@ -431,8 +431,14 @@ ...@@ -431,8 +431,14 @@
(OffPtr <src.Type> src [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8]) (OffPtr <src.Type> src [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8])
(Move [MakeSizeAndAlign(SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8, 1).Int64()] dst src mem)) (Move [MakeSizeAndAlign(SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8, 1).Int64()] dst src mem))
// medium move uses a duff device
// 8 and 128 are magic constants, see runtime/mkduff.go
(Move [s] dst src mem)
&& SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128
&& !config.noDuffDevice ->
(DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
// large move uses a loop // large move uses a loop
// DUFFCOPY is not implemented on ARM64 (TODO)
(Move [s] dst src mem) (Move [s] dst src mem)
&& SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0 -> && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0 ->
(LoweredMove (LoweredMove
......
...@@ -376,6 +376,25 @@ func init() { ...@@ -376,6 +376,25 @@ func init() {
faultOnNilArg0: true, faultOnNilArg0: true,
}, },
// duffcopy
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
// arg2 = mem
// auxint = offset into duffcopy code to start executing
// returns mem
// R16, R17 changed as side effect
{
name: "DUFFCOPY",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("R17"), buildReg("R16")},
clobbers: buildReg("R16 R17"),
},
faultOnNilArg0: true,
faultOnNilArg1: true,
},
// large move // large move
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect) // arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect) // arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
......
...@@ -993,6 +993,7 @@ const ( ...@@ -993,6 +993,7 @@ const (
OpARM64GreaterEqualU OpARM64GreaterEqualU
OpARM64DUFFZERO OpARM64DUFFZERO
OpARM64LoweredZero OpARM64LoweredZero
OpARM64DUFFCOPY
OpARM64LoweredMove OpARM64LoweredMove
OpARM64LoweredGetClosurePtr OpARM64LoweredGetClosurePtr
OpARM64MOVDconvert OpARM64MOVDconvert
...@@ -12323,6 +12324,20 @@ var opcodeTable = [...]opInfo{ ...@@ -12323,6 +12324,20 @@ var opcodeTable = [...]opInfo{
clobbers: 65536, // R16 clobbers: 65536, // R16
}, },
}, },
{
name: "DUFFCOPY",
auxType: auxInt64,
argLen: 3,
faultOnNilArg0: true,
faultOnNilArg1: true,
reg: regInfo{
inputs: []inputInfo{
{0, 131072}, // R17
{1, 65536}, // R16
},
clobbers: 196608, // R16 R17
},
},
{ {
name: "LoweredMove", name: "LoweredMove",
argLen: 4, argLen: 4,
......
...@@ -12470,6 +12470,24 @@ func rewriteValueARM64_OpMove(v *Value, config *Config) bool { ...@@ -12470,6 +12470,24 @@ func rewriteValueARM64_OpMove(v *Value, config *Config) bool {
return true return true
} }
// match: (Move [s] dst src mem) // match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice
// result: (DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
for {
s := v.AuxInt
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
if !(SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice) {
break
}
v.reset(OpARM64DUFFCOPY)
v.AuxInt = 8 * (128 - int64(SizeAndAlign(s).Size()/8))
v.AddArg(dst)
v.AddArg(src)
v.AddArg(mem)
return true
}
// match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0 // cond: SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0
// result: (LoweredMove dst src (ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)]) mem) // result: (LoweredMove dst src (ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)]) mem)
for { for {
......
...@@ -3852,8 +3852,7 @@ func opbra(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3852,8 +3852,7 @@ func opbra(ctxt *obj.Link, a obj.As) uint32 {
case AB: case AB:
return 0<<31 | 5<<26 /* imm26 */ return 0<<31 | 5<<26 /* imm26 */
case obj.ADUFFZERO, case obj.ADUFFZERO, obj.ADUFFCOPY, ABL:
ABL:
return 1<<31 | 5<<26 return 1<<31 | 5<<26
} }
......
...@@ -135,4 +135,389 @@ TEXT runtime·duffzero(SB), NOSPLIT, $-8-0 ...@@ -135,4 +135,389 @@ TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
MOVD.W ZR, 8(R16) MOVD.W ZR, 8(R16)
RET RET
// runtime·duffcopy copies 8*128 = 1024 bytes from the source pointer to the
// destination pointer via 128 unrolled 8-byte moves. The compiler jumps into
// this body at an AuxInt offset to copy fewer words (see DUFFCOPY lowering).
//
// Register contract (must match the DUFFCOPY op in cmd/compile/internal/ssa):
//	R16 (aka REGRT1): ptr to source memory
//	R17 (aka REGRT2): ptr to destination memory
//	R27 (aka REGTMP): scratch
// R16 and R17 are advanced past the copied region as a side effect.
// Generated by runtime/mkduff.go; do not edit by hand.
TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	MOVD.P	8(R16), R27
	MOVD.P	R27, 8(R17)
	RET
...@@ -161,7 +161,17 @@ func zeroARM64(w io.Writer) { ...@@ -161,7 +161,17 @@ func zeroARM64(w io.Writer) {
} }
func copyARM64(w io.Writer) { func copyARM64(w io.Writer) {
fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.") // R16 (aka REGRT1): ptr to source memory
// R17 (aka REGRT2): ptr to destination memory
// R27 (aka REGTMP): scratch space
// R16 and R17 are updated as a side effect
fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
for i := 0; i < 128; i++ {
fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27")
fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)")
fmt.Fprintln(w)
}
fmt.Fprintln(w, "\tRET")
} }
func tagsPPC64x(w io.Writer) { func tagsPPC64x(w io.Writer) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.