Commit f4c3072c authored by Ben Shi's avatar Ben Shi Committed by Brad Fitzpatrick

cmd/compile: improve FP performance on ARM64

FMADD/FMSUB/FNMADD/FNMSUB are efficient FP instructions, which can
be used by the comiler to improve FP performance. This CL implements
this optimization.

1. The compilecmp benchmark shows little change.
name        old time/op       new time/op       delta
Template          2.35s ± 4%        2.38s ± 4%    ~     (p=0.161 n=15+15)
Unicode           1.36s ± 5%        1.36s ± 4%    ~     (p=0.685 n=14+13)
GoTypes           8.11s ± 3%        8.13s ± 2%    ~     (p=0.624 n=15+15)
Compiler          40.5s ± 2%        40.7s ± 2%    ~     (p=0.137 n=15+15)
SSA                115s ± 3%         116s ± 1%    ~     (p=0.270 n=15+14)
Flate             1.46s ± 4%        1.45s ± 5%    ~     (p=0.870 n=15+15)
GoParser          1.85s ± 2%        1.87s ± 3%    ~     (p=0.477 n=14+15)
Reflect           5.11s ± 4%        5.10s ± 2%    ~     (p=0.624 n=15+15)
Tar               2.23s ± 3%        2.23s ± 5%    ~     (p=0.624 n=15+15)
XML               2.72s ± 5%        2.74s ± 3%    ~     (p=0.290 n=15+14)
[Geo mean]        5.02s             5.03s       +0.29%

name        old user-time/op  new user-time/op  delta
Template          2.90s ± 2%        2.90s ± 3%    ~     (p=0.780 n=14+15)
Unicode           1.71s ± 5%        1.70s ± 3%    ~     (p=0.458 n=14+13)
GoTypes           9.77s ± 2%        9.76s ± 2%    ~     (p=0.838 n=15+15)
Compiler          49.1s ± 2%        49.1s ± 2%    ~     (p=0.902 n=15+15)
SSA                144s ± 1%         144s ± 2%    ~     (p=0.567 n=15+15)
Flate             1.75s ± 5%        1.74s ± 3%    ~     (p=0.461 n=15+15)
GoParser          2.22s ± 2%        2.21s ± 3%    ~     (p=0.233 n=15+15)
Reflect           5.99s ± 2%        5.95s ± 1%    ~     (p=0.093 n=14+15)
Tar               2.68s ± 2%        2.67s ± 3%    ~     (p=0.310 n=14+15)
XML               3.22s ± 2%        3.24s ± 3%    ~     (p=0.512 n=15+15)
[Geo mean]        6.08s             6.07s       -0.19%

name        old text-bytes    new text-bytes    delta
HelloSize         641kB ± 0%        641kB ± 0%    ~     (all equal)

name        old data-bytes    new data-bytes    delta
HelloSize        9.46kB ± 0%       9.46kB ± 0%    ~     (all equal)

name        old bss-bytes     new bss-bytes     delta
HelloSize         125kB ± 0%        125kB ± 0%    ~     (all equal)

name        old exe-bytes     new exe-bytes     delta
HelloSize        1.24MB ± 0%       1.24MB ± 0%    ~     (all equal)

2. The go1 benchmark shows little improvement in total (excluding noise),
but some improvement in test case Mandelbrot200 and FmtFprintfFloat.
name                     old time/op    new time/op    delta
BinaryTree17-4              42.1s ± 2%     42.0s ± 2%    ~     (p=0.453 n=30+28)
Fannkuch11-4                33.5s ± 3%     33.3s ± 3%  -0.38%  (p=0.045 n=30+30)
FmtFprintfEmpty-4           534ns ± 0%     534ns ± 0%    ~     (all equal)
FmtFprintfString-4         1.09µs ± 0%    1.09µs ± 0%  -0.27%  (p=0.000 n=23+17)
FmtFprintfInt-4            1.16µs ± 3%    1.16µs ± 3%    ~     (p=0.714 n=30+30)
FmtFprintfIntInt-4         1.76µs ± 1%    1.77µs ± 0%  +0.15%  (p=0.002 n=23+23)
FmtFprintfPrefixedInt-4    2.21µs ± 3%    2.20µs ± 3%    ~     (p=0.390 n=30+30)
FmtFprintfFloat-4          3.28µs ± 0%    3.11µs ± 0%  -5.01%  (p=0.000 n=25+26)
FmtManyArgs-4              7.18µs ± 0%    7.19µs ± 0%  +0.13%  (p=0.000 n=24+25)
GobDecode-4                94.9ms ± 0%    95.6ms ± 5%  +0.83%  (p=0.002 n=23+29)
GobEncode-4                80.7ms ± 4%    79.8ms ± 0%  -1.11%  (p=0.003 n=30+24)
Gzip-4                      4.58s ± 4%     4.59s ± 3%  +0.26%  (p=0.002 n=30+26)
Gunzip-4                    449ms ± 4%     443ms ± 0%    ~     (p=0.096 n=30+26)
HTTPClientServer-4          553µs ± 1%     548µs ± 1%  -0.96%  (p=0.000 n=30+30)
JSONEncode-4                215ms ± 4%     214ms ± 4%  -0.29%  (p=0.000 n=30+30)
JSONDecode-4                868ms ± 4%     875ms ± 5%  +0.79%  (p=0.008 n=30+30)
Mandelbrot200-4            51.4ms ± 0%    46.7ms ± 3%  -9.09%  (p=0.000 n=25+26)
GoParse-4                  42.1ms ± 0%    41.8ms ± 0%  -0.61%  (p=0.000 n=25+24)
RegexpMatchEasy0_32-4      1.02µs ± 4%    1.02µs ± 4%  -0.17%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4      3.90µs ± 0%    3.95µs ± 4%    ~     (p=0.516 n=23+30)
RegexpMatchEasy1_32-4       970ns ± 3%     973ns ± 3%    ~     (p=0.951 n=30+30)
RegexpMatchEasy1_1K-4      6.43µs ± 3%    6.33µs ± 0%  -1.62%  (p=0.000 n=30+25)
RegexpMatchMedium_32-4     1.75µs ± 0%    1.75µs ± 0%    ~     (p=0.422 n=25+24)
RegexpMatchMedium_1K-4      568µs ± 3%     562µs ± 0%    ~     (p=0.079 n=30+24)
RegexpMatchHard_32-4       30.8µs ± 0%    31.2µs ± 4%  +1.46%  (p=0.018 n=23+30)
RegexpMatchHard_1K-4        932µs ± 0%     946µs ± 3%  +1.49%  (p=0.000 n=24+30)
Revcomp-4                   7.69s ± 3%     7.69s ± 2%  +0.04%  (p=0.032 n=24+25)
Template-4                  893ms ± 5%     880ms ± 6%  -1.53%  (p=0.000 n=30+30)
TimeParse-4                4.90µs ± 3%    4.84µs ± 0%    ~     (p=0.080 n=30+25)
TimeFormat-4               4.70µs ± 1%    4.76µs ± 0%  +1.21%  (p=0.000 n=23+26)
[Geo mean]                  710µs          706µs       -0.63%

name                     old speed      new speed      delta
GobDecode-4              8.09MB/s ± 0%  8.03MB/s ± 5%  -0.77%  (p=0.002 n=23+29)
GobEncode-4              9.52MB/s ± 4%  9.62MB/s ± 0%  +1.07%  (p=0.003 n=30+24)
Gzip-4                   4.24MB/s ± 4%  4.23MB/s ± 3%  -0.35%  (p=0.002 n=30+26)
Gunzip-4                 43.2MB/s ± 4%  43.8MB/s ± 0%    ~     (p=0.123 n=30+26)
JSONEncode-4             9.03MB/s ± 4%  9.06MB/s ± 4%  +0.28%  (p=0.000 n=30+30)
JSONDecode-4             2.24MB/s ± 4%  2.22MB/s ± 5%  -0.79%  (p=0.008 n=30+30)
GoParse-4                1.38MB/s ± 1%  1.38MB/s ± 0%    ~     (p=0.401 n=25+17)
RegexpMatchEasy0_32-4    31.4MB/s ± 4%  31.5MB/s ± 3%  +0.16%  (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4     262MB/s ± 0%   259MB/s ± 4%    ~     (p=0.693 n=23+30)
RegexpMatchEasy1_32-4    33.0MB/s ± 3%  32.9MB/s ± 3%    ~     (p=0.139 n=30+30)
RegexpMatchEasy1_1K-4     159MB/s ± 3%   162MB/s ± 0%  +1.60%  (p=0.000 n=30+25)
RegexpMatchMedium_32-4    570kB/s ± 0%   570kB/s ± 0%    ~     (all equal)
RegexpMatchMedium_1K-4   1.80MB/s ± 3%  1.82MB/s ± 0%  +1.09%  (p=0.007 n=30+24)
RegexpMatchHard_32-4     1.04MB/s ± 0%  1.03MB/s ± 3%  -1.38%  (p=0.003 n=23+30)
RegexpMatchHard_1K-4     1.10MB/s ± 0%  1.08MB/s ± 3%  -1.52%  (p=0.000 n=24+30)
Revcomp-4                33.0MB/s ± 3%  33.0MB/s ± 2%    ~     (p=0.128 n=24+25)
Template-4               2.17MB/s ± 5%  2.21MB/s ± 6%  +1.61%  (p=0.000 n=30+30)
[Geo mean]               7.79MB/s       7.79MB/s       +0.05%

Change-Id: Ied3dbdb5ba8e386168629cba06fcd4263bbb83e1
Reviewed-on: https://go-review.googlesource.com/94901
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: 's avatarBrad Fitzpatrick <bradfitz@golang.org>
parent f5de4200
......@@ -186,6 +186,25 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = r1
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpARM64FMADDS,
ssa.OpARM64FMADDD,
ssa.OpARM64FNMADDS,
ssa.OpARM64FNMADDD,
ssa.OpARM64FMSUBS,
ssa.OpARM64FMSUBD,
ssa.OpARM64FNMSUBS,
ssa.OpARM64FNMSUBD:
rt := v.Reg()
ra := v.Args[0].Reg()
rm := v.Args[1].Reg()
rn := v.Args[2].Reg()
p := s.Prog(v.Op.Asm())
p.Reg = ra
p.From.Type = obj.TYPE_REG
p.From.Reg = rm
p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: rn})
p.To.Type = obj.TYPE_REG
p.To.Reg = rt
case ssa.OpARM64ADDconst,
ssa.OpARM64SUBconst,
ssa.OpARM64ANDconst,
......
......@@ -1448,3 +1448,15 @@
(FNEGD (FNMULD x y)) -> (FMULD x y)
(FNMULS (FNEGS x) y) -> (FMULS x y)
(FNMULD (FNEGD x) y) -> (FMULD x y)
(FADDS a (FMULS x y)) -> (FMADDS a x y)
(FADDD a (FMULD x y)) -> (FMADDD a x y)
(FSUBS a (FMULS x y)) -> (FMSUBS a x y)
(FSUBD a (FMULD x y)) -> (FMSUBD a x y)
(FSUBS (FMULS x y) a) -> (FNMSUBS a x y)
(FSUBD (FMULD x y) a) -> (FNMSUBD a x y)
(FADDS a (FNMULS x y)) -> (FMSUBS a x y)
(FADDD a (FNMULD x y)) -> (FMSUBD a x y)
(FSUBS a (FNMULS x y)) -> (FMADDS a x y)
(FSUBD a (FNMULD x y)) -> (FMADDD a x y)
(FSUBS (FNMULS x y) a) -> (FNMADDS a x y)
(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
......@@ -152,6 +152,7 @@ func init() {
fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}}
gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
fp2flags = regInfo{inputs: []regMask{fp, fp}}
fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
fpstore = regInfo{inputs: []regMask{gpspsbg, fp}}
......@@ -216,6 +217,16 @@ func init() {
{name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
{name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
// 3-operand, the addend comes first
{name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"}, // +arg0 + (arg1 * arg2)
{name: "FMADDD", argLength: 3, reg: fp31, asm: "FMADDD"}, // +arg0 + (arg1 * arg2)
{name: "FNMADDS", argLength: 3, reg: fp31, asm: "FNMADDS"}, // -arg0 - (arg1 * arg2)
{name: "FNMADDD", argLength: 3, reg: fp31, asm: "FNMADDD"}, // -arg0 - (arg1 * arg2)
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS"}, // +arg0 - (arg1 * arg2)
{name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD"}, // +arg0 - (arg1 * arg2)
{name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS"}, // -arg0 + (arg1 * arg2)
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)
// shifts
{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
{name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt
......
......@@ -1003,6 +1003,14 @@ const (
OpARM64CLZW
OpARM64VCNT
OpARM64VUADDLV
OpARM64FMADDS
OpARM64FMADDD
OpARM64FNMADDS
OpARM64FNMADDD
OpARM64FMSUBS
OpARM64FMSUBD
OpARM64FNMSUBS
OpARM64FNMSUBD
OpARM64SLL
OpARM64SLLconst
OpARM64SRL
......@@ -12757,6 +12765,126 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMADDS",
argLen: 3,
asm: arm64.AFMADDS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FMADDD",
argLen: 3,
asm: arm64.AFMADDD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FNMADDS",
argLen: 3,
asm: arm64.AFNMADDS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FNMADDD",
argLen: 3,
asm: arm64.AFNMADDD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FMSUBS",
argLen: 3,
asm: arm64.AFMSUBS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FMSUBD",
argLen: 3,
asm: arm64.AFMSUBD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FNMSUBS",
argLen: 3,
asm: arm64.AFNMSUBS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FNMSUBD",
argLen: 3,
asm: arm64.AFNMSUBD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "SLL",
argLen: 2,
......
......@@ -69,6 +69,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64DIVW_0(v)
case OpARM64Equal:
return rewriteValueARM64_OpARM64Equal_0(v)
case OpARM64FADDD:
return rewriteValueARM64_OpARM64FADDD_0(v)
case OpARM64FADDS:
return rewriteValueARM64_OpARM64FADDS_0(v)
case OpARM64FMOVDgpfp:
return rewriteValueARM64_OpARM64FMOVDgpfp_0(v)
case OpARM64FMOVDload:
......@@ -91,6 +95,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FNMULD_0(v)
case OpARM64FNMULS:
return rewriteValueARM64_OpARM64FNMULS_0(v)
case OpARM64FSUBD:
return rewriteValueARM64_OpARM64FSUBD_0(v)
case OpARM64FSUBS:
return rewriteValueARM64_OpARM64FSUBS_0(v)
case OpARM64GreaterEqual:
return rewriteValueARM64_OpARM64GreaterEqual_0(v)
case OpARM64GreaterEqualU:
......@@ -2972,6 +2980,164 @@ func rewriteValueARM64_OpARM64Equal_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FADDD_0(v *Value) bool {
// match: (FADDD a (FMULD x y))
// cond:
// result: (FMADDD a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMULD {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMADDD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDD (FMULD x y) a)
// cond:
// result: (FMADDD a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMULD {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FMADDD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDD a (FNMULD x y))
// cond:
// result: (FMSUBD a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FNMULD {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMSUBD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDD (FNMULD x y) a)
// cond:
// result: (FMSUBD a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FNMULD {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FMSUBD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
return false
}
func rewriteValueARM64_OpARM64FADDS_0(v *Value) bool {
// match: (FADDS a (FMULS x y))
// cond:
// result: (FMADDS a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMULS {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMADDS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDS (FMULS x y) a)
// cond:
// result: (FMADDS a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMULS {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FMADDS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDS a (FNMULS x y))
// cond:
// result: (FMSUBS a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FNMULS {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMSUBS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FADDS (FNMULS x y) a)
// cond:
// result: (FMSUBS a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FNMULS {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FMSUBS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
b := v.Block
_ = b
......@@ -3456,6 +3622,164 @@ func rewriteValueARM64_OpARM64FNMULS_0(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FSUBD_0(v *Value) bool {
// match: (FSUBD a (FMULD x y))
// cond:
// result: (FMSUBD a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMULD {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMSUBD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBD (FMULD x y) a)
// cond:
// result: (FNMSUBD a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMULD {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FNMSUBD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBD a (FNMULD x y))
// cond:
// result: (FMADDD a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FNMULD {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMADDD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBD (FNMULD x y) a)
// cond:
// result: (FNMADDD a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FNMULD {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FNMADDD)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
return false
}
func rewriteValueARM64_OpARM64FSUBS_0(v *Value) bool {
// match: (FSUBS a (FMULS x y))
// cond:
// result: (FMSUBS a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FMULS {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMSUBS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBS (FMULS x y) a)
// cond:
// result: (FNMSUBS a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FMULS {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FNMSUBS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBS a (FNMULS x y))
// cond:
// result: (FMADDS a x y)
for {
_ = v.Args[1]
a := v.Args[0]
v_1 := v.Args[1]
if v_1.Op != OpARM64FNMULS {
break
}
_ = v_1.Args[1]
x := v_1.Args[0]
y := v_1.Args[1]
v.reset(OpARM64FMADDS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
// match: (FSUBS (FNMULS x y) a)
// cond:
// result: (FNMADDS a x y)
for {
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARM64FNMULS {
break
}
_ = v_0.Args[1]
x := v_0.Args[0]
y := v_0.Args[1]
a := v.Args[1]
v.reset(OpARM64FNMADDS)
v.AddArg(a)
v.AddArg(x)
v.AddArg(y)
return true
}
return false
}
func rewriteValueARM64_OpARM64GreaterEqual_0(v *Value) bool {
// match: (GreaterEqual (FlagEQ))
// cond:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment