Commit c430adf1 authored by fanzha02's avatar fanzha02 Committed by Cherry Zhang

cmd/internal/obj/arm64: encode float constants into FMOVS/FMOVD instructions

Current assembler rewrites float constants to values stored in memory
except 0.0, which is not performant. This patch uses the FMOVS/FMOVD
instructions to move some available floating-point immediate constants
into SIMD&FP destination registers. These available constants can be
encoded into FMOVS/FMOVD instructions, checked by the chipfloat7() function.

go1 benchmark results.
name                     old time/op    new time/op    delta
BinaryTree17-8              6.27s ± 1%     6.27s ± 1%    ~     (p=0.762 n=10+8)
Fannkuch11-8                5.42s ± 1%     5.38s ± 0%  -0.63%  (p=0.000 n=10+10)
FmtFprintfEmpty-8          92.9ns ± 1%    93.4ns ± 0%  +0.47%  (p=0.004 n=9+8)
FmtFprintfString-8          169ns ± 2%     170ns ± 4%    ~     (p=0.378 n=10+10)
FmtFprintfInt-8             197ns ± 1%     196ns ± 1%  -0.77%  (p=0.009 n=10+9)
FmtFprintfIntInt-8          284ns ± 1%     286ns ± 1%    ~     (p=0.051 n=10+10)
FmtFprintfPrefixedInt-8     419ns ± 0%     422ns ± 1%  +0.69%  (p=0.038 n=6+10)
FmtFprintfFloat-8           458ns ± 0%     463ns ± 1%  +1.14%  (p=0.000 n=10+10)
FmtManyArgs-8              1.35µs ± 2%    1.36µs ± 1%  +0.91%  (p=0.043 n=10+10)
GobDecode-8                16.0ms ± 2%    15.5ms ± 1%   -3.39%  (p=0.000 n=10+10)
GobEncode-8                11.9ms ± 3%    11.4ms ± 1%   -3.98%  (p=0.000 n=10+9)
Gzip-8                      621ms ± 0%     625ms ± 0%   +0.59%  (p=0.000 n=9+10)
Gunzip-8                   74.0ms ± 1%    74.3ms ± 0%     ~     (p=0.059 n=9+8)
HTTPClientServer-8          116µs ± 1%     116µs ± 1%     ~     (p=0.165 n=10+10)
JSONEncode-8               29.3ms ± 1%    29.5ms ± 0%   +0.72%  (p=0.001 n=10+10)
JSONDecode-8                145ms ± 1%     148ms ± 2%   +2.06%  (p=0.000 n=10+10)
Mandelbrot200-8            9.67ms ± 0%    9.48ms ± 1%   -1.92%  (p=0.000 n=8+10)
GoParse-8                  7.55ms ± 0%    7.60ms ± 0%   +0.57%  (p=0.000 n=9+10)
RegexpMatchEasy0_32-8       234ns ± 0%     210ns ± 0%  -10.13%  (p=0.000 n=8+10)
RegexpMatchEasy0_1K-8       753ns ± 1%     729ns ± 0%   -3.17%  (p=0.000 n=10+8)
RegexpMatchEasy1_32-8       225ns ± 0%     224ns ± 0%   -0.44%  (p=0.000 n=9+9)
RegexpMatchEasy1_1K-8      1.03µs ± 0%    1.04µs ± 1%   +1.29%  (p=0.000 n=10+10)
RegexpMatchMedium_32-8      320ns ± 3%     296ns ± 6%   -7.50%  (p=0.000 n=10+10)
RegexpMatchMedium_1K-8     77.0µs ± 5%    73.6µs ± 1%     ~     (p=0.393 n=10+10)
RegexpMatchHard_32-8       3.93µs ± 0%    3.89µs ± 1%   -0.95%  (p=0.000 n=10+9)
RegexpMatchHard_1K-8        120µs ± 5%     115µs ± 1%     ~     (p=0.739 n=10+10)
Revcomp-8                   1.07s ± 0%     1.08s ± 1%   +0.63%  (p=0.000 n=10+9)
Template-8                  165ms ± 1%     163ms ± 1%   -1.05%  (p=0.001 n=8+10)
TimeParse-8                 751ns ± 1%     749ns ± 1%     ~     (p=0.209 n=10+10)
TimeFormat-8                759ns ± 1%     751ns ± 1%   -0.96%  (p=0.001 n=10+10)

name                     old speed      new speed      delta
GobDecode-8              48.0MB/s ± 2%  49.6MB/s ± 1%   +3.50%  (p=0.000 n=10+10)
GobEncode-8              64.5MB/s ± 3%  67.1MB/s ± 1%   +4.08%  (p=0.000 n=10+9)
Gzip-8                   31.2MB/s ± 0%  31.1MB/s ± 0%   -0.55%  (p=0.000 n=9+8)
Gunzip-8                  262MB/s ± 1%   261MB/s ± 0%     ~     (p=0.059 n=9+8)
JSONEncode-8             66.3MB/s ± 1%  65.8MB/s ± 0%   -0.72%  (p=0.001 n=10+10)
JSONDecode-8             13.4MB/s ± 1%  13.2MB/s ± 1%   -2.02%  (p=0.000 n=10+10)
GoParse-8                7.67MB/s ± 0%  7.63MB/s ± 0%   -0.57%  (p=0.000 n=9+10)
RegexpMatchEasy0_32-8     136MB/s ± 0%   152MB/s ± 0%  +11.45%  (p=0.000 n=10+10)
RegexpMatchEasy0_1K-8    1.36GB/s ± 1%  1.40GB/s ± 0%   +3.25%  (p=0.000 n=10+8)
RegexpMatchEasy1_32-8     142MB/s ± 0%   143MB/s ± 0%   +0.35%  (p=0.000 n=10+9)
RegexpMatchEasy1_1K-8     992MB/s ± 0%   980MB/s ± 1%   -1.27%  (p=0.000 n=10+10)
RegexpMatchMedium_32-8   3.12MB/s ± 3%  3.38MB/s ± 6%   +8.17%  (p=0.000 n=10+10)
RegexpMatchMedium_1K-8   13.3MB/s ± 5%  13.9MB/s ± 1%     ~     (p=0.362 n=10+10)
RegexpMatchHard_32-8     8.14MB/s ± 0%  8.21MB/s ± 1%   +0.95%  (p=0.000 n=10+9)
RegexpMatchHard_1K-8     8.54MB/s ± 5%  8.90MB/s ± 1%     ~     (p=0.636 n=10+10)
Revcomp-8                 238MB/s ± 0%   236MB/s ± 1%   -0.63%  (p=0.000 n=10+9)
Template-8               11.8MB/s ± 1%  11.9MB/s ± 1%   +1.07%  (p=0.001 n=8+10)

Change-Id: I57b372d8dcd47e6aec39893843b20385d5d9c37e
Reviewed-on: https://go-review.googlesource.com/129555
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarCherry Zhang <cherryyz@google.com>
parent 81957dd5
......@@ -163,6 +163,12 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
MOVB (R29)(R30<<0), R14 // ae7bbe38
MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38
MOVB R4, (R2)(R6.SXTX) // 44e82638
FMOVS $(4.0), F0 // 0010221e
FMOVD $(4.0), F0 // 0010621e
FMOVS $(0.265625), F1 // 01302a1e
FMOVD $(0.1796875), F2 // 02f0681e
FMOVS $(0.96875), F3 // 03f02d1e
FMOVD $(28.0), F4 // 0490671e
FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
FMOVS (R2)(R6<<2), F4 // 447866bc
......@@ -479,14 +485,14 @@ again:
// {
// outcode($1, &$2, NREG, &$4);
// }
FADDD $0.5, F1 // FADDD $(0.5), F1
// FADDD $0.5, F1 // FADDD $(0.5), F1
FADDD F1, F2
// LTYPEK frcon ',' freg ',' freg
// {
// outcode($1, &$2, $4.reg, &$6);
// }
FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2
// FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2
FADDD F1, F2, F3
//
......
......@@ -219,8 +219,6 @@ var optab = []Optab{
{AFADDS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFADDS, C_FREG, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFADDS, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFADDS, C_FCON, C_FREG, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFMSUBD, C_FREG, C_FREG, C_FREG, C_FREG, 15, 4, 0, 0, 0},
{AFCMPS, C_FREG, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0},
{AFCMPS, C_FCON, C_FREG, C_NONE, C_NONE, 56, 4, 0, 0, 0},
......@@ -340,9 +338,9 @@ var optab = []Optab{
{AFMOVS, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0},
{AFMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AFMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, 65, 12, 0, 0, 0},
{AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFMOVS, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0},
{AFMOVS, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFMOVD, C_FCON, C_NONE, C_NONE, C_FREG, 55, 4, 0, 0, 0},
{AFMOVD, C_FREG, C_NONE, C_NONE, C_FREG, 54, 4, 0, 0, 0},
{AFMOVS, C_REG, C_NONE, C_NONE, C_FREG, 29, 4, 0, 0, 0},
{AFMOVS, C_FREG, C_NONE, C_NONE, C_REG, 29, 4, 0, 0, 0},
......@@ -2461,6 +2459,9 @@ func buildop(ctxt *obj.Link) {
}
}
// chipfloat7() checks if the immediate constants available in FMOVS/FMOVD instructions.
// For details of the range of constants available, see
// http://infocenter.arm.com/help/topic/com.arm.doc.dui0473m/dom1359731199385.html.
func (c *ctxt7) chipfloat7(e float64) int {
ei := math.Float64bits(e)
l := uint32(int32(ei))
......@@ -3486,19 +3487,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
case 54: /* floating point arith */
o1 = c.oprrr(p, p.As)
var rf int
if p.From.Type == obj.TYPE_CONST {
rf = c.chipfloat7(p.From.Val.(float64))
if rf < 0 || true {
c.ctxt.Diag("invalid floating-point immediate\n%v", p)
rf = 0
}
rf |= (1 << 3)
} else {
rf = int(p.From.Reg)
}
rf := int(p.From.Reg)
rt := int(p.To.Reg)
r := int(p.Reg)
if (o1&(0x1F<<24)) == (0x1E<<24) && (o1&(1<<11)) == 0 { /* monadic */
......@@ -3509,6 +3498,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
}
o1 |= (uint32(rf&31) << 16) | (uint32(r&31) << 5) | uint32(rt&31)
case 55: /* floating-point constant */
var rf int
o1 = 0xf<<25 | 1<<21 | 1<<12
rf = c.chipfloat7(p.From.Val.(float64))
if rf < 0 {
c.ctxt.Diag("invalid floating-point immediate\n%v", p)
}
if p.As == AFMOVD {
o1 |= 1 << 22
}
o1 |= (uint32(rf&0xff) << 13) | uint32(p.To.Reg&31)
case 56: /* floating point compare */
o1 = c.oprrr(p, p.As)
......
......@@ -254,7 +254,11 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
switch p.As {
case AFMOVS:
if p.From.Type == obj.TYPE_FCONST {
f32 := float32(p.From.Val.(float64))
f64 := p.From.Val.(float64)
f32 := float32(f64)
if c.chipfloat7(f64) > 0 {
break
}
if math.Float32bits(f32) == 0 {
p.From.Type = obj.TYPE_REG
p.From.Reg = REGZERO
......@@ -269,6 +273,9 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
case AFMOVD:
if p.From.Type == obj.TYPE_FCONST {
f64 := p.From.Val.(float64)
if c.chipfloat7(f64) > 0 {
break
}
if math.Float64bits(f64) == 0 {
p.From.Type = obj.TYPE_REG
p.From.Reg = REGZERO
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment