Commit 357f7336 authored by Rémy Oudompheng's avatar Rémy Oudompheng

cmd/5c, cmd/5g, cmd/5l: turn MOVB, MOVH into plain moves, optimize short arithmetic.

Pseudo-instructions MOVBS and MOVHS are used to clarify
the semantics of short integers vs. registers:
 * 8-bit and 16-bit values in registers are assumed to always
   be zero-extended or sign-extended depending on their type.
 * MOVB is truncation or move of an already extended value
   between registers.
 * MOVBU enforces zero-extension at the destination (register).
 * MOVBS enforces sign-extension at the destination (register).
And similarly for MOVH/MOVS/MOVHU.

The linker is adapted to assemble MOVB and MOVH to an ordinary
mov. Also a peephole pass in 5g that aims at eliminating
redundant zero/sign extensions is improved.

encoding/binary:
benchmark                              old ns/op    new ns/op    delta
BenchmarkReadSlice1000Int32s              220387       217185   -1.45%
BenchmarkReadStruct                        12839        12910   +0.55%
BenchmarkReadInts                           5692         5534   -2.78%
BenchmarkWriteInts                          6137         6016   -1.97%
BenchmarkPutUvarint32                        257          241   -6.23%
BenchmarkPutUvarint64                        812          754   -7.14%
benchmark                               old MB/s     new MB/s  speedup
BenchmarkReadSlice1000Int32s               18.15        18.42    1.01x
BenchmarkReadStruct                         5.45         5.42    0.99x
BenchmarkReadInts                           5.27         5.42    1.03x
BenchmarkWriteInts                          4.89         4.99    1.02x
BenchmarkPutUvarint32                      15.56        16.57    1.06x
BenchmarkPutUvarint64                       9.85        10.60    1.08x

crypto/des:
benchmark                              old ns/op    new ns/op    delta
BenchmarkEncrypt                            7002         5169  -26.18%
BenchmarkDecrypt                            7015         5195  -25.94%
benchmark                               old MB/s     new MB/s  speedup
BenchmarkEncrypt                            1.14         1.55    1.36x
BenchmarkDecrypt                            1.14         1.54    1.35x

strconv:
benchmark                              old ns/op    new ns/op    delta
BenchmarkAtof64Decimal                       457          385  -15.75%
BenchmarkAtof64Float                         574          479  -16.55%
BenchmarkAtof64FloatExp                     1035          906  -12.46%
BenchmarkAtof64Big                          1793         1457  -18.74%
BenchmarkAtof64RandomBits                   2267         2066   -8.87%
BenchmarkAtof64RandomFloats                 1416         1194  -15.68%
BenchmarkAtof32Decimal                       451          379  -15.96%
BenchmarkAtof32Float                         547          435  -20.48%
BenchmarkAtof32FloatExp                     1095          986   -9.95%
BenchmarkAtof32Random                       1154         1006  -12.82%
BenchmarkAtoi                               1415         1380   -2.47%
BenchmarkAtoiNeg                            1414         1401   -0.92%
BenchmarkAtoi64                             1744         1671   -4.19%
BenchmarkAtoi64Neg                          1737         1662   -4.32%

Fixes #1837.

R=rsc, dave, bradfitz
CC=golang-dev
https://golang.org/cl/12424043
parent 51e9858a
...@@ -559,9 +559,9 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -559,9 +559,9 @@ addmove(Reg *r, int bn, int rn, int f)
p1->as = AMOVW; p1->as = AMOVW;
if(v->etype == TCHAR || v->etype == TUCHAR) if(v->etype == TCHAR || v->etype == TUCHAR)
p1->as = AMOVB; p1->as = AMOVBS;
if(v->etype == TSHORT || v->etype == TUSHORT) if(v->etype == TSHORT || v->etype == TUSHORT)
p1->as = AMOVH; p1->as = AMOVHS;
if(v->etype == TFLOAT) if(v->etype == TFLOAT)
p1->as = AMOVF; p1->as = AMOVF;
if(v->etype == TDOUBLE) if(v->etype == TDOUBLE)
......
...@@ -466,6 +466,16 @@ abop: // asymmetric binary ...@@ -466,6 +466,16 @@ abop: // asymmetric binary
cgen(nl, &n1); cgen(nl, &n1);
} }
gins(a, &n2, &n1); gins(a, &n2, &n1);
// Normalize result for types smaller than word.
if(n->type->width < widthptr) {
switch(n->op) {
case OADD:
case OSUB:
case OMUL:
gins(optoas(OAS, n->type), &n1, &n1);
break;
}
}
gmove(&n1, res); gmove(&n1, res);
regfree(&n1); regfree(&n1);
if(n2.op != OLITERAL) if(n2.op != OLITERAL)
......
...@@ -640,6 +640,8 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) ...@@ -640,6 +640,8 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
gshift(AMOVW, &n2, SHIFT_LL, v, &n1); gshift(AMOVW, &n2, SHIFT_LL, v, &n1);
gshift(AORR, &n2, SHIFT_LR, w-v, &n1); gshift(AORR, &n2, SHIFT_LR, w-v, &n1);
regfree(&n2); regfree(&n2);
// Ensure sign/zero-extended result.
gins(optoas(OAS, nl->type), &n1, &n1);
} }
gmove(&n1, res); gmove(&n1, res);
regfree(&n1); regfree(&n1);
...@@ -665,6 +667,8 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) ...@@ -665,6 +667,8 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
else // OLSH else // OLSH
gshift(AMOVW, &n1, SHIFT_LL, sc, &n1); gshift(AMOVW, &n1, SHIFT_LL, sc, &n1);
} }
if(w < 32 && op == OLSH)
gins(optoas(OAS, nl->type), &n1, &n1);
gmove(&n1, res); gmove(&n1, res);
regfree(&n1); regfree(&n1);
return; return;
...@@ -738,6 +742,9 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res) ...@@ -738,6 +742,9 @@ cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
regfree(&n3); regfree(&n3);
patch(p3, pc); patch(p3, pc);
// Left-shift of smaller word must be sign/zero-extended.
if(w < 32 && op == OLSH)
gins(optoas(OAS, nl->type), &n2, &n2);
gmove(&n2, res); gmove(&n2, res);
regfree(&n1); regfree(&n1);
...@@ -798,7 +805,7 @@ clearfat(Node *nl) ...@@ -798,7 +805,7 @@ clearfat(Node *nl)
} }
while(c > 0) { while(c > 0) {
p = gins(AMOVBU, &nz, &dst); p = gins(AMOVB, &nz, &dst);
p->to.type = D_OREG; p->to.type = D_OREG;
p->to.offset = 1; p->to.offset = 1;
p->scond |= C_PBIT; p->scond |= C_PBIT;
......
...@@ -706,16 +706,24 @@ gmove(Node *f, Node *t) ...@@ -706,16 +706,24 @@ gmove(Node *f, Node *t)
* integer copy and truncate * integer copy and truncate
*/ */
case CASE(TINT8, TINT8): // same size case CASE(TINT8, TINT8): // same size
if(!ismem(f)) {
a = AMOVB;
break;
}
case CASE(TUINT8, TINT8): case CASE(TUINT8, TINT8):
case CASE(TINT16, TINT8): // truncate case CASE(TINT16, TINT8): // truncate
case CASE(TUINT16, TINT8): case CASE(TUINT16, TINT8):
case CASE(TINT32, TINT8): case CASE(TINT32, TINT8):
case CASE(TUINT32, TINT8): case CASE(TUINT32, TINT8):
a = AMOVB; a = AMOVBS;
break; break;
case CASE(TINT8, TUINT8):
case CASE(TUINT8, TUINT8): case CASE(TUINT8, TUINT8):
if(!ismem(f)) {
a = AMOVB;
break;
}
case CASE(TINT8, TUINT8):
case CASE(TINT16, TUINT8): case CASE(TINT16, TUINT8):
case CASE(TUINT16, TUINT8): case CASE(TUINT16, TUINT8):
case CASE(TINT32, TUINT8): case CASE(TINT32, TUINT8):
...@@ -725,7 +733,7 @@ gmove(Node *f, Node *t) ...@@ -725,7 +733,7 @@ gmove(Node *f, Node *t)
case CASE(TINT64, TINT8): // truncate low word case CASE(TINT64, TINT8): // truncate low word
case CASE(TUINT64, TINT8): case CASE(TUINT64, TINT8):
a = AMOVB; a = AMOVBS;
goto trunc64; goto trunc64;
case CASE(TINT64, TUINT8): case CASE(TINT64, TUINT8):
...@@ -734,14 +742,22 @@ gmove(Node *f, Node *t) ...@@ -734,14 +742,22 @@ gmove(Node *f, Node *t)
goto trunc64; goto trunc64;
case CASE(TINT16, TINT16): // same size case CASE(TINT16, TINT16): // same size
if(!ismem(f)) {
a = AMOVH;
break;
}
case CASE(TUINT16, TINT16): case CASE(TUINT16, TINT16):
case CASE(TINT32, TINT16): // truncate case CASE(TINT32, TINT16): // truncate
case CASE(TUINT32, TINT16): case CASE(TUINT32, TINT16):
a = AMOVH; a = AMOVHS;
break; break;
case CASE(TINT16, TUINT16):
case CASE(TUINT16, TUINT16): case CASE(TUINT16, TUINT16):
if(!ismem(f)) {
a = AMOVH;
break;
}
case CASE(TINT16, TUINT16):
case CASE(TINT32, TUINT16): case CASE(TINT32, TUINT16):
case CASE(TUINT32, TUINT16): case CASE(TUINT32, TUINT16):
a = AMOVHU; a = AMOVHU;
...@@ -749,7 +765,7 @@ gmove(Node *f, Node *t) ...@@ -749,7 +765,7 @@ gmove(Node *f, Node *t)
case CASE(TINT64, TINT16): // truncate low word case CASE(TINT64, TINT16): // truncate low word
case CASE(TUINT64, TINT16): case CASE(TUINT64, TINT16):
a = AMOVH; a = AMOVHS;
goto trunc64; goto trunc64;
case CASE(TINT64, TUINT16): case CASE(TINT64, TUINT16):
...@@ -801,7 +817,7 @@ gmove(Node *f, Node *t) ...@@ -801,7 +817,7 @@ gmove(Node *f, Node *t)
case CASE(TINT8, TUINT16): case CASE(TINT8, TUINT16):
case CASE(TINT8, TINT32): case CASE(TINT8, TINT32):
case CASE(TINT8, TUINT32): case CASE(TINT8, TUINT32):
a = AMOVB; a = AMOVBS;
goto rdst; goto rdst;
case CASE(TINT8, TINT64): // convert via int32 case CASE(TINT8, TINT64): // convert via int32
case CASE(TINT8, TUINT64): case CASE(TINT8, TUINT64):
...@@ -821,7 +837,7 @@ gmove(Node *f, Node *t) ...@@ -821,7 +837,7 @@ gmove(Node *f, Node *t)
case CASE(TINT16, TINT32): // sign extend int16 case CASE(TINT16, TINT32): // sign extend int16
case CASE(TINT16, TUINT32): case CASE(TINT16, TUINT32):
a = AMOVH; a = AMOVHS;
goto rdst; goto rdst;
case CASE(TINT16, TINT64): // convert via int32 case CASE(TINT16, TINT64): // convert via int32
case CASE(TINT16, TUINT64): case CASE(TINT16, TUINT64):
...@@ -893,13 +909,13 @@ gmove(Node *f, Node *t) ...@@ -893,13 +909,13 @@ gmove(Node *f, Node *t)
ta = AMOVW; ta = AMOVW;
switch(tt) { switch(tt) {
case TINT8: case TINT8:
ta = AMOVB; ta = AMOVBS;
break; break;
case TUINT8: case TUINT8:
ta = AMOVBU; ta = AMOVBU;
break; break;
case TINT16: case TINT16:
ta = AMOVH; ta = AMOVHS;
break; break;
case TUINT16: case TUINT16:
ta = AMOVHU; ta = AMOVHU;
...@@ -940,13 +956,13 @@ gmove(Node *f, Node *t) ...@@ -940,13 +956,13 @@ gmove(Node *f, Node *t)
fa = AMOVW; fa = AMOVW;
switch(ft) { switch(ft) {
case TINT8: case TINT8:
fa = AMOVB; fa = AMOVBS;
break; break;
case TUINT8: case TUINT8:
fa = AMOVBU; fa = AMOVBU;
break; break;
case TINT16: case TINT16:
fa = AMOVH; fa = AMOVHS;
break; break;
case TUINT16: case TUINT16:
fa = AMOVHU; fa = AMOVHU;
...@@ -1189,7 +1205,7 @@ checkref(Node *n, int force) ...@@ -1189,7 +1205,7 @@ checkref(Node *n, int force)
m1.xoffset = 0; m1.xoffset = 0;
m1.op = OINDREG; m1.op = OINDREG;
m1.type = types[TUINT8]; m1.type = types[TUINT8];
gins(AMOVBU, &m1, &m2); gins(AMOVB, &m1, &m2);
regfree(&m2); regfree(&m2);
regfree(&m1); regfree(&m1);
} }
...@@ -1575,16 +1591,19 @@ optoas(int op, Type *t) ...@@ -1575,16 +1591,19 @@ optoas(int op, Type *t)
break; break;
case CASE(OAS, TBOOL): case CASE(OAS, TBOOL):
case CASE(OAS, TINT8):
a = AMOVB; a = AMOVB;
break; break;
case CASE(OAS, TINT8):
a = AMOVBS;
break;
case CASE(OAS, TUINT8): case CASE(OAS, TUINT8):
a = AMOVBU; a = AMOVBU;
break; break;
case CASE(OAS, TINT16): case CASE(OAS, TINT16):
a = AMOVH; a = AMOVHS;
break; break;
case CASE(OAS, TUINT16): case CASE(OAS, TUINT16):
......
...@@ -35,8 +35,11 @@ ...@@ -35,8 +35,11 @@
#include "opt.h" #include "opt.h"
int xtramodes(Reg*, Adr*); int xtramodes(Reg*, Adr*);
int shortprop(Reg *r);
int shiftprop(Reg *r); int shiftprop(Reg *r);
void constprop(Adr *c1, Adr *v1, Reg *r); void constprop(Adr *c1, Adr *v1, Reg *r);
Reg* findpre(Reg *r, Adr *v);
void predicate(void); void predicate(void);
int copyau1(Prog *p, Adr *v); int copyau1(Prog *p, Adr *v);
int isdconst(Addr *a); int isdconst(Addr *a);
...@@ -45,10 +48,9 @@ void ...@@ -45,10 +48,9 @@ void
peep(void) peep(void)
{ {
Reg *r, *r1, *r2; Reg *r, *r1, *r2;
Prog *p, *p1; Prog *p;
int t; int t;
p1 = nil;
/* /*
* complete R structure * complete R structure
*/ */
...@@ -101,6 +103,8 @@ loop1: ...@@ -101,6 +103,8 @@ loop1:
// } // }
break; break;
case AMOVB:
case AMOVH:
case AMOVW: case AMOVW:
case AMOVF: case AMOVF:
case AMOVD: case AMOVD:
...@@ -120,6 +124,16 @@ loop1: ...@@ -120,6 +124,16 @@ loop1:
} }
break; break;
case AMOVHS:
case AMOVHU:
case AMOVBS:
case AMOVBU:
if(p->from.type == D_REG) {
if(shortprop(r))
t++;
}
break;
#ifdef NOTDEF #ifdef NOTDEF
if(p->scond == C_SCOND_NONE) if(p->scond == C_SCOND_NONE)
if(regtyp(&p->to)) if(regtyp(&p->to))
...@@ -133,7 +147,6 @@ loop1: ...@@ -133,7 +147,6 @@ loop1:
if(t) if(t)
goto loop1; goto loop1;
for(r=firstr; r!=R; r=r->link) { for(r=firstr; r!=R; r=r->link) {
p = r->prog; p = r->prog;
switch(p->as) { switch(p->as) {
...@@ -151,30 +164,6 @@ loop1: ...@@ -151,30 +164,6 @@ loop1:
p->reg = NREG; p->reg = NREG;
} }
break; break;
case AMOVH:
case AMOVHS:
case AMOVHU:
case AMOVB:
case AMOVBS:
case AMOVBU:
/*
* look for MOVB x,R; MOVB R,R
*/
r1 = r->link;
if(p->to.type != D_REG)
break;
if(r1 == R)
break;
p1 = r1->prog;
if(p1->as != p->as)
break;
if(p1->from.type != D_REG || p1->from.reg != p->to.reg)
break;
if(p1->to.type != D_REG || p1->to.reg != p->to.reg)
break;
excise(r1);
break;
} }
} }
...@@ -393,6 +382,8 @@ subprop(Reg *r0) ...@@ -393,6 +382,8 @@ subprop(Reg *r0)
case AMOVF: case AMOVF:
case AMOVD: case AMOVD:
case AMOVB:
case AMOVH:
case AMOVW: case AMOVW:
if(p->to.type == v1->type) if(p->to.type == v1->type)
if(p->to.reg == v1->reg) if(p->to.reg == v1->reg)
...@@ -587,6 +578,64 @@ constprop(Adr *c1, Adr *v1, Reg *r) ...@@ -587,6 +578,64 @@ constprop(Adr *c1, Adr *v1, Reg *r)
} }
} }
/*
* shortprop eliminates redundant zero/sign extensions.
*
* MOVBS x, R
* <no use R>
* MOVBS R, R'
*
* changed to
*
* MOVBS x, R
* ...
* MOVB R, R' (compiled to mov)
*
* MOVBS above can be a MOVBS, MOVBU, MOVHS or MOVHU.
*/
int
shortprop(Reg *r)
{
Prog *p, *p1;
Reg *r1;
p = r->prog;
r1 = findpre(r, &p->from);
if(r1 == R)
return 0;
p1 = r1->prog;
if(p1->as == p->as) {
// Two consecutive extensions.
goto gotit;
}
if(p1->as == AMOVW && isdconst(&p1->from)
&& p1->from.offset >= 0 && p1->from.offset < 128) {
// Loaded an immediate.
goto gotit;
}
return 0;
gotit:
if(debug['P'])
print("shortprop\n%P\n%P", p1, p);
switch(p->as) {
case AMOVBS:
case AMOVBU:
p->as = AMOVB;
break;
case AMOVHS:
case AMOVHU:
p->as = AMOVH;
break;
}
if(debug['P'])
print(" => %A\n", p->as);
return 1;
}
/* /*
* ASLL x,y,w * ASLL x,y,w
* .. (not use w, not set x y w) * .. (not use w, not set x y w)
......
...@@ -822,7 +822,7 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -822,7 +822,7 @@ addmove(Reg *r, int bn, int rn, int f)
print("What is this %E\n", v->etype); print("What is this %E\n", v->etype);
case TINT8: case TINT8:
p1->as = AMOVB; p1->as = AMOVBS;
break; break;
case TBOOL: case TBOOL:
case TUINT8: case TUINT8:
...@@ -830,7 +830,7 @@ addmove(Reg *r, int bn, int rn, int f) ...@@ -830,7 +830,7 @@ addmove(Reg *r, int bn, int rn, int f)
p1->as = AMOVBU; p1->as = AMOVBU;
break; break;
case TINT16: case TINT16:
p1->as = AMOVH; p1->as = AMOVHS;
break; break;
case TUINT16: case TUINT16:
p1->as = AMOVHU; p1->as = AMOVHU;
......
...@@ -1639,6 +1639,8 @@ oprrr(int a, int sc) ...@@ -1639,6 +1639,8 @@ oprrr(int a, int sc)
case ACMP: return o | (0xa<<21) | (1<<20); case ACMP: return o | (0xa<<21) | (1<<20);
case ACMN: return o | (0xb<<21) | (1<<20); case ACMN: return o | (0xb<<21) | (1<<20);
case AORR: return o | (0xc<<21); case AORR: return o | (0xc<<21);
case AMOVB:
case AMOVH:
case AMOVW: return o | (0xd<<21); case AMOVW: return o | (0xd<<21);
case ABIC: return o | (0xe<<21); case ABIC: return o | (0xe<<21);
case AMVN: return o | (0xf<<21); case AMVN: return o | (0xf<<21);
......
...@@ -94,10 +94,10 @@ Optab optab[] = ...@@ -94,10 +94,10 @@ Optab optab[] =
{ AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM }, { AMVN, C_LCON, C_NONE, C_REG, 13, 8, 0, LFROM },
{ ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM }, { ACMP, C_LCON, C_REG, C_NONE, 13, 8, 0, LFROM },
{ AMOVB, C_REG, C_NONE, C_REG, 14, 8, 0 }, { AMOVB, C_REG, C_NONE, C_REG, 1, 4, 0 },
{ AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0 }, { AMOVBS, C_REG, C_NONE, C_REG, 14, 8, 0 },
{ AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0 }, { AMOVBU, C_REG, C_NONE, C_REG, 58, 4, 0 },
{ AMOVH, C_REG, C_NONE, C_REG, 14, 8, 0 }, { AMOVH, C_REG, C_NONE, C_REG, 1, 4, 0 },
{ AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0 }, { AMOVHS, C_REG, C_NONE, C_REG, 14, 8, 0 },
{ AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0 }, { AMOVHU, C_REG, C_NONE, C_REG, 14, 8, 0 },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment