Commit 2dd16a32 authored by Ken Thompson's avatar Ken Thompson

first cut at optimizing

R=r
OCL=19564
CL=19564
parent 9dc4b1ca
......@@ -11,6 +11,7 @@ HFILES=\
../gc/go.h\
../6l/6.out.h\
gg.h\
opt.h\
OFILES=\
list.$O\
......@@ -19,6 +20,9 @@ OFILES=\
cgen.$O\
gsubr.$O\
obj.$O\
peep.$O\
reg.$O\
bits.$O\
../6l/enam.$O\
LIB=\
......
// Inferno utils/cc/bits.c
// http://code.google.com/p/inferno-os/source/browse/utils/cc/bits.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "gg.h"
#include "opt.h"
/*
 * bor returns the union of two bit sets: word by word, a|b.
 * Both arguments are passed by value, so a is reused as the result.
 */
Bits
bor(Bits a, Bits b)
{
	int w;

	for(w=BITS-1; w>=0; w--)
		a.b[w] |= b.b[w];
	return a;
}
/*
 * band returns the intersection of two bit sets: word by word, a&b.
 * Both arguments are passed by value, so a is reused as the result.
 */
Bits
band(Bits a, Bits b)
{
	int w;

	for(w=BITS-1; w>=0; w--)
		a.b[w] &= b.b[w];
	return a;
}
/*
Bits
bnot(Bits a)
{
Bits c;
int i;
for(i=0; i<BITS; i++)
c.b[i] = ~a.b[i];
return c;
}
*/
/*
 * bany reports whether any bit is set in *a:
 * 1 if at least one word is non-zero, 0 if the set is empty.
 */
int
bany(Bits *a)
{
	int i;

	i = 0;
	while(i < BITS) {
		if(a->b[i] != 0)
			return 1;
		i++;
	}
	return 0;
}
/*
 * beq reports whether two bit sets are identical:
 * 1 if every word matches, 0 otherwise.
 */
int
beq(Bits a, Bits b)
{
	int i, same;

	same = 1;
	for(i=0; same && i<BITS; i++)
		same = (a.b[i] == b.b[i]);
	return same;
}
/*
 * bnum returns the index of the lowest set bit in a,
 * counting 32 bits per word.  Calling it on an empty
 * set is a fatal error.
 */
int
bnum(Bits a)
{
	int32 word;
	int i;

	for(i=0; i<BITS; i++) {
		word = a.b[i];
		if(word != 0)
			return 32*i + bitno(word);
	}
	fatal("bad in bnum");
	return 0;
}
/*
 * blsh returns a bit set containing only bit n.
 * The caller must pass n < BITS*32; no range check is made.
 */
Bits
blsh(uint n)
{
	Bits c;

	c = zbits;
	/* unsigned constant: shifting a signed 1 into bit 31 is undefined when long is 32 bits */
	c.b[n/32] = 1UL << (n%32);
	return c;
}
/*
 * bset reports whether bit n is a member of a (1) or not (0).
 * The caller must pass n < BITS*32; no range check is made.
 */
int
bset(Bits a, uint n)
{
	/* unsigned constant: shifting a signed 1 into bit 31 is undefined when long is 32 bits */
	if(a.b[n/32] & (1UL << (n%32)))
		return 1;
	return 0;
}
/*
 * bitno returns the position (0..31) of the lowest set bit in b.
 * Calling it with b == 0 is a fatal error.
 */
int
bitno(int32 b)
{
	int i;

	for(i=0; i<32; i++) {
		/*
		 * test with unsigned operands: shifting a signed 1
		 * into bit 31 is undefined when long is 32 bits.
		 */
		if((unsigned long)b & (1UL<<i))
			return i;
	}
	fatal("bad in bitno");
	return 0;
}
/*
 * Qconv is the formatter for the %Q verb (installed by regopt).
 * It takes a Bits argument and prints the variables it contains,
 * separated by spaces: the symbol name when the variable has one,
 * otherwise "$" followed by its offset.  Output stops early
 * rather than overflow the STRINGSZ buffer.
 */
int
Qconv(Fmt *fp)
{
	char str[STRINGSZ], ss[STRINGSZ], *s;
	Bits bits;
	int i;

	bits = va_arg(fp->args, Bits);
	str[0] = 0;
	for(;;) {
		if(!bany(&bits))
			break;
		i = bnum(bits);
		if(str[0] != 0)
			strcat(str, " ");
		s = ss;
		if(var[i].sym != S)
			s = var[i].sym->name;
		else
			sprint(ss, "$%lld", var[i].offset);
		if(strlen(str) + strlen(s) + 1 >= STRINGSZ)
			break;
		strcat(str, s);
		/* remove the bit just printed so bnum finds the next one */
		bits.b[i/32] &= ~(1L << (i%32));
	}
	return fmtstrcpy(fp, str);
}
......@@ -99,10 +99,9 @@ if(throwreturn == N) {
pc->as = ARET; // overwrite AEND
pc->lineno = lineno;
// if(debug['N']) {
// regopt(ptxt);
// debug['N'] = 0;
// }
if(debug['N']) {
regopt(ptxt);
}
// fill in argument size
ptxt->to.offset = rnd(curfn->type->argwid, maxround);
......
......@@ -39,6 +39,7 @@ struct Prog
Addr from; // src address
Addr to; // dst address
Prog* link; // next instruction in this func
void* reg; // pointer to containing Reg struct
};
#define P ((Prog*)0)
......@@ -102,7 +103,6 @@ EXTERN Pool* poolast;
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
EXTERN ushort txt[NTYPE*NTYPE];
EXTERN int32 maxround;
EXTERN int32 widthptr;
EXTERN Sym* symstringo; // string objects
......
......@@ -829,76 +829,6 @@ gmove(Node *f, Node *t)
gins(a, f, t);
}
/*
 * buildtxt fills the txt[] table: txt[i*NTYPE+j] is the move
 * opcode for a value of type i paired with a value of type j.
 * Only integer, pointer and boolean pairs get an opcode; every
 * other pair (including all floating-point combinations, which
 * are not handled yet) is left as AGOK.
 *
 * When type i is at least as wide as type j a plain MOVL/MOVQ is
 * used.  Otherwise type i is the narrower one and its size and
 * signedness select a sign- or zero-extending move, with the
 * quad-word form chosen when type j is 8 bytes wide.
 */
void
buildtxt(void)
{
	Type t1, t2;
	int i, j, a;

	memset(&t1, 0, sizeof(t1));
	memset(&t2, 0, sizeof(t2));

	for(i=0; i<NTYPE; i++)
	for(j=0; j<NTYPE; j++) {
		a = AGOK;
		txt[i*NTYPE+j] = a;

		t1.etype = i;
		t2.etype = j;

		if(!(isint[i] || isptr[i] || i==TBOOL))
			continue;
		if(!(isint[j] || isptr[j] || j==TBOOL))
			continue;

		dowidth(&t1);
		dowidth(&t2);

		if(t1.width >= t2.width) {
			a = AMOVL;
			if(t1.width >= 8)
				a = AMOVQ;
			txt[i*NTYPE+j] = a;
			continue;
		}

		/*
		 * widening: t1 is strictly narrower than t2 here, so the
		 * quad-word variants must be selected on t2's width
		 * (testing t1.width >= 8 could never succeed).
		 */
		switch(i) {
		case TINT8:
			a = AMOVBLSX;
			if(t2.width >= 8)
				a = AMOVBQSX;
			break;
		case TINT16:
			a = AMOVWLSX;
			if(t2.width >= 8)
				a = AMOVWQSX;
			break;
		case TINT32:
			a = AMOVLQSX;
			break;
		case TBOOL:
		case TUINT8:
			a = AMOVBLZX;
			if(t2.width >= 8)
				a = AMOVBQZX;
			break;
		case TUINT16:
			a = AMOVWLZX;
			if(t2.width >= 8)
				a = AMOVWQZX;	/* 16-bit source: word form */
			break;
		case TPTR32:
		case TUINT32:
			a = AMOVLQZX;		/* 32-bit source: long form */
			break;
		}
		txt[i*NTYPE+j] = a;
	}
}
void
regsalloc(Node *f, Type *t)
{
......@@ -1000,7 +930,9 @@ naddr(Node *n, Addr *a)
break;
case ONAME:
a->etype = n->etype;
a->etype = 0;
if(n->type != T)
a->etype = n->type->etype;
a->offset = n->xoffset;
a->sym = n->sym;
if(a->sym == S)
......
......@@ -90,6 +90,15 @@ dumpobj(void)
}
sym = 1;
// fix up pc
pcloc = 0;
for(pl=plist; pl!=nil; pl=pl->link) {
for(p=pl->firstpc; p!=P; p=p->link) {
p->loc = pcloc;
pcloc++;
}
}
// put out functions
for(pl=plist; pl!=nil; pl=pl->link) {
......@@ -204,8 +213,13 @@ zaddr(Biobuf *b, Addr *a, int s)
t |= T_SYM;
switch(a->type) {
case D_BRANCH:
a->offset = a->branch->loc;
default:
t |= T_TYPE;
case D_NONE:
if(a->offset != 0) {
t |= T_OFFSET;
......
// Derived from Inferno utils/6c/gc.h
// http://code.google.com/p/inferno-os/source/browse/utils/6c/gc.h
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
/* aliases so code derived from the Inferno 6c optimizer keeps its spellings */
#define	Z	N
#define	Adr	Addr

#define	BITS	5				/* uint32 words in a Bits set */
#define	NVAR	(BITS*sizeof(uint32)*8)		/* one bit per tracked variable */

#define	D_HI	D_NONE
#define	D_LO	D_NONE

/* true for the general register address types D_AX..D_R15 */
#define	isregtype(t)	((t)>= D_AX && (t)<=D_R15)

/*
 * BLOAD/BSTORE work on whole Bits values and need bnot();
 * LOAD/STORE work on a single word and expect an index
 * variable named z to be in scope at the point of use.
 * The macro argument is parenthesized so any pointer
 * expression may be passed.
 */
#define	BLOAD(r)	band(bnot((r)->refbehind), (r)->refahead)
#define	BSTORE(r)	band(bnot((r)->calbehind), (r)->calahead)
#define	LOAD(r)		(~(r)->refbehind.b[z] & (r)->refahead.b[z])
#define	STORE(r)	(~(r)->calbehind.b[z] & (r)->calahead.b[z])

/* tuning constants; their users are outside this view -- presumably the region cost and loop-weight code in reg.c */
#define	CLOAD	5
#define	CREF	5
#define	CINF	1000
#define	LOOP	3
typedef struct Bits Bits;
typedef struct Reg Reg;
typedef struct Var Var;
typedef struct Rgn Rgn;
/*
 * Bits is a fixed-size bit set of BITS 32-bit words,
 * passed and returned by value by the helpers in bits.c.
 * Bit n lives in b[n/32] at position n%32.
 */
struct Bits
{
uint32 b[BITS];
};
/*
 * Reg is the optimizer's per-instruction node.  regopt allocates
 * one for each Prog it considers and links them into a flow graph.
 */
struct Reg
{
Bits set;	/* variables written by the instruction's right operand */
Bits use1;	/* variables read through the left (from) operand */
Bits use2;	/* variables read through the right (to) operand */
/* data-flow sets; NOTE(review): filled in by prop/synch, which are not visible here */
Bits refbehind;
Bits refahead;
Bits calbehind;
Bits calahead;
Bits regdiff;
Bits act;	/* cleared to zbits before regions are isolated */
int32 regu;	/* bit mask (RtoB/FtoB) of machine registers the instruction touches */
int32 loop; /* could be shorter */
int32 rpo; /* reverse post ordering */
int32 active;	/* visit mark, cleared before each traversal */
// uint32 magic;
// int32 pc;
// Reg* log5;
Reg* p1;	/* fall-through predecessor */
Reg* p2;	/* head of the list of branch predecessors */
Reg* p2link;	/* next entry on the target's p2 list */
Reg* s1;	/* fall-through successor */
Reg* s2;	/* branch-target successor */
Reg* link;	/* next Reg in program order; also chains the free list */
Prog* prog;	/* the instruction this node describes */
};
/* R is the nil Reg pointer */
#define R ((Reg*)0)
/*
 * Var is one entry of the var[] table: a variable the optimizer
 * tracks, identified by (sym, name, offset).  Its index in var[]
 * is its bit number in every Bits set.
 */
struct Var
{
vlong offset;	/* offset copied from the operand */
Sym* sym;	/* symbol, or S */
char name;	/* address class: D_EXTERN, D_STATIC, D_PARAM or D_AUTO */
char etype;	/* element type (T* constant) copied from the operand */
};
/* capacity of the region[] table; regopt stops with "too many regions" when it fills */
#define NRGN 600
/*
 * Rgn records one candidate for register allocation: a variable
 * together with the instruction where its region is entered.
 */
struct Rgn
{
Reg* enter;	/* instruction at which the region starts */
short cost;	/* value of `change` after paint1; rcmp sorts largest first */
short varno;	/* index of the variable in var[] */
short regno;	/* non-zero when a register was assigned; passed on to paint3 */
};
/*
 * Optimizer state.  reg.c redefines EXTERN as empty before
 * including this header, so the storage is defined there.
 */
EXTERN int32 exregoffset; // not set
EXTERN int32 exfregoffset; // not set
EXTERN Reg* firstr;	// head of the Reg list for the current function
EXTERN Reg* lastr;	// tail of that list
EXTERN Reg zreg;	// template copied into every Reg handed out by rega
EXTERN Reg* freer;	// free list of Reg structures, chained through link
EXTERN Var var[NVAR];	// tracked variables; the index is the bit number
EXTERN Reg** rpo2r;
EXTERN Rgn region[NRGN];	// candidate regions collected in pass 5
EXTERN Rgn* rgp;	// next free slot in region[]
EXTERN int nregion;	// number of entries used in region[]
EXTERN int nvar;	// number of entries used in var[]
EXTERN int32 regbits;	// registers unavailable for allocation; starts as RtoB(D_SP)
EXTERN int32 exregbits;
EXTERN Bits externs;	// variables of class D_EXTERN or D_STATIC
EXTERN Bits params;
EXTERN Bits consts;
EXTERN Bits addrs;	// variables whose address is taken (LEA, D_ADDR, CALL operand)
EXTERN int change;	// progress flag for the iterative passes; also the cost accumulated by paint1
EXTERN Bits zbits;	// the empty set
EXTERN uchar typechlpfd[NTYPE]; // botch
EXTERN uchar typev[NTYPE]; // botch
EXTERN int32 maxnr;
EXTERN int32* idom;
/*
 * bits.c
 */
Bits	bor(Bits, Bits);
Bits	band(Bits, Bits);
Bits	bnot(Bits);	/* definition is currently commented out in bits.c */
int	bany(Bits*);
int	bnum(Bits);
Bits	blsh(uint);
int	beq(Bits, Bits);
int	bset(Bits, uint);
int	bitno(int32);	/* called by bnum ahead of its definition */
int	Qconv(Fmt *fp);

/*
 * reg.c
 */
Reg*	rega(void);
int	rcmp(const void*, const void*);
void	regopt(Prog*);
void	addmove(Reg*, int, int, int);
uint32	doregbits(int);
Bits	mkvar(Reg*, Adr*);
void	prop(Reg*, Bits, Bits);
void	loopit(Reg*, int32);
void	synch(Reg*, Bits);
uint32	allreg(uint32, Rgn*);
void	paint1(Reg*, int);
uint32	paint2(Reg*, int);
void	paint3(Reg*, int, int32, int);
void	addreg(Adr*, int);

/*
 * peep.c
 */
void	peep(void);
void	excise(Reg*);
Reg*	uniqp(Reg*);
Reg*	uniqs(Reg*);
int	regtyp(Adr*);
int	anyvar(Adr*);
int	subprop(Reg*);
int	copyprop(Reg*);
int	copy1(Adr*, Adr*, Reg*, int);
int	copyu(Prog*, Adr*, Adr*);
int	copyas(Adr*, Adr*);
int	copyau(Adr*, Adr*);
int	copysub(Adr*, Adr*, Adr*, int);
int	copysub1(Prog*, Adr*, Adr*, int);

/* conversions between register numbers and register bit masks */
int32	RtoB(int);
int32	FtoB(int);
int	BtoR(int32);
int	BtoF(int32);
// Derived from Inferno utils/6c/peep.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/peep.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "gg.h"
#include "opt.h"
/*
 * needc scans forward from p and reports whether the carry flag
 * is consumed (ADC, SBB, RCR) before something that ends the scan:
 * an ADD/SUB, a JMP/RET/CALL, any instruction whose destination
 * is a branch, or the end of the list.
 * Returns 1 if carry is needed, 0 otherwise.
 */
static int
needc(Prog *p)
{
	for(; p != P; p = p->link) {
		switch(p->as) {
		case AADCL:
		case AADCQ:
		case ASBBL:
		case ASBBQ:
		case ARCRL:
		case ARCRQ:
			return 1;

		case AADDL:
		case AADDQ:
		case ASUBL:
		case ASUBQ:
		case AJMP:
		case ARET:
		case ACALL:
			return 0;
		}
		/* any other opcode: stop only at a branch */
		if(p->to.type == D_BRANCH)
			return 0;
	}
	return 0;
}
/*
 * rnops skips forward over operand-less NOPs starting at r,
 * following unique successors only.  It returns the first node
 * that is not such a NOP, or the last NOP reached when that NOP
 * has no unique successor.  R is passed through unchanged.
 */
static Reg*
rnops(Reg *r)
{
	Prog *p;
	Reg *next;

	while(r != R) {
		p = r->prog;
		if(p->as != ANOP || p->from.type != D_NONE || p->to.type != D_NONE)
			break;
		next = uniqs(r);
		if(next == R)
			break;
		r = next;
	}
	return r;
}
/*
 * peep is the peephole pass over the Reg list that starts at firstr.
 * It first gives a Reg to every instruction that lacks one, then
 * repeats a set of local rewrites until a full sweep changes nothing:
 *	- register-to-register moves are removed via copyprop/subprop;
 *	- a sign/zero extension immediately repeated on its own result
 *	  is turned into a plain MOVL/MOVQ;
 *	- ADD/SUB of the constant 1 or -1 becomes INC/DEC when needc
 *	  says no later instruction consumes the carry.
 */
void
peep(void)
{
Reg *r, *r1, *r2;
Prog *p, *p1;
int t;
/*
 * complete R structure:
 * for each pair of adjacent Regs, walk the Progs lying between
 * them and splice in a new Reg for every one that is not a
 * pseudo-op.  (presumably these are instructions added after
 * regopt's pass 1, e.g. by addmove -- confirm.)
 */
t = 0;
for(r=firstr; r!=R; r=r1) {
r1 = r->link;
if(r1 == R)
break;
p = r->prog->link;
while(p != r1->prog)
switch(p->as) {
default:
/* insert r2 between r and r1 in both the list and the flow graph */
r2 = rega();
r->link = r2;
r2->link = r1;
r2->prog = p;
r2->p1 = r;
r->s1 = r2;
r2->s1 = r1;
r1->p1 = r2;
r = r2;
t++;
/* deliberate fall through: advance p in every case */
case ADATA:
case AGLOBL:
case ANAME:
case ASIGNAME:
p = p->link;
}
}
/* NOTE(review): clears the global pc; the original author was unsure this is safe */
pc = 0; /* speculating it won't kill */
loop1:
/* t counts the rewrites made in this sweep; sweep again while non-zero */
t = 0;
for(r=firstr; r!=R; r=r->link) {
p = r->prog;
switch(p->as) {
case AMOVL:
case AMOVQ:
case AMOVSS:
case AMOVSD:
/* register-to-register move: try to make it redundant, then delete it */
if(regtyp(&p->to))
if(regtyp(&p->from)) {
if(copyprop(r)) {
excise(r);
t++;
} else
if(subprop(r) && copyprop(r)) {
excise(r);
t++;
}
}
break;
case AMOVBLZX:
case AMOVWLZX:
case AMOVBLSX:
case AMOVWLSX:
/*
 * extension to 32 bits followed (ignoring NOPs) by the same
 * extension reading the register just written: the second
 * one can be a plain MOVL.
 */
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVL;
t++;
}
}
}
break;
case AMOVBQSX:
case AMOVBQZX:
case AMOVWQSX:
case AMOVWQZX:
case AMOVLQSX:
case AMOVLQZX:
/* same as above for extensions to 64 bits: second becomes MOVQ */
if(regtyp(&p->to)) {
r1 = rnops(uniqs(r));
if(r1 != R) {
p1 = r1->prog;
if(p->as == p1->as && p->to.type == p1->from.type){
p1->as = AMOVQ;
t++;
}
}
}
break;
case AADDL:
case AADDQ:
case AADDW:
/* ADD $-1 -> DEC, ADD $1 -> INC, unless the carry is needed later */
if(p->from.type != D_CONST || needc(p->link))
break;
if(p->from.offset == -1){
if(p->as == AADDQ)
p->as = ADECQ;
else if(p->as == AADDL)
p->as = ADECL;
else
p->as = ADECW;
p->from = zprog.from;
}
else if(p->from.offset == 1){
if(p->as == AADDQ)
p->as = AINCQ;
else if(p->as == AADDL)
p->as = AINCL;
else
p->as = AINCW;
p->from = zprog.from;
}
break;
case ASUBL:
case ASUBQ:
case ASUBW:
/* SUB $-1 -> INC, SUB $1 -> DEC, unless the carry is needed later */
if(p->from.type != D_CONST || needc(p->link))
break;
if(p->from.offset == -1) {
if(p->as == ASUBQ)
p->as = AINCQ;
else if(p->as == ASUBL)
p->as = AINCL;
else
p->as = AINCW;
p->from = zprog.from;
}
else if(p->from.offset == 1){
if(p->as == ASUBQ)
p->as = ADECQ;
else if(p->as == ASUBL)
p->as = ADECL;
else
p->as = ADECW;
p->from = zprog.from;
}
break;
}
}
if(t)
goto loop1;
}
/*
 * excise deletes the instruction attached to r by turning it
 * into a NOP with empty operands.  The Prog stays in the list;
 * regopt unlinks NOPs at the end.
 */
void
excise(Reg *r)
{
	Prog *dead;

	dead = r->prog;
	dead->to = zprog.to;
	dead->from = zprog.from;
	dead->as = ANOP;
}
/*
 * uniqp returns the unique predecessor of r, or R when r has
 * none or more than one.  A node has a unique predecessor when
 * it has a fall-through predecessor and no branch predecessors,
 * or exactly one branch predecessor and no fall-through one.
 */
Reg*
uniqp(Reg *r)
{
	Reg *pred;

	if(r->p1 != R) {
		if(r->p2 != R)
			return R;
		return r->p1;
	}
	pred = r->p2;
	if(pred != R && pred->p2link == R)
		return pred;
	return R;
}
/*
 * uniqs returns the unique successor of r, or R when r has
 * none or both a fall-through and a branch successor.
 */
Reg*
uniqs(Reg *r)
{
	if(r->s1 == R)
		return r->s2;	/* only the branch edge, which may itself be R */
	if(r->s2 != R)
		return R;
	return r->s1;
}
/*
 * regtyp reports whether the operand is a plain register:
 * a general register D_AX..D_R15 or one of the sixteen
 * registers starting at D_X0.  Returns 1 or 0.
 */
int
regtyp(Adr *a)
{
	int t;

	t = a->type;
	return (t >= D_AX && t <= D_R15) || (t >= D_X0 && t <= D_X0+15);
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the second MOV (v1 -> v2, both registers).
 * Returns 1 if the rewrite was made, 0 if not.
 */
int
subprop(Reg *r0)
{
Prog *p;
Adr *v1, *v2;
Reg *r;
int t;
p = r0->prog;
v1 = &p->from;
if(!regtyp(v1))
return 0;
v2 = &p->to;
if(!regtyp(v2))
return 0;
/*
 * walk backwards through straight-line code (unique predecessor
 * and unique successor) looking for the MOV that loads v1.
 */
for(r=uniqp(r0); r!=R; r=uniqp(r)) {
if(uniqs(r) == R)
break;
p = r->prog;
switch(p->as) {
case ACALL:
return 0;
case AIMULL:
case AIMULQ:
case AIMULW:
/* the form with a destination is treated as an ordinary instruction */
if(p->to.type != D_NONE)
break;
/* otherwise fall through: give up, like the instructions below */
case ADIVB:
case ADIVL:
case ADIVQ:
case ADIVW:
case AIDIVB:
case AIDIVL:
case AIDIVQ:
case AIDIVW:
case AIMULB:
case AMULB:
case AMULL:
case AMULQ:
case AMULW:
case AROLB:
case AROLL:
case AROLQ:
case AROLW:
case ARORB:
case ARORL:
case ARORQ:
case ARORW:
case ASALB:
case ASALL:
case ASALQ:
case ASALW:
case ASARB:
case ASARL:
case ASARQ:
case ASARW:
case ASHLB:
case ASHLL:
case ASHLQ:
case ASHLW:
case ASHRB:
case ASHRL:
case ASHRQ:
case ASHRW:
case AREP:
case AREPN:
case ACWD:
case ACDQ:
case ACQO:
case AMOVSL:
case AMOVSQ:
/* presumably because these use fixed registers implicitly -- give up */
return 0;
case AMOVL:
case AMOVQ:
/* found the MOV whose destination is v1 */
if(p->to.type == v1->type)
goto gotit;
break;
}
/* stop if v2 is referenced in between, or v1 could not be replaced here */
if(copyau(&p->from, v2) ||
copyau(&p->to, v2))
break;
if(copysub(&p->from, v1, v2, 0) ||
copysub(&p->to, v1, v2, 0))
break;
}
return 0;
gotit:
/* retarget the first MOV to v2 */
copysub(&p->to, v1, v2, 1);
if(debug['P']) {
print("gotit: %D->%D\n%P", v1, v2, r->prog);
if(p->from.type == v2->type)
print(" excise");
print("\n");
}
/* rewrite every instruction between the two MOVs to use v2 instead of v1 */
for(r=uniqs(r); r!=r0; r=uniqs(r)) {
p = r->prog;
copysub(&p->from, v1, v2, 1);
copysub(&p->to, v1, v2, 1);
if(debug['P'])
print("%P\n", r->prog);
}
/* swap the operands of the final MOV: v1->v2 becomes v2->v1 */
t = v1->type;
v1->type = v2->type;
v2->type = t;
if(debug['P'])
print("%P last\n", r->prog);
return 1;
}
/*
 * Copy propagation: try to make the copy v1->v2 at r0 redundant.
 * Every later use of v2 is replaced by v1 until either register
 * is redefined.  F is the flag that copy1 carries along:
 *
 *	v1->v2		F=0
 *	(use v2		s/v2/v1/)*
 *	set v1		F=1
 *	use v2		return fail
 *	-----------------
 *	v1->v2		F=0
 *	(use v2		s/v2/v1/)*
 *	set v1		F=1
 *	set v2		return success
 *
 * Returns 1 when the copy can be deleted, 0 otherwise.
 */
int
copyprop(Reg *r0)
{
	Adr *src, *dst;
	Reg *r;

	src = &r0->prog->from;
	dst = &r0->prog->to;

	/* a copy of something onto itself is trivially removable */
	if(copyas(src, dst))
		return 1;

	/* clear the visit marks that copy1 uses */
	r = firstr;
	while(r != R) {
		r->active = 0;
		r = r->link;
	}
	return copy1(src, dst, r0->s1, 0);
}
/*
 * copy1 is the worker for copyprop.  Starting at r it follows
 * fall-through successors (recursing into branch successors) and
 * replaces uses of v2 by v1.  f is set once replacement is no
 * longer allowed: at a node without a unique predecessor, or
 * after v1 has been modified.
 * Returns 1 if the original copy is redundant along every path
 * explored, 0 as soon as some use of v2 cannot be replaced.
 * NOTE(review): r is dereferenced before the r != R test, so the
 * caller must not pass R.
 */
int
copy1(Adr *v1, Adr *v2, Reg *r, int f)
{
int t;
Prog *p;
/* already visited on this walk: treat as success */
if(r->active) {
if(debug['P'])
print("act set; return 1\n");
return 1;
}
r->active = 1;
if(debug['P'])
print("copy %D->%D f=%d\n", v1, v2, f);
for(; r != R; r = r->s1) {
p = r->prog;
if(debug['P'])
print("%P", p);
/* a merge point: other paths reach here, so no more substitution */
if(!f && uniqp(r) == R) {
f = 1;
if(debug['P'])
print("; merge; f=%d", f);
}
/* query only (s == A): how does p touch v2? */
t = copyu(p, v2, A);
switch(t) {
case 2: /* rar, cant split */
if(debug['P'])
print("; %D rar; return 0\n", v2);
return 0;
case 3: /* set */
if(debug['P'])
print("; %D set; return 1\n", v2);
return 1;
case 1: /* used, substitute */
case 4: /* use and set */
if(f) {
if(!debug['P'])
return 0;
if(t == 4)
print("; %D used+set and f=%d; return 0\n", v2, f);
else
print("; %D used and f=%d; return 0\n", v2, f);
return 0;
}
/* perform the substitution; non-zero means it failed */
if(copyu(p, v2, v1)) {
if(debug['P'])
print("; sub fail; return 0\n");
return 0;
}
if(debug['P'])
print("; sub %D/%D", v2, v1);
if(t == 4) {
if(debug['P'])
print("; %D used+set; return 1\n", v2);
return 1;
}
break;
}
/* once v1 itself is modified, later uses of v2 can no longer be replaced */
if(!f) {
t = copyu(p, v1, A);
if(!f && (t == 2 || t == 3 || t == 4)) {
f = 1;
if(debug['P'])
print("; %D set and !f; f=%d", v1, f);
}
}
if(debug['P'])
print("\n");
/* explore the branch successor with the current flag */
if(r->s2)
if(!copy1(v1, v2, r->s2, f))
return 0;
}
return 1;
}
/*
 * copyu classifies how instruction p touches operand v.
 * With s == A it only queries and returns
 *	1 if v only used (and substitute),
 *	2 if read-alter-rewrite
 *	3 if set
 *	4 if set and used
 *	0 otherwise (not touched)
 * With s != A it also replaces v by s in p's operands; callers
 * (see copy1) then treat any non-zero return as a failed
 * substitution.
 */
int
copyu(Prog *p, Adr *v, Adr *s)
{
switch(p->as) {
default:
/* unknown opcode: be conservative and report read-alter-rewrite */
if(debug['P'])
print("unknown op %A\n", p->as);
/* SBBL; ADCL; FLD1; SAHF */
return 2;
case ANEGB:
case ANEGW:
case ANEGL:
case ANEGQ:
case ANOTB:
case ANOTW:
case ANOTL:
case ANOTQ:
/* single operand modified in place */
if(copyas(&p->to, v))
return 2;
break;
case ALEAL: /* lhs addr, rhs store */
case ALEAQ:
if(copyas(&p->from, v))
return 2;
/* fall through: the destination is handled like a move */
case ANOP: /* rhs store */
case AMOVL:
case AMOVQ:
case AMOVBLSX:
case AMOVBLZX:
case AMOVBQSX:
case AMOVBQZX:
case AMOVLQSX:
case AMOVLQZX:
case AMOVWLSX:
case AMOVWLZX:
case AMOVWQSX:
case AMOVWQZX:
case AMOVSS:
case AMOVSD:
case ACVTSD2SL:
case ACVTSD2SQ:
case ACVTSD2SS:
case ACVTSL2SD:
case ACVTSL2SS:
case ACVTSQ2SD:
case ACVTSQ2SS:
case ACVTSS2SD:
case ACVTSS2SL:
case ACVTSS2SQ:
case ACVTTSD2SL:
case ACVTTSD2SQ:
case ACVTTSS2SL:
case ACVTTSS2SQ:
/* destination is v: it is set, and also used if the source mentions it */
if(copyas(&p->to, v)) {
if(s != A)
return copysub(&p->from, v, s, 1);
if(copyau(&p->from, v))
return 4;
return 3;
}
goto caseread;
case AROLB:
case AROLL:
case AROLQ:
case AROLW:
case ARORB:
case ARORL:
case ARORQ:
case ARORW:
case ASALB:
case ASALL:
case ASALQ:
case ASALW:
case ASARB:
case ASARL:
case ASARQ:
case ASARW:
case ASHLB:
case ASHLL:
case ASHLQ:
case ASHLW:
case ASHRB:
case ASHRL:
case ASHRQ:
case ASHRW:
if(copyas(&p->to, v))
return 2;
/* a count held in CX cannot be moved to another register */
if(copyas(&p->from, v))
if(p->from.type == D_CX)
return 2;
goto caseread;
case AADDB: /* rhs rar */
case AADDL:
case AADDQ:
case AADDW:
case AANDB:
case AANDL:
case AANDQ:
case AANDW:
case ADECL:
case ADECQ:
case ADECW:
case AINCL:
case AINCQ:
case AINCW:
case ASUBB:
case ASUBL:
case ASUBQ:
case ASUBW:
case AORB:
case AORL:
case AORQ:
case AORW:
case AXORB:
case AXORL:
case AXORQ:
case AXORW:
case AMOVB:
case AMOVW:
case AADDSD:
case AADDSS:
case ACMPSD:
case ACMPSS:
case ADIVSD:
case ADIVSS:
case AMAXSD:
case AMAXSS:
case AMINSD:
case AMINSS:
case AMULSD:
case AMULSS:
case ARCPSS:
case ARSQRTSS:
case ASQRTSD:
case ASQRTSS:
case ASUBSD:
case ASUBSS:
case AXORPD:
if(copyas(&p->to, v))
return 2;
goto caseread;
case ACMPL: /* read only */
case ACMPW:
case ACMPB:
case ACMPQ:
case ACOMISD:
case ACOMISS:
case AUCOMISD:
case AUCOMISS:
caseread:
/* both operands are only read: substitute in them, or report a use */
if(s != A) {
if(copysub(&p->from, v, s, 1))
return 1;
return copysub(&p->to, v, s, 1);
}
if(copyau(&p->from, v))
return 1;
if(copyau(&p->to, v))
return 1;
break;
case AJGE: /* no reference */
case AJNE:
case AJLE:
case AJEQ:
case AJHI:
case AJLS:
case AJMI:
case AJPL:
case AJGT:
case AJLT:
case AJCC:
case AJCS:
case AADJSP:
case AWAIT:
case ACLD:
break;
case AIMULL:
case AIMULQ:
case AIMULW:
/* form with an explicit destination behaves like the other arithmetic ops */
if(p->to.type != D_NONE) {
if(copyas(&p->to, v))
return 2;
goto caseread;
}
/* otherwise fall through to the AX/DX group below */
case ADIVB:
case ADIVL:
case ADIVQ:
case ADIVW:
case AIDIVB:
case AIDIVL:
case AIDIVQ:
case AIDIVW:
case AIMULB:
case AMULB:
case AMULL:
case AMULQ:
case AMULW:
case ACWD:
case ACDQ:
case ACQO:
/* AX and DX are treated as read and rewritten by these */
if(v->type == D_AX || v->type == D_DX)
return 2;
goto caseread;
case AMOVSL:
case AMOVSQ:
case AREP:
case AREPN:
/* CX, DI and SI are treated as read and rewritten by these */
if(v->type == D_CX || v->type == D_DI || v->type == D_SI)
return 2;
goto caseread;
case AJMP: /* funny */
if(s != A) {
if(copysub(&p->to, v, s, 1))
return 1;
return 0;
}
if(copyau(&p->to, v))
return 1;
return 0;
case ARET: /* funny */
/* the return registers must stay as they are */
if(v->type == REGRET || v->type == FREGRET)
return 2;
if(s != A)
return 1;
return 3;
case ACALL: /* funny */
if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
return 2;
if(REGARG && v->type == REGARG)
return 2;
if(s != A) {
if(copysub(&p->to, v, s, 1))
return 1;
return 0;
}
/* anything else is reported as set by the call */
if(copyau(&p->to, v))
return 4;
return 3;
case ATEXT: /* funny */
if(REGARG && v->type == REGARG)
return 3;
return 0;
}
return 0;
}
/*
 * copyas reports a direct reference: operand a names exactly
 * the same thing as v.  Whether that is a set or a use depends
 * on the instruction's semantics.  Registers match on type
 * alone; D_AUTO and D_PARAM operands must also have the same
 * offset.  Returns 1 or 0.
 */
int
copyas(Adr *a, Adr *v)
{
	if(a->type == v->type) {
		if(regtyp(v))
			return 1;
		if((v->type == D_AUTO || v->type == D_PARAM) && v->offset == a->offset)
			return 1;
	}
	return 0;
}
/*
 * copyau reports whether operand a refers to v either directly
 * (see copyas) or indirectly: when v is a register, as the base
 * of an indirect address or as the index register.
 * Returns 1 or 0.
 */
int
copyau(Adr *a, Adr *v)
{
	if(copyas(a, v))
		return 1;
	if(!regtyp(v))
		return 0;
	return a->type-D_INDIR == v->type || a->index == v->type;
}
/*
 * copysub substitutes s for v in operand a.
 * With f == 0 nothing is modified; the call only checks whether
 * the substitution would be possible.
 * Returns 1 on failure to substitute, 0 otherwise (including
 * when a does not mention v at all).
 */
int
copysub(Adr *a, Adr *v, Adr *s, int f)
{
	int t;

	/* direct reference: replace the register itself */
	if(copyas(a, v)) {
		if(f && regtyp(s))
			a->type = s->type;
		return 0;
	}
	if(!regtyp(v))
		return 0;

	/* indirect reference: v may be the base and/or the index */
	t = v->type;
	if(a->type == t+D_INDIR) {
		if((s->type == D_BP || s->type == D_R13) && a->index != D_NONE)
			return 1;	/* can't use BP-base with index */
		if(f)
			a->type = s->type+D_INDIR;
	}
	if(f && a->index == t)
		a->index = s->type;
	return 0;
}
// Derived from Inferno utils/6c/reg.c
// http://code.google.com/p/inferno-os/source/browse/utils/6c/reg.c
//
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
// Portions Copyright © 1997-1999 Vita Nuova Limited
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
// Portions Copyright © 2004,2006 Bruce Ellis
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
// Portions Copyright © 2009 The Go Authors. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "gg.h"
#undef EXTERN
#define EXTERN
#include "opt.h"
/* P2R yields the Reg attached to a Prog (Prog.reg is a void*) */
#define	P2R(p)	((Reg*)((p)->reg))

#define	MAGIC	0xb00fbabe

/* set until regopt has installed the %Q formatter */
static	int	first	= 1;

static	void	dumpit(char *str, Reg *r0);
static	int	noreturn(Prog *p);
/*
 * rega returns a Reg initialized from zreg, taken from the
 * free list when one is available and freshly allocated
 * with mal otherwise.
 */
Reg*
rega(void)
{
	Reg *r;

	if(freer != R) {
		r = freer;
		freer = r->link;
	} else
		r = mal(sizeof(*r));

	*r = zreg;
	return r;
}
/*
 * rcmp is the qsort comparison for region[]: regions with the
 * larger cost sort first, and on equal cost the larger variable
 * number sorts first.
 */
int
rcmp(const void *a1, const void *a2)
{
	Rgn *x, *y;
	int d;

	x = (Rgn*)a1;
	y = (Rgn*)a2;

	d = y->cost - x->cost;
	if(d != 0)
		return d;
	return y->varno - x->varno;
}
/*
 * regopt is the register optimizer for one function, whose
 * instruction list starts at firstp.  It builds a Reg node per
 * instruction, links the nodes into a flow graph, propagates
 * variable usage over the graph, picks regions in which a
 * variable can live in a register, rewrites the code to use the
 * chosen registers, optionally runs the peephole pass (when
 * debug['P'] is set), and finally unlinks NOPs and returns the
 * Reg nodes to the free list.
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	int i, z, nr;
	uint32 vreg;
	Bits bit;

	if(first) {
		fmtinstall('Q', Qconv);
		first = 0;
	}

	firstr = R;
	lastr = R;
	nvar = 0;
	regbits = RtoB(D_SP);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
	}

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	nr = 0;
	for(p=firstp; p!=P; p=p->link) {
		/* pseudo-ops get no Reg */
		switch(p->as) {
		case ADATA:
		case AGLOBL:
		case ANAME:
		case ASIGNAME:
			continue;
		}
		r = rega();
		nr++;
		if(firstr == R) {
			firstr = r;
			lastr = r;
		} else {
			lastr->link = r;
			r->p1 = lastr;
			lastr->s1 = r;
			lastr = r;
		}
		r->prog = p;
		p->reg = r;

		/* no fall-through edge out of a return or an unconditional jump */
		r1 = r->p1;
		if(r1 != R) {
			switch(r1->prog->as) {
			case ARET:
			case AJMP:
			case AIRETL:
			case AIRETQ:
				r->p1 = R;
				r1->s1 = R;
			}
		}

		bit = mkvar(r, &p->from);
		if(bany(&bit))
		switch(p->as) {
		/*
		 * funny
		 */
		case ALEAL:
		case ALEAQ:
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
			break;

		/*
		 * left side read
		 */
		default:
			for(z=0; z<BITS; z++)
				r->use1.b[z] |= bit.b[z];
			break;
		}

		bit = mkvar(r, &p->to);
		if(bany(&bit))
		switch(p->as) {
		default:
			yyerror("reg: unknown op: %A", p->as);
			break;

		/*
		 * right side read
		 */
		case ACMPB:
		case ACMPL:
		case ACMPQ:
		case ACMPW:
		case ACOMISS:
		case ACOMISD:
		case AUCOMISS:
		case AUCOMISD:
			for(z=0; z<BITS; z++)
				r->use2.b[z] |= bit.b[z];
			break;

		/*
		 * right side write
		 */
		case ANOP:
		case AMOVL:
		case AMOVQ:
		case AMOVB:
		case AMOVW:
		case AMOVBLSX:
		case AMOVBLZX:
		case AMOVBQSX:
		case AMOVBQZX:
		case AMOVLQSX:
		case AMOVLQZX:
		case AMOVWLSX:
		case AMOVWLZX:
		case AMOVWQSX:
		case AMOVWQZX:

		case AMOVSS:
		case AMOVSD:
		case ACVTSD2SL:
		case ACVTSD2SQ:
		case ACVTSD2SS:
		case ACVTSL2SD:
		case ACVTSL2SS:
		case ACVTSQ2SD:
		case ACVTSQ2SS:
		case ACVTSS2SD:
		case ACVTSS2SL:
		case ACVTSS2SQ:
		case ACVTTSD2SL:
		case ACVTTSD2SQ:
		case ACVTTSS2SL:
		case ACVTTSS2SQ:
			for(z=0; z<BITS; z++)
				r->set.b[z] |= bit.b[z];
			break;

		/*
		 * right side read+write
		 */
		case AADDB:
		case AADDL:
		case AADDQ:
		case AADDW:
		case AANDB:
		case AANDL:
		case AANDQ:
		case AANDW:
		case ASUBB:
		case ASUBL:
		case ASUBQ:
		case ASUBW:
		case AORB:
		case AORL:
		case AORQ:
		case AORW:
		case AXORB:
		case AXORL:
		case AXORQ:
		case AXORW:
		case ASALB:
		case ASALL:
		case ASALQ:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARQ:
		case ASARW:
		case AROLB:
		case AROLL:
		case AROLQ:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORQ:
		case ARORW:
		case ASHLB:
		case ASHLL:
		case ASHLQ:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRQ:
		case ASHRW:
		case AIMULL:
		case AIMULQ:
		case AIMULW:
		case ANEGL:
		case ANEGQ:
		case ANOTL:
		case ANOTQ:
		case AADCL:
		case AADCQ:
		case ASBBL:
		case ASBBQ:

		case AADDSD:
		case AADDSS:
		case ACMPSD:
		case ACMPSS:
		case ADIVSD:
		case ADIVSS:
		case AMAXSD:
		case AMAXSS:
		case AMINSD:
		case AMINSS:
		case AMULSD:
		case AMULSS:
		case ARCPSS:
		case ARSQRTSS:
		case ASQRTSD:
		case ASQRTSS:
		case ASUBSD:
		case ASUBSS:
		case AXORPD:
			for(z=0; z<BITS; z++) {
				r->set.b[z] |= bit.b[z];
				r->use2.b[z] |= bit.b[z];
			}
			break;

		/*
		 * funny
		 */
		case ACALL:
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
			break;
		}

		/* record machine registers that the instruction uses implicitly */
		switch(p->as) {
		case AIMULL:
		case AIMULQ:
		case AIMULW:
			if(p->to.type != D_NONE)
				break;
			/* no explicit destination: fall through to the AX/DX group */

		case AIDIVB:
		case AIDIVL:
		case AIDIVQ:
		case AIDIVW:
		case AIMULB:
		case ADIVB:
		case ADIVL:
		case ADIVQ:
		case ADIVW:
		case AMULB:
		case AMULL:
		case AMULQ:
		case AMULW:

		case ACWD:
		case ACDQ:
		case ACQO:
			r->regu |= RtoB(D_AX) | RtoB(D_DX);
			break;

		case AREP:
		case AREPN:
		case ALOOP:
		case ALOOPEQ:
		case ALOOPNE:
			r->regu |= RtoB(D_CX);
			break;

		case AMOVSB:
		case AMOVSL:
		case AMOVSQ:
		case AMOVSW:
		case ACMPSB:
		case ACMPSL:
		case ACMPSQ:
		case ACMPSW:
			r->regu |= RtoB(D_SI) | RtoB(D_DI);
			break;

		case ASTOSB:
		case ASTOSL:
		case ASTOSQ:
		case ASTOSW:
		case ASCASB:
		case ASCASL:
		case ASCASQ:
		case ASCASW:
			r->regu |= RtoB(D_AX) | RtoB(D_DI);
			break;

		case AINSB:
		case AINSL:
		case AINSW:
		case AOUTSB:
		case AOUTSL:
		case AOUTSW:
			r->regu |= RtoB(D_DI) | RtoB(D_DX);
			break;
		}
	}
	if(firstr == R)
		return;
//dumpit("pass1", firstr);

	/*
	 * pass 2
	 * turn branch references to pointers
	 * build back pointers
	 */
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		if(p->to.type == D_BRANCH) {
			if(p->to.branch == P)
				fatal("pnil %P", p);
			r1 = p->to.branch->reg;
			if(r1 == R)
				fatal("rnil %P", p);
			if(r1 == r) {
				fatal("ref to self %P", p);
				continue;
			}
			r->s2 = r1;
			/* push r onto the target's list of branch predecessors */
			r->p2link = r1->p2;
			r1->p2 = r;
		}
	}
//dumpit("pass2", firstr);

	/*
	 * pass 2.5
	 * find looping structure
	 */
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	change = 0;
	loopit(firstr, nr);
//dumpit("pass2.5", firstr);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	for(r = firstr; r != R; r = r->link)
		if(r->prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = r->link;
		if(r1 && r1->active && !r->active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;
//dumpit("pass3", firstr);

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;
//dumpit("pass4", firstr);

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit)) {
			warn("used and not set: %Q", bit);
			if(debug['R'] && !debug['w'])
				print("used and not set: %Q\n", bit);
		}
	}
	for(r = firstr; r != R; r = r->link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = r->link) {
		/* a store that nothing later reads is removed */
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit)) {
			warn("set and not used: %Q", bit);
			if(debug['R'])
				print("set and not used: %Q\n", bit);
			excise(r);
		}
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			change = 0;
			if(debug['R'] && debug['v'])
				print("\n");
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			/* paint1 leaves the region's cost in change; skip unprofitable regions */
			if(change <= 0) {
				if(debug['R'])
					print("%L$%d: %Q\n",
						r->prog->lineno, change, blsh(i));
				continue;
			}
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				fatal("too many regions");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(debug['P']) {
		peep();
	}

	/*
	 * eliminate nops
	 * free aux structures
	 */
	for(p=firstp; p!=P; p=p->link) {
		while(p->link && p->link->as == ANOP)
			p->link = p->link->link;
	}

	/*
	 * Return the whole Reg list to the free list by splicing it
	 * in front of freer.  The tail is lastr; the scratch variable
	 * r1 cannot be used for this because the "unreachable code"
	 * loop above always leaves it equal to R.
	 */
	if(lastr != R) {
		lastr->link = freer;
		freer = firstr;
	}
}
/*
 * add mov b,rn
 * just after r
 *
 * addmove inserts a new move instruction after r's Prog that
 * transfers variable number bn (an index into var[]) to or from
 * register rn.  With f non-zero the move is register -> variable;
 * with f zero it is variable -> register.  The opcode is chosen
 * from the variable's element type.
 */
void
addmove(Reg *r, int bn, int rn, int f)
{
	Prog *p, *p1;
	Adr *a;
	Var *v;

	p1 = mal(sizeof(*p1));
	clearp(p1);
	p1->loc = 9999;

	/* link the new instruction right after r's */
	p = r->prog;
	p1->link = p->link;
	p->link = p1;
	p1->lineno = p->lineno;

	/* describe the variable as the destination operand */
	v = var + bn;

	a = &p1->to;
	a->sym = v->sym;
	a->offset = v->offset;
	a->etype = v->etype;
	a->type = v->name;

	// need to clean this up with wptr and
	// some of the defaults
	p1->as = AMOVL;
	switch(v->etype) {
	default:
		fatal("unknown type\n");
	case TINT8:
	case TUINT8:
	case TBOOL:
		p1->as = AMOVB;
		break;
	case TINT16:
	case TUINT16:
		p1->as = AMOVW;
		break;
	case TINT64:
	case TUINT64:
	case TUINTPTR:
	case TPTR64:
		p1->as = AMOVQ;
		break;
	case TFLOAT:
	case TFLOAT32:
		p1->as = AMOVSS;
		break;
	case TFLOAT64:
		/* 64-bit float needs the double-precision move, not MOVSS */
		p1->as = AMOVSD;
		break;
	case TINT:
	case TUINT:
	case TINT32:
	case TUINT32:
	case TPTR32:
		break;
	}

	p1->from.type = rn;
	if(!f) {
		/* load instead of store: swap so the variable is the source */
		p1->from = *a;
		*a = zprog.from;
		a->type = rn;
		if(v->etype == TUINT8)
			p1->as = AMOVB;
		if(v->etype == TUINT16)
			p1->as = AMOVW;
	}
	/* trace of the inserted move; printed unconditionally (the debug['R'] guard was disabled) */
	print("%P\t.a%P\n", p, p1);
}
/*
 * Map an operand type / register number to its bit in the register mask.
 * Values at or above D_INDIR are first reduced by D_INDIR.  Byte
 * registers (D_AL..D_R15B and D_AH..D_BH) are folded onto the range
 * starting at D_AX before conversion with RtoB; D_X0..D_X0+15 go
 * through FtoB.  Anything else yields 0.
 */
uint32
doregbits(int r)
{
	int reg;

	reg = r;
	if(reg >= D_INDIR)
		reg -= D_INDIR;

	if(reg >= D_AX && reg <= D_R15)
		return RtoB(reg);
	if(reg >= D_AL && reg <= D_R15B)
		return RtoB(reg - D_AL + D_AX);
	if(reg >= D_AH && reg <= D_BH)
		return RtoB(reg - D_AH + D_AX);
	if(reg >= D_X0 && reg <= D_X0+15)
		return FtoB(reg);
	return 0;
}
/*
 * mkvar notes the registers mentioned by operand a in r->regu and
 * returns the Bits value identifying the variable that a names.
 * zbits is returned when the operand is not a tracked variable:
 * wrong operand class, no symbol, an untracked etype, or var[] full.
 *
 * Side effects on file-level state: may append an entry to var[]
 * (bumping nvar) and may OR the variable's bit into externs, params
 * and addrs.
 */
Bits
mkvar(Reg *r, Adr *a)
{
Var *v;
int i, t, n, et, z;
int32 o;
Bits bit;
Sym *s;
/*
* mark registers used
*/
t = a->type;
r->regu |= doregbits(t);
r->regu |= doregbits(a->index);
switch(t) {
default:
goto none;
case D_ADDR:
/*
 * address-of operand: temporarily retype the operand as a->index,
 * look the variable up recursively, record it in addrs, then
 * restore the type.  The operand itself contributes no bit.
 */
a->type = a->index;
bit = mkvar(r, a);
for(z=0; z<BITS; z++)
addrs.b[z] |= bit.b[z];
a->type = t;
goto none;
case D_EXTERN:
case D_STATIC:
case D_PARAM:
case D_AUTO:
/* n is the name class stored with the variable */
n = t;
break;
}
s = a->sym;
if(s == S)
goto none;
// if(s->name[0] == '.')
// goto none;
et = a->etype;
o = a->offset;
/* search var[] for an entry with the same symbol, name class and offset */
v = var;
for(i=0; i<nvar; i++) {
if(s == v->sym)
if(n == v->name)
if(o == v->offset)
goto out;
v++;
}
/* not seen before: these etypes are never entered into var[] */
switch(et) {
case TFUNC:
case TARRAY:
case 0:
goto none;
}
/* table full: give up on this variable */
if(nvar >= NVAR) {
if(debug['w'] > 1 && s)
fatal("variable not optimized: %s", s->name);
goto none;
}
/* append a new entry */
i = nvar;
nvar++;
v = &var[i];
v->sym = s;
v->offset = o;
v->name = n;
v->etype = et;
if(debug['R'])
print("bit=%2d et=%2d %D\n", i, et, a);
out:
/* i is the var[] index here, whether found or newly added */
/* presumably blsh(i) yields a Bits with only bit i set -- confirm in bits.c */
bit = blsh(i);
if(n == D_EXTERN || n == D_STATIC)
for(z=0; z<BITS; z++)
externs.b[z] |= bit.b[z];
if(n == D_PARAM)
for(z=0; z<BITS; z++)
params.b[z] |= bit.b[z];
if(v->etype != et) {
/* funny punning */
/* same location accessed with a different etype: add it to addrs */
/* NOTE(review): this print is unconditional, not guarded by a debug flag */
print("pun %d %d %S\n", v->etype, et, s);
for(z=0; z<BITS; z++)
addrs.b[z] |= bit.b[z];
}
return bit;
none:
return zbits;
}
/*
 * prop pushes the bit sets ref and cal backwards through the flow
 * graph starting at r.
 *
 * The first loop follows the p1 links.  At each node it merges ref/cal
 * into refahead/calahead (incrementing the global change for every word
 * that grows), adjusts the sets for ACALL, ATEXT and ARET, applies the
 * node's own set/use1/use2 and stores the result in refbehind/calbehind.
 * It stops at the end of the chain or at a node whose active flag was
 * already set.
 *
 * The second loop revisits the nodes walked (up to, but not including,
 * the stopping node) and recurses into each one's p2 predecessor list
 * with that node's refbehind/calbehind.
 */
void
prop(Reg *r, Bits ref, Bits cal)
{
Reg *r1, *r2;
int z;
for(r1 = r; r1 != R; r1 = r1->p1) {
/* merge incoming sets with what is already recorded ahead of r1 */
for(z=0; z<BITS; z++) {
ref.b[z] |= r1->refahead.b[z];
if(ref.b[z] != r1->refahead.b[z]) {
r1->refahead.b[z] = ref.b[z];
change++;
}
cal.b[z] |= r1->calahead.b[z];
if(cal.b[z] != r1->calahead.b[z]) {
r1->calahead.b[z] = cal.b[z];
change++;
}
}
switch(r1->prog->as) {
case ACALL:
/* calls that noreturn() recognizes leave the sets untouched */
if(noreturn(r1->prog))
break;
/* everything in ref, plus all externs, moves into cal; ref is cleared */
for(z=0; z<BITS; z++) {
cal.b[z] |= ref.b[z] | externs.b[z];
ref.b[z] = 0;
}
break;
case ATEXT:
/* both sets are cleared at ATEXT */
for(z=0; z<BITS; z++) {
cal.b[z] = 0;
ref.b[z] = 0;
}
break;
case ARET:
/* at a return only the externs remain, and they go in cal */
for(z=0; z<BITS; z++) {
cal.b[z] = externs.b[z];
ref.b[z] = 0;
}
}
/* account for what this instruction sets and uses, then record behind-sets */
for(z=0; z<BITS; z++) {
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
r1->use1.b[z] | r1->use2.b[z];
cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
r1->refbehind.b[z] = ref.b[z];
r1->calbehind.b[z] = cal.b[z];
}
/* already visited in this sweep: stop here (r1 marks the boundary below) */
if(r1->active)
break;
r1->active = 1;
}
/* propagate into the p2 predecessors of every node walked above */
for(; r != r1; r = r->p1)
for(r2 = r->p2; r2 != R; r2 = r2->p2link)
prop(r2, r->refbehind, r->calbehind);
}
/*
* find looping structure
*
* 1) find reverse postordering
* 2) find approximate dominators,
* the actual dominators if the flow graph is reducible
* otherwise, dominators plus some other non-dominators.
* See Matthew S. Hecht and Jeffrey D. Ullman,
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
* Oct. 1-3, 1973, pp. 207-217.
* 3) find all nodes with a predecessor dominated by the current node.
* such a node is a loop head.
* recursively, all preds with a greater rpo number are in the loop
*/
/*
 * Depth-first walk over the s1/s2 successors of r, storing each node
 * into rpo2r[] after its successors have been stored.  n is the next
 * free slot on entry; the next free slot after the walk is returned.
 * rpo is set to 1 on entry to a node and doubles as the visited mark.
 */
int32
postorder(Reg *r, Reg **rpo2r, int32 n)
{
	Reg *succ;

	r->rpo = 1;

	succ = r->s1;
	if(succ != nil && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	succ = r->s2;
	if(succ != nil && succ->rpo == 0)
		n = postorder(succ, rpo2r, n);

	rpo2r[n++] = r;
	return n;
}
/*
 * Return the common ancestor of rpo1 and rpo2 found by climbing the
 * idom[] links from whichever of the two is larger until they meet.
 * rpo1 == -1 means "no node yet" and yields rpo2 unchanged.
 * An idom entry that does not decrease is reported with fatal().
 */
int32
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
{
	int32 lo, hi, up;

	if(rpo1 == -1)
		return rpo2;

	lo = rpo1;
	hi = rpo2;
	while(lo != hi) {
		/* keep lo <= hi */
		if(lo > hi) {
			up = hi;
			hi = lo;
			lo = up;
		}
		/* climb from the larger number */
		while(lo < hi) {
			up = idom[hi];
			if(up >= hi)
				fatal("bad idom");
			hi = up;
		}
	}
	return lo;
}
/*
 * Report whether r is reached from s by following idom[] links:
 * climb from s while the number is greater than r, then compare.
 */
int
doms(int32 *idom, int32 r, int32 s)
{
	int32 cur;

	for(cur = s; cur > r; cur = idom[cur])
		;
	return cur == r;
}
/*
 * Return 1 if any predecessor of r (the p1 link or an entry on the
 * p2 list) is dominated by r according to doms(), otherwise 0.
 */
int
loophead(int32 *idom, Reg *r)
{
	int32 self;
	Reg *pred;

	self = r->rpo;
	if(r->p1 != R && doms(idom, self, r->p1->rpo))
		return 1;

	pred = r->p2;
	while(pred != R) {
		if(doms(idom, self, pred->rpo))
			return 1;
		pred = pred->p2link;
	}
	return 0;
}
/*
 * Starting at r, walk predecessors (p1 and the p2 list) recursively,
 * adding LOOP to the loop weight of every node whose rpo number is not
 * below head.  The active field is stamped with head so a node is
 * counted only once per head.  rpo2r is only passed through.
 */
void
loopmark(Reg **rpo2r, int32 head, Reg *r)
{
	Reg *pred;

	if(r->rpo < head)
		return;
	if(r->active == head)
		return;

	r->active = head;
	r->loop += LOOP;

	if(r->p1 != R)
		loopmark(rpo2r, head, r->p1);

	pred = r->p2;
	while(pred != R) {
		loopmark(rpo2r, head, pred);
		pred = pred->p2link;
	}
}
/*
 * Compute loop weights for the flow graph rooted at r, which has at
 * most nr nodes:
 *   - grow the rpo2r[] and idom[] work arrays if nr exceeds maxnr,
 *   - number the nodes in reverse postorder,
 *   - fill idom[] using rpolca over lower-numbered predecessors,
 *   - bump every node's loop count, and call loopmark for each node
 *     that loophead() accepts.
 */
void
loopit(Reg *r, int32 nr)
{
	Reg *node, *pred, *tmp;
	int32 i, lo, hi, d, me;

	if(nr > maxnr) {
		rpo2r = mal(nr * sizeof(Reg*));
		idom = mal(nr * sizeof(int32));
		maxnr = nr;
	}

	d = postorder(r, rpo2r, 0);
	if(d > nr)
		fatal("too many reg nodes %d %d", d, nr);
	nr = d;

	/* reverse the postorder list in place */
	for(lo = 0, hi = nr - 1; lo < hi; lo++, hi--) {
		tmp = rpo2r[lo];
		rpo2r[lo] = rpo2r[hi];
		rpo2r[hi] = tmp;
	}

	/* each node now learns its reverse-postorder number */
	for(i = 0; i < nr; i++)
		rpo2r[i]->rpo = i;

	idom[0] = 0;
	for(i = 0; i < nr; i++) {
		node = rpo2r[i];
		me = node->rpo;
		d = -1;
		if(node->p1 != R && node->p1->rpo < me)
			d = node->p1->rpo;
		for(pred = node->p2; pred != nil; pred = pred->p2link)
			if(pred->rpo < me)
				d = rpolca(idom, d, pred->rpo);
		idom[i] = d;
	}

	for(i = 0; i < nr; i++) {
		node = rpo2r[i];
		node->loop++;
		if(node->p2 != R && loophead(idom, node))
			loopmark(rpo2r, i, node);
	}
}
/*
 * Propagate the set dif forward from r along s1 links, recursing into
 * s2 branches.  At each node dif loses the bits that are in refahead
 * but not in refbehind, gains the node's set and existing regdiff bits,
 * and is stored back into regdiff (the global change counts every word
 * that differs).  Nodes already marked active end the walk.
 */
void
synch(Reg *r, Bits dif)
{
	Reg *cur;
	int w;

	cur = r;
	while(cur != R) {
		for(w = 0; w < BITS; w++) {
			/* ~(~rb & ra) written as (rb | ~ra) */
			dif.b[w] = (dif.b[w] &
				(cur->refbehind.b[w] | ~cur->refahead.b[w])) |
				cur->set.b[w] | cur->regdiff.b[w];
			if(dif.b[w] != cur->regdiff.b[w]) {
				cur->regdiff.b[w] = dif.b[w];
				change++;
			}
		}

		if(cur->active)
			break;
		cur->active = 1;

		/* drop bits that are in calahead but not in calbehind */
		for(w = 0; w < BITS; w++)
			dif.b[w] &= (cur->calbehind.b[w] | ~cur->calahead.b[w]);

		if(cur->s2 != R)
			synch(cur->s2, dif);

		cur = cur->s1;
	}
}
/*
 * Pick a register for region r.  b is the mask of registers that are
 * unavailable; the choice is made from its complement, using BtoR for
 * integer/pointer/bool etypes and BtoF for floating etypes.
 * r->regno is always reset to 0 first and is set only when a register
 * was found and r->cost is positive.  Returns the chosen register's
 * bit, or 0 if none was assigned.
 */
uint32
allreg(uint32 b, Rgn *r)
{
	Var *v;
	int reg;

	v = &var[r->varno];
	r->regno = 0;

	switch(v->etype) {
	case TINT8:
	case TUINT8:
	case TINT16:
	case TUINT16:
	case TINT32:
	case TUINT32:
	case TINT64:
	case TUINT64:
	case TINT:
	case TUINT:
	case TUINTPTR:
	case TBOOL:
	case TPTR32:
	case TPTR64:
		reg = BtoR(~b);
		if(reg == 0 || r->cost <= 0)
			return 0;
		r->regno = reg;
		return RtoB(reg);

	case TFLOAT32:
	case TFLOAT64:
	case TFLOAT80:
	case TFLOAT:
		reg = BtoF(~b);
		if(reg == 0 || r->cost <= 0)
			return 0;
		r->regno = reg;
		return FtoB(reg);

	default:
		fatal("unknown etype %d/%d", bitno(b), v->etype);
		break;
	}
	return 0;
}
/*
 * paint1 marks, in the act sets, the region of instructions around r
 * over which variable number bn is referenced, and accumulates an
 * estimate of the benefit of keeping it in a register in the global
 * change: CREF * loop is added for each use or set, CLOAD * loop is
 * subtracted where a load or store would be needed.
 * Nodes whose act bit is already set are not visited again.
 */
void
paint1(Reg *r, int bn)
{
Reg *r1;
Prog *p;
int z;
uint32 bb;
/* z: word index of the variable's bit, bb: its mask within that word */
z = bn/32;
bb = 1L<<(bn%32);
if(r->act.b[z] & bb)
return;
/*
 * back up along p1 while the variable is in refbehind here and in
 * refahead of the predecessor, and the predecessor is not yet painted
 */
for(;;) {
if(!(r->refbehind.b[z] & bb))
break;
r1 = r->p1;
if(r1 == R)
break;
if(!(r1->refahead.b[z] & bb))
break;
if(r1->act.b[z] & bb)
break;
r = r1;
}
/*
 * charge for a load at the start of the region, unless this
 * instruction sets the variable without also using it.
 * NOTE(review): LOAD is a macro defined elsewhere; it appears to
 * use the local z -- confirm.
 */
if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
change -= CLOAD * r->loop;
if(debug['R'] && debug['v'])
print("%ld%P\tld %Q $%d\n", r->loop,
r->prog, blsh(bn), change);
}
/* walk forward along s1, painting and scoring each instruction */
for(;;) {
r->act.b[z] |= bb;
p = r->prog;
if(r->use1.b[z] & bb) {
change += CREF * r->loop;
if(debug['R'] && debug['v'])
print("%ld%P\tu1 %Q $%d\n", r->loop,
p, blsh(bn), change);
}
if((r->use2.b[z]|r->set.b[z]) & bb) {
change += CREF * r->loop;
if(debug['R'] && debug['v'])
print("%ld%P\tu2 %Q $%d\n", r->loop,
p, blsh(bn), change);
}
/* charge for a store where STORE(r) and regdiff both have the bit */
if(STORE(r) & r->regdiff.b[z] & bb) {
change -= CLOAD * r->loop;
if(debug['R'] && debug['v'])
print("%ld%P\tst %Q $%d\n", r->loop,
p, blsh(bn), change);
}
/* paint the p2 predecessors that have the variable in refahead */
if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
if(r1->refahead.b[z] & bb)
paint1(r1, bn);
/* variable not in refahead: the region ends here */
if(!(r->refahead.b[z] & bb))
break;
/* paint the s2 successor if it has the variable in refbehind */
r1 = r->s2;
if(r1 != R)
if(r1->refbehind.b[z] & bb)
paint1(r1, bn);
/* continue with s1 unless it is absent, painted, or lacks the refbehind bit */
r = r->s1;
if(r == R)
break;
if(r->act.b[z] & bb)
break;
if(!(r->refbehind.b[z] & bb))
break;
}
}
/*
 * For every register bit in bb, ask copyu() about r's instruction and
 * collect the bits for which it answers 3.  Each bit is converted to
 * a register with BtoR (low 16 bits) or BtoF (otherwise); a bit that
 * converts to 0 is reported with fatal().
 */
uint32
regset(Reg *r, uint32 bb)
{
	uint32 rest, low, found;
	Adr probe;
	int c;

	found = 0;
	probe = zprog.from;
	for(rest = bb; rest != 0; rest &= ~low) {
		/* isolate the lowest remaining bit */
		low = rest & ~(rest-1);
		if(low & 0xFFFF)
			probe.type = BtoR(low);
		else
			probe.type = BtoF(low);
		if(probe.type == 0)
			fatal("zero v.type for %#lux", low);
		c = copyu(r->prog, &probe, A);
		if(c == 3)
			found |= low;
	}
	return found;
}
/*
 * For every register bit in bb, ask copyu() about r's instruction and
 * collect the bits for which it answers 1, 2 or 4.  Each bit is
 * converted to a register with BtoR (low 16 bits) or BtoF (otherwise).
 */
uint32
reguse(Reg *r, uint32 bb)
{
	uint32 rest, low, found;
	Adr probe;
	int c;

	found = 0;
	probe = zprog.from;
	for(rest = bb; rest != 0; rest &= ~low) {
		/* isolate the lowest remaining bit */
		low = rest & ~(rest-1);
		if(low & 0xFFFF)
			probe.type = BtoR(low);
		else
			probe.type = BtoF(low);
		c = copyu(r->prog, &probe, A);
		switch(c) {
		case 1:
		case 2:
		case 4:
			found |= low;
			break;
		}
	}
	return found;
}
/*
 * paint2 walks the region previously painted for variable number bn,
 * starting at r, clearing the act bit on every instruction it visits
 * and collecting the registers used there.  The result starts from the
 * global regbits and has every visited instruction's regu ORed in.
 * If r is not painted for bn, regbits alone is returned.
 */
uint32
paint2(Reg *r, int bn)
{
Reg *r1;
int z;
uint32 bb, vreg, x;
/* z: word index of the variable's bit, bb: its mask within that word */
z = bn/32;
bb = 1L << (bn%32);
vreg = regbits;
if(!(r->act.b[z] & bb))
return vreg;
/* back up along p1 while the predecessor is still part of the painted region */
for(;;) {
if(!(r->refbehind.b[z] & bb))
break;
r1 = r->p1;
if(r1 == R)
break;
if(!(r1->refahead.b[z] & bb))
break;
if(!(r1->act.b[z] & bb))
break;
r = r1;
}
/* walk forward, unpainting and accumulating register usage */
for(;;) {
r->act.b[z] &= ~bb;
vreg |= r->regu;
if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
if(r1->refahead.b[z] & bb)
vreg |= paint2(r1, bn);
if(!(r->refahead.b[z] & bb))
break;
r1 = r->s2;
if(r1 != R)
if(r1->refbehind.b[z] & bb)
vreg |= paint2(r1, bn);
r = r->s1;
if(r == R)
break;
if(!(r->act.b[z] & bb))
break;
if(!(r->refbehind.b[z] & bb))
break;
}
/*
 * continue along s1 from where the walk stopped (r may be R here).
 * bb is reused as a register mask from this point on: registers in
 * regu but not yet in bb are added to vreg if reguse() reports them,
 * and added to bb if regset() reports them.
 */
bb = vreg;
for(; r; r=r->s1) {
x = r->regu & ~bb;
if(x) {
vreg |= reguse(r, x);
bb |= regset(r, x);
}
}
return vreg;
}
/*
 * paint3 rewrites the region around r for variable number bn so that
 * it uses register rn: operands that use or set the variable are
 * replaced via addreg, and addmove inserts a load at the start of the
 * region and stores where STORE(r) and regdiff call for them.
 * rb is ORed into regu of every instruction visited.  The act bit is
 * set again as instructions are processed, which stops revisits.
 */
void
paint3(Reg *r, int bn, int32 rb, int rn)
{
Reg *r1;
Prog *p;
int z;
uint32 bb;
/* z: word index of the variable's bit, bb: its mask within that word */
z = bn/32;
bb = 1L << (bn%32);
if(r->act.b[z] & bb)
return;
/* back up along p1 to the start of the region (same test as paint1) */
for(;;) {
if(!(r->refbehind.b[z] & bb))
break;
r1 = r->p1;
if(r1 == R)
break;
if(!(r1->refahead.b[z] & bb))
break;
if(r1->act.b[z] & bb)
break;
r = r1;
}
/* insert the load (f == 0) unless the instruction only sets the variable */
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
addmove(r, bn, rn, 0);
for(;;) {
r->act.b[z] |= bb;
p = r->prog;
/* use1 refers to the from operand */
if(r->use1.b[z] & bb) {
if(debug['R'])
print("%P", p);
addreg(&p->from, rn);
if(debug['R'])
print("\t.c%P\n", p);
}
/* use2 and set refer to the to operand */
if((r->use2.b[z]|r->set.b[z]) & bb) {
if(debug['R'])
print("%P", p);
addreg(&p->to, rn);
if(debug['R'])
print("\t.c%P\n", p);
}
/* insert the store (f == 1) after this instruction */
if(STORE(r) & r->regdiff.b[z] & bb)
addmove(r, bn, rn, 1);
r->regu |= rb;
if(r->refbehind.b[z] & bb)
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
if(r1->refahead.b[z] & bb)
paint3(r1, bn, rb, rn);
if(!(r->refahead.b[z] & bb))
break;
r1 = r->s2;
if(r1 != R)
if(r1->refbehind.b[z] & bb)
paint3(r1, bn, rb, rn);
r = r->s1;
if(r == R)
break;
if(r->act.b[z] & bb)
break;
if(!(r->refbehind.b[z] & bb))
break;
}
}
/*
 * Turn operand a into a plain reference to register rn:
 * the symbol and offset are cleared and the type becomes rn.
 */
void
addreg(Adr *a, int rn)
{
	a->type = rn;
	a->offset = 0;
	a->sym = 0;
}
/*
 * Register number to mask bit: D_AX..D_R15 map to bits 0 upward;
 * anything outside that range gives 0.
 */
int32
RtoB(int r)
{
	if(r >= D_AX && r <= D_R15)
		return 1L << (r-D_AX);
	return 0;
}
/*
 * Mask to register number: only the low 16 bits of b are considered.
 * Returns 0 when none of them is set, otherwise bitno() of the masked
 * value offset by D_AX.
 */
int
BtoR(int32 b)
{
	int32 gp;

	gp = b & 0xffffL;
	if(gp != 0)
		return bitno(gp) + D_AX;
	return 0;
}
/*
* bit reg
* 16 X5
* 17 X6
* 18 X7
*/
/*
 * Floating register number to mask bit: FREGMIN..FREGEXT map to
 * bits 16 upward; anything outside that range gives 0.
 */
int32
FtoB(int f)
{
	if(f >= FREGMIN && f <= FREGEXT)
		return 1L << (f - FREGMIN + 16);
	return 0;
}
/*
 * Mask to floating register number: only bits 16..18 of b are
 * considered.  Returns 0 when none of them is set, otherwise bitno()
 * of the masked value rebased from bit 16 to FREGMIN.
 */
int
BtoF(int32 b)
{
	int32 fp;

	fp = b & 0x70000L;
	if(fp != 0)
		return bitno(fp) - 16 + FREGMIN;
	return 0;
}
/*
 * Debug dump: print str, then one line per Reg on the link chain from
 * r0 with its loop weight, instruction and every non-empty bit set,
 * followed by the loc of each p2 predecessor when there are any.
 */
static void
dumpit(char *str, Reg *r0)
{
	Reg *cur, *pred;
	int w;
	Bits all;

	print("\n%s\n", str);
	for(cur = r0; cur != R; cur = cur->link) {
		print("%ld:%P", cur->loop, cur->prog);

		/* union of all sets, to decide whether anything needs printing */
		for(w = 0; w < BITS; w++) {
			all.b[w] = cur->set.b[w];
			all.b[w] |= cur->use1.b[w];
			all.b[w] |= cur->use2.b[w];
			all.b[w] |= cur->refbehind.b[w];
			all.b[w] |= cur->refahead.b[w];
			all.b[w] |= cur->calbehind.b[w];
			all.b[w] |= cur->calahead.b[w];
			all.b[w] |= cur->regdiff.b[w];
			all.b[w] |= cur->act.b[w];
		}

		if(bany(&all)) {
			print("\t");
			if(bany(&cur->set))
				print(" s:%Q", cur->set);
			if(bany(&cur->use1))
				print(" u1:%Q", cur->use1);
			if(bany(&cur->use2))
				print(" u2:%Q", cur->use2);
			if(bany(&cur->refbehind))
				print(" rb:%Q ", cur->refbehind);
			if(bany(&cur->refahead))
				print(" ra:%Q ", cur->refahead);
			if(bany(&cur->calbehind))
				print("cb:%Q ", cur->calbehind);
			if(bany(&cur->calahead))
				print(" ca:%Q ", cur->calahead);
			if(bany(&cur->regdiff))
				print(" d:%Q ", cur->regdiff);
			if(bany(&cur->act))
				print(" a:%Q ", cur->act);
		}
		print("\n");

		pred = cur->p2;
		if(pred != R) {
			print(" pred:");
			while(pred != R) {
				print(" %.4lud", pred->prog->loc);
				pred = pred->p2link;
			}
			print("\n");
		}
	}
}
/* S-terminated table of symbols for noreturn(); filled in on first use */
static Sym*	symlist[10];

/*
 * Return 1 if the symbol in p's to operand is listed in symlist,
 * otherwise 0.  The table is initialized lazily with the symbol
 * obtained from pkglookup("throwindex", "sys").
 */
static int
noreturn(Prog *p)
{
	Sym *callee, **sp;

	if(symlist[0] == S)
		symlist[0] = pkglookup("throwindex", "sys");

	callee = p->to.sym;
	if(callee == S)
		return 0;

	for(sp = symlist; *sp != S; sp++)
		if(*sp == callee)
			return 1;
	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment