Commit 1ac2cfc7 authored by Kai Backman's avatar Kai Backman

grab bag of changes aimed at getting stack splitting to work:

- morestack support for 5l and arm runtime
- argsize support in 5c, 5l, ar and nm. assembly code from 5a
  will break in interesting ways unless NOSPLIT is specified
- explicit cond execution constants
- fix 5l output to use %d instead of %ld so that negative
  values show.
- added a lot of code to arm/asm.s. runtime entry code almost
  working currently aborts at gogo not implemented

R=rsc
APPROVED=rsc
DELTA=305  (125 added, 29 deleted, 151 changed)
OCL=30246
CL=30347
parent d45442ed
......@@ -61,6 +61,7 @@ typedef struct Rgn Rgn;
struct Adr
{
int32 offset;
int32 offset2;
double dval;
char sval[NSNAME];
Ieee ieee;
......@@ -140,7 +141,7 @@ struct Reg
int32 regu;
int32 loop; /* could be shorter */
Reg* log5;
int32 active;
......@@ -362,10 +363,10 @@ int32 FtoB(int);
int BtoR(int32);
int BtoF(int32);
void predicate(void);
int isbranch(Prog *);
int predicable(Prog *p);
int modifiescpsr(Prog *p);
void predicate(void);
int isbranch(Prog *);
int predicable(Prog *p);
int modifiescpsr(Prog *p);
#pragma varargck type "A" int
#pragma varargck type "B" Bits
......
......@@ -28,6 +28,7 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#define EXTERN
#include "gc.h"
......@@ -71,9 +72,9 @@ Bconv(Fmt *fp)
}
char *extra [] = {
".EQ", ".NE", ".CS", ".CC",
".MI", ".PL", ".VS", ".VC",
".HI", ".LS", ".GE", ".LT",
".EQ", ".NE", ".CS", ".CC",
".MI", ".PL", ".VS", ".VC",
".HI", ".LS", ".GE", ".LT",
".GT", ".LE", "", ".NV",
};
......@@ -86,7 +87,7 @@ Pconv(Fmt *fp)
p = va_arg(fp->args, Prog*);
a = p->as;
s = p->scond;
s = p->scond;
strcpy(sc, extra[s & C_SCOND]);
if(s & C_SBIT)
strcat(sc, ".S");
......@@ -162,6 +163,10 @@ Dconv(Fmt *fp)
sprint(str, "$%N", a);
break;
case D_CONST2:
sprint(str, "$%d-%d", a->offset, a->offset2);
break;
case D_SHIFT:
v = a->offset;
op = "<<>>->@>" + (((v>>5) & 3) << 1);
......@@ -224,6 +229,7 @@ Rconv(Fmt *fp)
sprint(str, "GOK-reglist");
switch(a->type) {
case D_CONST:
case D_CONST2:
if(a->reg != NREG)
break;
if(a->sym != S)
......
......@@ -30,6 +30,31 @@
#include "gc.h"
int32
argsize(void)
{
Type *t;
int32 s;
//print("t=%T\n", thisfn);
s = 0;
for(t=thisfn->down; t!=T; t=t->down) {
switch(t->etype) {
case TVOID:
break;
case TDOT:
s += 64;
break;
default:
s = align(s, t, Aarg1);
s = align(s, t, Aarg2);
break;
}
//print(" %d %T\n", s, t);
}
return (s+7) & ~7;
}
void
codgen(Node *n, Node *nn)
{
......@@ -53,6 +78,9 @@ codgen(Node *n, Node *nn)
}
nearln = nn->lineno;
gpseudo(ATEXT, n1->sym, nodconst(stkoff));
p->to.type = D_CONST2;
p->to.offset2 = argsize();
sp = p;
/*
......@@ -313,7 +341,7 @@ loop:
patch(spc, pc);
gen(l->right->right); /* inc */
patch(sp, pc);
patch(sp, pc);
if(l->left != Z) { /* test */
bcomplex(l->left, Z);
patch(p, breakpc);
......
......@@ -338,7 +338,7 @@ loop:
if(vs < 0) {
gopcode(OAS, &nod1, Z, &nod1);
gopcode(OSUB, &nod1, nodconst(0), nn);
} else
} else
gopcode(OAS, &nod1, Z, nn);
regfree(&nod1);
return 1;
......@@ -649,6 +649,13 @@ zaddr(char *bp, Adr *a, int s)
case D_PSR:
break;
case D_CONST2:
l = a->offset2;
bp[0] = l;
bp[1] = l>>8;
bp[2] = l>>16;
bp[3] = l>>24;
bp += 4; // fall through
case D_OREG:
case D_CONST:
case D_BRANCH:
......
......@@ -84,10 +84,10 @@ enum as
AB,
ABL,
/*
* Do not reorder or fragment the conditional branch
* opcodes, or the predication code will break
*/
/*
* Do not reorder or fragment the conditional branch
* opcodes, or the predication code will break
*/
ABEQ,
ABNE,
ABCS,
......@@ -186,6 +186,22 @@ enum as
#define C_FBIT (1<<7) /* psr flags-only */
#define C_UBIT (1<<7) /* up bit */
#define C_SCOND_EQ 0
#define C_SCOND_NE 1
#define C_SCOND_HS 2
#define C_SCOND_LO 3
#define C_SCOND_MI 4
#define C_SCOND_PL 5
#define C_SCOND_VS 6
#define C_SCOND_VC 7
#define C_SCOND_HI 8
#define C_SCOND_LS 9
#define C_SCOND_GE 10
#define C_SCOND_LT 11
#define C_SCOND_GT 12
#define C_SCOND_LE 13
#define C_SCOND_N 15
/* type/name */
#define D_GOK 0
#define D_NONE 1
......@@ -209,6 +225,7 @@ enum as
#define D_ADDR (D_NONE+22)
#define D_SBIG (D_NONE+23)
#define D_CONST2 (D_NONE+24)
/* name */
#define D_EXTERN (D_NONE+3)
......
......@@ -72,6 +72,7 @@ struct Adr
uchar index; // not used on arm, required by ld/go.c
char reg;
char name;
int32 offset2; // argsize
char class;
};
......
......@@ -61,7 +61,7 @@ Pconv(Fmt *fp)
switch(a) {
default:
s = str;
s += sprint(s, "(%ld)", p->line);
s += sprint(s, "(%d)", p->line);
if(p->reg == NREG)
sprint(s, " %A%C %D,%D",
a, p->scond, &p->from, &p->to);
......@@ -76,23 +76,23 @@ Pconv(Fmt *fp)
case ASWPW:
case ASWPBU:
sprint(str, "(%ld) %A%C R%d,%D,%D",
sprint(str, "(%d) %A%C R%d,%D,%D",
p->line, a, p->scond, p->reg, &p->from, &p->to);
break;
case ADATA:
case AINIT:
case ADYNT:
sprint(str, "(%ld) %A%C %D/%d,%D",
sprint(str, "(%d) %A%C %D/%d,%D",
p->line, a, p->scond, &p->from, p->reg, &p->to);
break;
case AWORD:
sprint(str, "WORD %ld", p->to.offset);
sprint(str, "WORD %x", p->to.offset);
break;
case ADWORD:
sprint(str, "DWORD %ld %ld", p->from.offset, p->to.offset);
sprint(str, "DWORD %x %x", p->from.offset, p->to.offset);
break;
}
return fmtstrcpy(fp, str);
......@@ -178,13 +178,17 @@ Dconv(Fmt *fp)
sprint(str, "$%N(R%d)", a, a->reg);
break;
case D_CONST2:
sprint(str, "$%d-%d", a->offset, a->offset2);
break;
case D_SHIFT:
v = a->offset;
op = "<<>>->@>" + (((v>>5) & 3) << 1);
if(v & (1<<4))
sprint(str, "R%ld%c%cR%ld", v&15, op[0], op[1], (v>>8)&15);
sprint(str, "R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15);
else
sprint(str, "R%ld%c%c%ld", v&15, op[0], op[1], (v>>7)&31);
sprint(str, "R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31);
if(a->reg != NREG)
sprint(str+strlen(str), "(R%d)", a->reg);
break;
......@@ -262,9 +266,9 @@ Dconv(Fmt *fp)
sprint(str, "%.5lux(BRANCH)", v);
} else
if(a->sym != S)
sprint(str, "%s+%ld(APC)", a->sym->name, a->offset);
sprint(str, "%s+%d(APC)", a->sym->name, a->offset);
else
sprint(str, "%ld(APC)", a->offset);
sprint(str, "%d(APC)", a->offset);
break;
case D_FCONST:
......@@ -293,35 +297,35 @@ Nconv(Fmt *fp)
break;
case D_NONE:
sprint(str, "%ld", a->offset);
sprint(str, "%d", a->offset);
break;
case D_EXTERN:
if(s == S)
sprint(str, "%ld(SB)", a->offset);
sprint(str, "%d(SB)", a->offset);
else
sprint(str, "%s+%ld(SB)", s->name, a->offset);
sprint(str, "%s+%d(SB)", s->name, a->offset);
break;
case D_STATIC:
if(s == S)
sprint(str, "<>+%ld(SB)", a->offset);
sprint(str, "<>+%d(SB)", a->offset);
else
sprint(str, "%s<>+%ld(SB)", s->name, a->offset);
sprint(str, "%s<>+%d(SB)", s->name, a->offset);
break;
case D_AUTO:
if(s == S)
sprint(str, "%ld(SP)", a->offset);
sprint(str, "%d(SP)", a->offset);
else
sprint(str, "%s-%ld(SP)", s->name, -a->offset);
sprint(str, "%s-%d(SP)", s->name, -a->offset);
break;
case D_PARAM:
if(s == S)
sprint(str, "%ld(FP)", a->offset);
sprint(str, "%d(FP)", a->offset);
else
sprint(str, "%s+%ld(FP)", s->name, a->offset);
sprint(str, "%s+%d(FP)", s->name, a->offset);
break;
}
return fmtstrcpy(fp, str);
......
......@@ -128,7 +128,7 @@ noops(void)
Bflush(&bso);
pmorestack = P;
symmorestack = lookup("sys·morestack", 0);
symmorestack = lookup("sys·morestackx", 0);
if(symmorestack->type == STEXT)
for(p = firstp; p != P; p = p->link) {
......@@ -342,8 +342,7 @@ noops(void)
break;
}
// if(p->reg & NOSPLIT) {
if(1) {
if(p->reg & NOSPLIT) {
q1 = prg();
q1->as = AMOVW;
q1->scond |= C_WBIT;
......@@ -355,61 +354,78 @@ noops(void)
q1->to.reg = REGSP;
q1->link = p->link;
p->link = q1;
} else { // !NOSPLIT
// split stack check
if(autosize < StackBig) {
p = appendp(p); // load G.stackguard into R1
p->as = AMOVW;
p->from.type = D_OREG;
p->from.reg = REGG;
p->to.type = D_REG;
p->to.reg = 1;
p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
p->from.reg = 1;
p->from.offset = -autosize;
p->reg = REGSP;
}
// TODO(kaib): Optimize the heck out of this
p = appendp(p); // store autosize in M.morearg
} else if (autosize < StackBig) {
// split stack check for small functions
// MOVW (REGG), R1
// CMP R1, $-autosize(SP)
// MOVW.W.LT R14,$-autosize(SP)
// MOVW.W.GE R14,$-4(SP)
// MOVW.GE $(args << 24 | autosize), R1
// BL.GE callmorestack(SB)
// TODO(kaib): double check we allocate autosize after
// stack has been split
// TODO(kaib): add error in case autosize doesn't pack
// TODO(kaib): add more trampolines
// TODO(kaib): put stackguard in register
// TODO(kaib): add support for -K and underflow detection
p = appendp(p); // load G.stackguard into R1
p->as = AMOVW;
p->from.type = D_CONST;
if(autosize+160 > 4096)
p->from.offset = (autosize+160) & ~7LL;
p->from.type = D_OREG;
p->from.reg = REGG;
p->to.type = D_REG;
p->to.reg = REGTMP;
p->to.reg = 1;
p = appendp(p);
p->as = ACMP;
p->from.type = D_REG;
p->from.reg = 1;
p->from.offset = -autosize;
p->reg = REGSP;
p = appendp(p);
p->as = AMOVW;
p->scond = C_SCOND_GE | C_WBIT;
p->from.type = D_REG;
p->from.reg = REGTMP;
p->from.reg = REGLINK;
p->to.type = D_OREG;
p->to.reg = REGM;
p->to.offset = 4;
p->to.offset = -autosize;
p->to.reg = REGSP;
p = appendp(p);
p->as = AMOVW;
p->scond = C_SCOND_LT | C_WBIT;
p->from.type = D_REG;
p->from.reg = REGLINK;
p->to.type = D_OREG;
p->to.offset = -4;
p->to.reg = REGSP;
p = appendp(p); // packs args and autosize
p->as = AMOVW;
p->scond = C_SCOND_LT;
p->from.type = D_CONST;
// p->from.offset = curtext->to.offset2;
// top 8 bits are arg count, lower 24 bits number of 4 byte
// words
p->from.offset =
(curtext->to.offset2 & ~7) << 21 |
(autosize & ~7) >> 3;
p->to.type = D_REG;
p->to.reg = REGTMP;
p->to.reg = 1;
p = appendp(p);
p->as = AMOVW;
p->from.type = D_REG;
p->from.reg = REGTMP;
p->to.type = D_OREG;
p->to.reg = REGM;
p->to.offset = 8;
// p = appendp(p);
// p->as = ABL;
// p->to.type = D_BRANCH;
// p->to.sym = symmorestack;
// p->cond = pmorestack;
p->as = ABL;
p->scond = C_SCOND_LT;
p->to.type = D_BRANCH;
p->to.sym = symmorestack;
p->cond = pmorestack;
} else { // > StackBig
// MOVW.W R14,$-4(SP)
// MOVW $(args << 24 | autosize), R1
// BL callmorestack(SB)
// TODO(kaib): Fix large stacks, don't use packing
diag("StackBig broken");
}
break;
......@@ -803,7 +819,7 @@ noops(void)
p->link = q;
}
}
if(seenthumb && !thumb && p->to.type == D_OREG && p->to.reg == REGLINK){
if(seenthumb && !thumb && p->to.type == D_OREG && p->to.reg == REGLINK){
// print("warn %s: b (R%d) assuming a return\n", curtext->from.sym->name, p->to.reg);
p->as = ABXRET;
}
......
......@@ -546,6 +546,8 @@ zaddr(Biobuf *f, Adr *a, Sym *h[])
c++;
break;
case D_CONST2:
a->offset2 = Bget4(f); // fall through
case D_BRANCH:
case D_OREG:
case D_CONST:
......
......@@ -87,12 +87,12 @@ fninc(Sym *s)
return 4;
else
return 0;
}
}
}
return 0;
}
int
int
fnpinc(Sym *s)
{
if(!s->fnptr){ // a simplified case BX O(R) -> BL O(R)
......@@ -318,7 +318,7 @@ span(void)
else
m = 2;
p->align = 0;
}
}
if(p->align){
if((p->align == 4 && (c&3)) || (p->align == 2 && !(c&3))){
if(ispad(op)){
......@@ -374,9 +374,9 @@ span(void)
// print("%d bytes removed (padding)\n", d);
c -= d;
}
if(debug['t']) {
/*
/*
* add strings to text segment
*/
c = rnd(c, 8);
......@@ -391,7 +391,7 @@ span(void)
c += v;
}
}
c = rnd(c, 8);
setext = lookup("etext", 0);
......@@ -726,6 +726,7 @@ aclass(Adr *a)
return C_FCON;
case D_CONST:
case D_CONST2:
switch(a->name) {
case D_NONE:
......@@ -1072,7 +1073,7 @@ buildop(void)
oprange[AMOVFD] = oprange[r];
oprange[AMOVDF] = oprange[r];
break;
case ACMPF:
oprange[ACMPD] = oprange[r];
break;
......@@ -1246,7 +1247,7 @@ asmdyn()
t += 4;
t += sput(s->name);
}
la = 0;
r = &rels;
n = r->n;
......
......@@ -123,6 +123,11 @@ addr(Biobuf *bp)
case D_PSR:
case D_FPCR:
break;
case D_CONST2:
Bgetc(bp);
Bgetc(bp);
Bgetc(bp);
Bgetc(bp); // fall through
case D_OREG:
case D_CONST:
case D_BRANCH:
......
......@@ -3,88 +3,80 @@
// license that can be found in the LICENSE file.
TEXT _rt0_arm(SB),7,$0
// copy arguments forward on an even stack
// MOVW $0(SP), R0
// MOVL 0(SP), R1 // argc
// LEAL 4(SP), R1 // argv
// SUBL $128, SP // plenty of scratch
// ANDL $~7, SP
// MOVL AX, 120(SP) // save argc, argv away
// MOVL BX, 124(SP)
// // write "go386\n"
// PUSHL $6
// PUSHL $hello(SB)
// PUSHL $1
// CALL sys·write(SB)
// POPL AX
// POPL AX
// POPL AX
// CALL ldt0setup(SB)
// set up %fs to refer to that ldt entry
// MOVL $(7*8+7), AX
// MOVW AX, FS
// // store through it, to make sure it works
// MOVL $0x123, 0(FS)
// MOVL tls0(SB), AX
// CMPL AX, $0x123
// JEQ ok
// MOVL AX, 0
// ok:
// // set up m and g "registers"
// // g is 0(FS), m is 4(FS)
// LEAL g0(SB), CX
// MOVL CX, 0(FS)
// LEAL m0(SB), AX
// MOVL AX, 4(FS)
// // save m->g0 = g0
// MOVL CX, 0(AX)
// // create istack out of the OS stack
// LEAL (-8192+104)(SP), AX // TODO: 104?
// MOVL AX, 0(CX) // 8(g) is stack limit (w 104b guard)
// MOVL SP, 4(CX) // 12(g) is base
// CALL emptyfunc(SB) // fault if stack check is wrong
// // convention is D is always cleared
// CLD
// CALL check(SB)
// // saved argc, argv
// MOVL 120(SP), AX
// MOVL AX, 0(SP)
// MOVL 124(SP), AX
// MOVL AX, 4(SP)
// CALL args(SB)
// CALL osinit(SB)
// CALL schedinit(SB)
// // create a new goroutine to start program
// PUSHL $mainstart(SB) // entry
// PUSHL $8 // arg size
// CALL sys·newproc(SB)
// POPL AX
// POPL AX
MOVW $setR12(SB), R12
// copy arguments forward on an even stack
MOVW 0(SP), R0 // argc
MOVW 4(SP), R1 // argv
SUB $128, SP // plenty of scratch
AND $~7, SP
MOVW R0, 120(SP) // save argc, argv away
MOVW R1, 124(SP)
// set up m and g registers
// g is R10, m is R9
MOVW $g0(SB), R10
MOVW $m0(SB), R9
// save m->g0 = g0
MOVW R10, 0(R9)
// create istack out of the OS stack
MOVW $(-8192+104)(SP), R0
MOVW R0, 0(R10) // 0(g) is stack limit (w 104b guard)
MOVW SP, 4(R10) // 4(g) is base
BL emptyfunc(SB) // fault if stack check is wrong
BL check(SB)
// saved argc, argv
MOVW 120(SP), R0
MOVW R0, 0(SP)
MOVW 124(SP), R0
MOVW R0, 4(SP)
BL args(SB)
BL osinit(SB)
BL schedinit(SB)
// create a new goroutine to start program
MOVW $mainstart(SB), R0
MOVW.W R0, -4(SP)
MOVW $8, R0
MOVW.W R0, -4(SP)
MOVW $0, R0
MOVW.W R0, -4(SP) // push $0 as guard
BL sys·newproc(SB)
MOVW $12(SP), SP // pop args and LR
// start this M
BL mstart(SB)
MOVW $0, R0
SWI $0x00900001
B _dep_dummy(SB) // Never reached
// // start this M
// CALL mstart(SB)
TEXT mainstart(SB),7,$0
BL main·init(SB)
BL initdone(SB)
BL main·main(SB)
MOVW $99, R0
SWI $0x00900001
MOVW $0, R0
MOVW.W R0, -4(SP)
MOVW.W R14, -4(SP) // Push link as well
BL exit(SB)
MOVW $8(SP), SP // pop args and LR
RET
// TODO(kaib): remove these once linker works properly
// pull in dummy dependencies
// TEXT _dep_dummy(SB),7,$0
// BL sys·morestack(SB)
TEXT _dep_dummy(SB),7,$0
BL sys·morestack(SB)
BL sys·morestackx(SB)
BL _div(SB)
BL _divu(SB)
BL _mod(SB)
BL _modu(SB)
BL _modu(SB)
TEXT breakpoint(SB),7,$0
......@@ -114,33 +106,38 @@ TEXT gosave(SB), 7, $0
// support for morestack
// return point when leaving new stack.
// save AX, jmp to lesstack to switch back
// save R0, jmp to lesstack to switch back
TEXT retfromnewstack(SB),7,$0
BL abort(SB)
// MOVL 4(FS), BX // m
// MOVL AX, 12(BX) // save AX in m->cret
// JMP lessstack(SB)
MOVW R0,12(R9) // m->cret
B lessstack(SB)
// gogo, returning 2nd arg instead of 1
TEXT gogoret(SB), 7, $0
BL abort(SB)
// MOVL 8(SP), AX // return 2nd arg
// MOVL 4(SP), BX // gobuf
// MOVL 0(BX), SP // restore SP
// MOVL 4(BX), BX
// MOVL BX, 0(SP) // put PC on the stack
// RET
MOVW 8(SP), R0 // return 2nd arg
MOVW 4(SP), R1 // gobuf
MOVW 0(R1), SP // restore SP
MOVW 4(R1), PC // restore PC
TEXT setspgoto(SB), 7, $0
BL abort(SB)
// MOVL 4(SP), AX // SP
// MOVL 8(SP), BX // fn to call
// MOVL 12(SP), CX // fn to return
// MOVL AX, SP
// PUSHL CX
// JMP BX
// POPL AX // not reached
// RET
MOVW 4(SP), R0 // SP
MOVW 8(SP), R1 // fn to call
MOVW 12(SP), R2 // fn to return into
MOVW R2, R14 // restore LR
MOVW R0, SP
MOVW R1, PC // goto
// Optimization to make inline stack splitting code smaller
// R0 is original first argument
// R1 is arg_num << 24 | autosize >> 3
TEXT sys·morestackx(SB), 7, $0
MOVW R0, 4(SP) // Save arg0
MOVW R1<<8, R2
MOVW R2>>5, R2
MOVW R2, 4(R10) // autooffset into g
MOVW R1>>24, R2
MOVW R2<<3, R2
MOVW R2, 8(R10) // argsize into g
B sys·morestack(SB)
// bool cas(int32 *val, int32 old, int32 new)
// Atomically:
......@@ -149,18 +146,26 @@ TEXT setspgoto(SB), 7, $0
// return 1;
// }else
// return 0;
TEXT cas(SB), 7, $0
BL abort(SB)
// MOVL 4(SP), BX
// MOVL 8(SP), AX
// MOVL 12(SP), CX
// LOCK
// CMPXCHGL CX, 0(BX)
// JZ 3(PC)
// MOVL $0, AX
// RET
// MOVL $1, AX
// RET
#define LDREX(a,r) WORD $(0xe<<28|0x01900f9f | (a)<<16 | (r)<<12)
#define STREX(a,v,r) WORD $(0xe<<28|0x01800f90 | (a)<<16 | (r)<<12 | (v)<<0)
TEXT cas+0(SB),0,$12 /* r0 holds p */
MOVW ov+4(FP), R1
MOVW nv+8(FP), R2
spin:
/* LDREX 0(R0),R3 */
LDREX(0,3)
CMP.S R3, R1
BNE fail
/* STREX 0(R0),R2,R4 */
STREX(0,2,4)
CMP.S $0, R4
BNE spin
MOVW $1, R0
RET
fail:
MOVW $0, R0
RET
// void jmpdefer(fn, sp);
// called from deferreturn.
......@@ -200,6 +205,10 @@ TEXT sys·setcallerpc+0(SB),7,$0
// MOVL BX, -4(AX) // set calling pc
// RET
TEXT emptyfunc(SB),0,$0
RET
TEXT abort(SB),7,$0
WORD $0
MOVW $0, R0
MOVW (R0), R1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment