Commit 383963b5 authored by Keith Randall's avatar Keith Randall

runtime: zero at start of frame more efficiently.

Use Duff's device for zeroing.  Combine adjacent regions.

Update #7680
Update #7624

LGTM=rsc
R=rsc
CC=golang-codereviews
https://golang.org/cl/83200045
parent 9c8f11ff
...@@ -10,13 +10,14 @@ ...@@ -10,13 +10,14 @@
#include "opt.h" #include "opt.h"
static Prog *appendpp(Prog*, int, int, vlong, int, vlong); static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
void void
defframe(Prog *ptxt) defframe(Prog *ptxt)
{ {
uint32 frame; uint32 frame, ax;
Prog *p; Prog *p;
vlong i; vlong hi, lo;
NodeList *l; NodeList *l;
Node *n; Node *n;
...@@ -28,14 +29,13 @@ defframe(Prog *ptxt) ...@@ -28,14 +29,13 @@ defframe(Prog *ptxt)
frame = rnd(stksize+maxarg, widthreg); frame = rnd(stksize+maxarg, widthreg);
ptxt->to.offset |= frame; ptxt->to.offset |= frame;
// insert code to contain ambiguously live variables // insert code to zero ambiguously live variables
// so that garbage collector only sees initialized values // so that the garbage collector only sees initialized values
// when it looks for pointers. // when it looks for pointers.
//
// TODO: determine best way to zero the given values.
// among other problems, AX is initialized to 0 multiple times,
// but that's really the tip of the iceberg.
p = ptxt; p = ptxt;
lo = hi = 0;
ax = 0;
// iterate through declarations - they are sorted in decreasing xoffset order.
for(l=curfn->dcl; l != nil; l = l->next) { for(l=curfn->dcl; l != nil; l = l->next) {
n = l->n; n = l->n;
if(!n->needzero) if(!n->needzero)
...@@ -44,21 +44,50 @@ defframe(Prog *ptxt) ...@@ -44,21 +44,50 @@ defframe(Prog *ptxt)
fatal("needzero class %d", n->class); fatal("needzero class %d", n->class);
if(n->type->width % widthreg != 0 || n->xoffset % widthreg != 0 || n->type->width == 0) if(n->type->width % widthreg != 0 || n->xoffset % widthreg != 0 || n->type->width == 0)
fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
if(n->type->width <= 2*widthreg) {
for(i = 0; i < n->type->width; i += widthreg) if(n->xoffset + n->type->width >= lo - 2*widthptr) {
p = appendpp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame+n->xoffset+i); // merge with range we already have
} else if(n->type->width <= 16*widthreg) { lo = n->xoffset;
continue;
}
// zero old range
p = zerorange(p, frame, lo, hi, &ax);
// set new range
hi = n->xoffset + n->type->width;
lo = n->xoffset;
}
// zero final range
zerorange(p, frame, lo, hi, &ax);
}
static Prog*
zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
{
vlong cnt, i;
cnt = hi - lo;
if(cnt == 0)
return p;
if(*ax == 0) {
p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0); p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0);
for(i = 0; i < n->type->width; i += widthreg) *ax = 1;
p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+n->xoffset+i); }
if(cnt <= 4*widthreg) {
for(i = 0; i < cnt; i += widthreg) {
p = appendpp(p, AMOVQ, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
}
} else if(cnt <= 128*widthreg) {
p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 2*(128-cnt/widthreg));
p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
} else { } else {
p = appendpp(p, AMOVQ, D_CONST, 0, D_AX, 0); p = appendpp(p, AMOVQ, D_CONST, cnt/widthreg, D_CX, 0);
p = appendpp(p, AMOVQ, D_CONST, n->type->width/widthreg, D_CX, 0); p = appendpp(p, leaptr, D_SP+D_INDIR, frame+lo, D_DI, 0);
p = appendpp(p, leaptr, D_SP+D_INDIR, frame+n->xoffset, D_DI, 0);
p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); p = appendpp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
} }
} return p;
} }
static Prog* static Prog*
......
...@@ -10,13 +10,14 @@ ...@@ -10,13 +10,14 @@
#include "opt.h" #include "opt.h"
static Prog *appendpp(Prog*, int, int, vlong, int, vlong); static Prog *appendpp(Prog*, int, int, vlong, int, vlong);
static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax);
void void
defframe(Prog *ptxt) defframe(Prog *ptxt)
{ {
uint32 frame; uint32 frame, ax;
Prog *p; Prog *p;
vlong i; vlong lo, hi;
NodeList *l; NodeList *l;
Node *n; Node *n;
...@@ -27,14 +28,12 @@ defframe(Prog *ptxt) ...@@ -27,14 +28,12 @@ defframe(Prog *ptxt)
frame = rnd(stksize+maxarg, widthptr); frame = rnd(stksize+maxarg, widthptr);
ptxt->to.offset = frame; ptxt->to.offset = frame;
// insert code to contain ambiguously live variables // insert code to zero ambiguously live variables
// so that garbage collector only sees initialized values // so that the garbage collector only sees initialized values
// when it looks for pointers. // when it looks for pointers.
//
// TODO: determine best way to zero the given values.
// among other problems, AX is initialized to 0 multiple times,
// but that's really the tip of the iceberg.
p = ptxt; p = ptxt;
lo = hi = 0;
ax = 0;
for(l=curfn->dcl; l != nil; l = l->next) { for(l=curfn->dcl; l != nil; l = l->next) {
n = l->n; n = l->n;
if(!n->needzero) if(!n->needzero)
...@@ -43,21 +42,49 @@ defframe(Prog *ptxt) ...@@ -43,21 +42,49 @@ defframe(Prog *ptxt)
fatal("needzero class %d", n->class); fatal("needzero class %d", n->class);
if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0) if(n->type->width % widthptr != 0 || n->xoffset % widthptr != 0 || n->type->width == 0)
fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset); fatal("var %lN has size %d offset %d", n, (int)n->type->width, (int)n->xoffset);
if(n->type->width <= 2*widthptr) { if(n->xoffset + n->type->width == lo - 2*widthptr) {
for(i = 0; i < n->type->width; i += widthptr) // merge with range we already have
p = appendpp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame+n->xoffset+i); lo = n->xoffset;
} else if(n->type->width <= 16*widthptr) { continue;
}
// zero old range
p = zerorange(p, frame, lo, hi, &ax);
// set new range
hi = n->xoffset + n->type->width;
lo = n->xoffset;
}
// zero final range
zerorange(p, frame, lo, hi, &ax);
}
static Prog*
zerorange(Prog *p, vlong frame, vlong lo, vlong hi, uint32 *ax)
{
vlong cnt, i;
cnt = hi - lo;
if(cnt == 0)
return p;
if(*ax == 0) {
p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0); p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0);
for(i = 0; i < n->type->width; i += widthptr) *ax = 1;
p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+n->xoffset+i); }
if(cnt <= 4*widthreg) {
for(i = 0; i < cnt; i += widthreg) {
p = appendpp(p, AMOVL, D_AX, 0, D_SP+D_INDIR, frame+lo+i);
}
} else if(cnt <= 128*widthreg) {
p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
p = appendpp(p, ADUFFZERO, D_NONE, 0, D_ADDR, 1*(128-cnt/widthreg));
p->to.sym = linksym(pkglookup("duffzero", runtimepkg));
} else { } else {
p = appendpp(p, AMOVL, D_CONST, 0, D_AX, 0); p = appendpp(p, AMOVL, D_CONST, cnt/widthreg, D_CX, 0);
p = appendpp(p, AMOVL, D_CONST, n->type->width/widthptr, D_CX, 0); p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+lo, D_DI, 0);
p = appendpp(p, ALEAL, D_SP+D_INDIR, frame+n->xoffset, D_DI, 0);
p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0); p = appendpp(p, AREP, D_NONE, 0, D_NONE, 0);
p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0); p = appendpp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
} }
} return p;
} }
static Prog* static Prog*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment