Commit 7910cd68 authored by Russ Cox's avatar Russ Cox

cmd/gc: zero pointers on entry to function

On entry to a function, zero the results and zero the pointer
section of the local variables.

This is an intermediate step on the way to precise collection
of Go frames.

This can incur a significant (up to 30%) slowdown, but it also ensures
that the garbage collector never looks at a word in a Go frame
and sees a stale pointer value that could cause a space leak.
(C frames and assembly frames are still possibly problematic.)

This CL is required to start making collection of interface values
as precise as collection of pointer values are today.
Since we have to dereference the interface type to understand
whether the value is a pointer, it is critical that the type field be
initialized.

A future CL by Carl will make the garbage collection pointer
bitmaps context-sensitive. At that point it will be possible to
remove most of the zeroing. The only values that will still need
zeroing are values whose addresses escape the block scoping
of the function but do not escape to the heap.

benchmark                         old ns/op    new ns/op    delta
BenchmarkBinaryTree17            4420289180   4331060459   -2.02%
BenchmarkFannkuch11              3442469663   3277706251   -4.79%
BenchmarkFmtFprintfEmpty                100          142  +42.00%
BenchmarkFmtFprintfString               262          310  +18.32%
BenchmarkFmtFprintfInt                  213          281  +31.92%
BenchmarkFmtFprintfIntInt               355          431  +21.41%
BenchmarkFmtFprintfPrefixedInt          321          383  +19.31%
BenchmarkFmtFprintfFloat                444          533  +20.05%
BenchmarkFmtManyArgs                   1380         1559  +12.97%
BenchmarkGobDecode                 10240054     11794915  +15.18%
BenchmarkGobEncode                 17350274     19970478  +15.10%
BenchmarkGzip                     455179460    460699139   +1.21%
BenchmarkGunzip                   114271814    119291574   +4.39%
BenchmarkHTTPClientServer             89051        89894   +0.95%
BenchmarkJSONEncode                40486799     52691558  +30.15%
BenchmarkJSONDecode                94193361    112428781  +19.36%
BenchmarkMandelbrot200              4747060      4748043   +0.02%
BenchmarkGoParse                    6363798      6675098   +4.89%
BenchmarkRegexpMatchEasy0_32            129          171  +32.56%
BenchmarkRegexpMatchEasy0_1K            365          395   +8.22%
BenchmarkRegexpMatchEasy1_32            106          152  +43.40%
BenchmarkRegexpMatchEasy1_1K            952         1245  +30.78%
BenchmarkRegexpMatchMedium_32           198          283  +42.93%
BenchmarkRegexpMatchMedium_1K         79006       101097  +27.96%
BenchmarkRegexpMatchHard_32            3478         5115  +47.07%
BenchmarkRegexpMatchHard_1K          110245       163582  +48.38%
BenchmarkRevcomp                  777384355    793270857   +2.04%
BenchmarkTemplate                 136713089    157093609  +14.91%
BenchmarkTimeParse                     1511         1761  +16.55%
BenchmarkTimeFormat                     535          850  +58.88%

benchmark                          old MB/s     new MB/s  speedup
BenchmarkGobDecode                    74.95        65.07    0.87x
BenchmarkGobEncode                    44.24        38.43    0.87x
BenchmarkGzip                         42.63        42.12    0.99x
BenchmarkGunzip                      169.81       162.67    0.96x
BenchmarkJSONEncode                   47.93        36.83    0.77x
BenchmarkJSONDecode                   20.60        17.26    0.84x
BenchmarkGoParse                       9.10         8.68    0.95x
BenchmarkRegexpMatchEasy0_32         247.24       186.31    0.75x
BenchmarkRegexpMatchEasy0_1K        2799.20      2591.93    0.93x
BenchmarkRegexpMatchEasy1_32         299.31       210.44    0.70x
BenchmarkRegexpMatchEasy1_1K        1074.71       822.45    0.77x
BenchmarkRegexpMatchMedium_32          5.04         3.53    0.70x
BenchmarkRegexpMatchMedium_1K         12.96        10.13    0.78x
BenchmarkRegexpMatchHard_32            9.20         6.26    0.68x
BenchmarkRegexpMatchHard_1K            9.29         6.26    0.67x
BenchmarkRevcomp                     326.95       320.40    0.98x
BenchmarkTemplate                     14.19        12.35    0.87x

R=cshapiro
CC=golang-dev
https://golang.org/cl/12616045
parent 20728966
......@@ -9,9 +9,15 @@
#include "gg.h"
#include "opt.h"
static Prog* appendp(Prog*, int, int, int, int32, int, int, int32);
void
defframe(Prog *ptxt)
defframe(Prog *ptxt, Bvec *bv)
{
int i, first;
uint32 frame;
Prog *p, *p1;
// fill in argument size
ptxt->to.type = D_CONST2;
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
......@@ -19,8 +25,60 @@ defframe(Prog *ptxt)
// fill in final stack size
if(stksize > maxstksize)
maxstksize = stksize;
ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
frame = rnd(maxstksize+maxarg, widthptr);
ptxt->to.offset = frame;
maxstksize = 0;
// insert code to clear pointered part of the frame,
// so that garbage collector only sees initialized values
// when it looks for pointers.
p = ptxt;
while(p->link->as == AFUNCDATA || p->link->as == APCDATA || p->link->as == ATYPE)
p = p->link;
if(stkptrsize >= 8*widthptr) {
p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
p = appendp(p, AADD, D_CONST, NREG, 4+frame-stkptrsize, D_REG, 1, 0);
p->reg = REGSP;
p = appendp(p, AADD, D_CONST, NREG, stkptrsize, D_REG, 2, 0);
p->reg = 1;
p1 = p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4);
p->scond |= C_PBIT;
p = appendp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0);
p->reg = 2;
p = appendp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
patch(p, p1);
} else {
first = 1;
for(i=0; i<stkptrsize; i+=widthptr) {
if(bvget(bv, i/widthptr)) {
if(first) {
p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0);
first = 0;
}
p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame-stkptrsize+i);
}
}
}
}
static Prog*
appendp(Prog *p, int as, int ftype, int freg, int32 foffset, int ttype, int treg, int32 toffset)
{
Prog *q;
q = mal(sizeof(*q));
clearp(q);
q->as = as;
q->lineno = p->lineno;
q->from.type = ftype;
q->from.reg = freg;
q->from.offset = foffset;
q->to.type = ttype;
q->to.reg = treg;
q->to.offset = toffset;
q->link = p->link;
p->link = q;
return q;
}
// Sweep the prog list to mark any used nodes.
......
......@@ -9,15 +9,56 @@
#include "gg.h"
#include "opt.h"
static Prog* appendp(Prog*, int, int, vlong, int, vlong);
void
defframe(Prog *ptxt)
defframe(Prog *ptxt, Bvec *bv)
{
int i;
uint32 frame;
Prog *p;
// fill in argument size
ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
ptxt->to.offset <<= 32;
ptxt->to.offset |= rnd(stksize+maxarg, widthptr);
frame = rnd(stksize+maxarg, widthptr);
ptxt->to.offset |= frame;
// insert code to clear pointered part of the frame,
// so that garbage collector only sees initialized values
// when it looks for pointers.
p = ptxt;
if(stkptrsize >= 8*widthptr) {
p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
p = appendp(p, AMOVQ, D_CONST, stkptrsize/widthptr, D_CX, 0);
p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0);
p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
} else {
for(i=0; i<stkptrsize; i+=widthptr)
if(bvget(bv, i/widthptr))
p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i);
}
}
static Prog*
appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
{
Prog *q;
q = mal(sizeof(*q));
clearp(q);
q->as = as;
q->lineno = p->lineno;
q->from.type = ftype;
q->from.offset = foffset;
q->to.type = ttype;
q->to.offset = toffset;
q->link = p->link;
p->link = q;
return q;
}
// Sweep the prog list to mark any used nodes.
......
......@@ -9,17 +9,58 @@
#include "gg.h"
#include "opt.h"
static Prog* appendp(Prog*, int, int, int32, int, int32);
void
defframe(Prog *ptxt)
defframe(Prog *ptxt, Bvec *bv)
{
uint32 frame;
Prog *p;
int i;
// fill in argument size
ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr);
// fill in final stack size
if(stksize > maxstksize)
maxstksize = stksize;
ptxt->to.offset = rnd(maxstksize+maxarg, widthptr);
frame = rnd(maxstksize+maxarg, widthptr);
ptxt->to.offset = frame;
maxstksize = 0;
// insert code to clear pointered part of the frame,
// so that garbage collector only sees initialized values
// when it looks for pointers.
p = ptxt;
if(stkptrsize >= 8*widthptr) {
p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0);
p = appendp(p, AMOVL, D_CONST, stkptrsize/widthptr, D_CX, 0);
p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkptrsize, D_DI, 0);
p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0);
} else {
for(i=0; i<stkptrsize; i+=widthptr)
if(bvget(bv, i/widthptr))
p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkptrsize+i);
}
}
static Prog*
appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset)
{
Prog *q;
q = mal(sizeof(*q));
clearp(q);
q->as = as;
q->lineno = p->lineno;
q->from.type = ftype;
q->from.offset = foffset;
q->to.type = ttype;
q->to.offset = toffset;
q->link = p->link;
p->link = q;
return q;
}
// Sweep the prog list to mark any used nodes.
......
......@@ -1458,7 +1458,7 @@ void cgen_callinter(Node *n, Node *res, int proc);
void cgen_ret(Node *n);
void clearfat(Node *n);
void compile(Node*);
void defframe(Prog*);
void defframe(Prog*, Bvec*);
int dgostringptr(Sym*, int off, char *str);
int dgostrlitptr(Sym*, int off, Strlit*);
int dstringptr(Sym *s, int off, char *str);
......
......@@ -12,11 +12,12 @@ enum { BitsPerPointer = 2 };
static void allocauto(Prog* p);
static void dumpgcargs(Node*, Sym*);
static void dumpgclocals(Node*, Sym*);
static Bvec* dumpgclocals(Node*, Sym*);
void
compile(Node *fn)
{
Bvec *bv;
Plist *pl;
Node nod1, *n, *gcargsnod, *gclocalsnod;
Prog *ptxt, *p, *p1;
......@@ -179,15 +180,16 @@ compile(Node *fn)
goto ret;
}
defframe(ptxt);
// Emit garbage collection symbols.
dumpgcargs(fn, gcargssym);
bv = dumpgclocals(curfn, gclocalssym);
defframe(ptxt, bv);
free(bv);
if(0)
frame(0);
// Emit garbage collection symbols.
dumpgcargs(fn, gcargssym);
dumpgclocals(curfn, gclocalssym);
ret:
lineno = lno;
}
......@@ -329,8 +331,8 @@ dumpgcargs(Node *fn, Sym *sym)
// Compute a bit vector to describes the pointer containing locations
// in local variables and dumps the bitvector length and data out to
// the provided symbol.
static void
// the provided symbol. Returns the vector for use and freeing by caller.
static Bvec*
dumpgclocals(Node* fn, Sym *sym)
{
Bvec *bv;
......@@ -354,8 +356,8 @@ dumpgclocals(Node* fn, Sym *sym)
for(i = 0; i < bv->n; i += 32) {
off = duint32(sym, off, bv->b[i/32]);
}
free(bv);
ggloblsym(sym, off, 0, 1);
return bv;
}
// Sort the list of stack variables. Autos after anything else,
......
......@@ -2309,7 +2309,9 @@ paramstoheap(Type **argin, int out)
v = t->nname;
if(v && v->sym && v->sym->name[0] == '~')
v = N;
if(v == N && out && hasdefer) {
// The garbage collector assumes results are always live,
// so zero them always (1 ||).
if(out && (1 || (v == N && hasdefer))) {
// Defer might stop a panic and show the
// return values as they exist at the time of panic.
// Make sure to zero them on entry to the function.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment