Commit 031c107c authored by Russ Cox's avatar Russ Cox

cmd/ld: fix large stack split for preempt check

If the stack frame size is larger than the known-unmapped region at the
bottom of the address space, then the stack split prologue cannot use the usual
condition:

        SP - size >= stackguard

because SP - size may wrap around to a very large number.
Instead, if the stack frame is large, the prologue tests:

        SP - stackguard >= size

(This ends up being a few instructions more expensive, so we don't do it always.)

Preemption requests register by setting stackguard to a very large value, so
that the first test (SP - size >= stackguard) cannot possibly succeed.
Unfortunately, that same very large value causes a wraparound in the
second test (SP - stackguard >= size), making it succeed incorrectly.

To avoid *that* wraparound, we have to amend the test:

        stackguard != StackPreempt && SP - stackguard >= size

This test is only used for functions with large frames, which essentially
always split the stack, so the cost of the few instructions is noise.

This CL and CL 11085043 together fix the known issues with preemption,
at the beginning of a function, so we will be able to try turning it on again.

R=ken2
CC=golang-dev
https://golang.org/cl/11205043
parent 56cd47b2
...@@ -209,15 +209,22 @@ noops(void) ...@@ -209,15 +209,22 @@ noops(void)
p->from.reg = 1; p->from.reg = 1;
p->reg = 2; p->reg = 2;
} else { } else {
// such a large stack we need to protect against wraparound // Such a large stack we need to protect against wraparound
// if SP is close to zero. // if SP is close to zero.
// SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall) // SP-stackguard+StackGuard < framesize + (StackGuard-StackSmall)
// The +StackGuard on both sides is required to keep the left side positive: // The +StackGuard on both sides is required to keep the left side positive:
// SP is allowed to be slightly below stackguard. See stack.h. // SP is allowed to be slightly below stackguard. See stack.h.
// MOVW $StackGuard(SP), R2 // CMP $StackPreempt, R1
// SUB R1, R2 // MOVW.NE $StackGuard(SP), R2
// MOVW $(autosize+(StackGuard-StackSmall)), R3 // SUB.NE R1, R2
// CMP R3, R2 // MOVW.NE $(autosize+(StackGuard-StackSmall)), R3
// CMP.NE R3, R2
p = appendp(p);
p->as = ACMP;
p->from.type = D_CONST;
p->from.offset = (uint32)StackPreempt;
p->reg = 1;
p = appendp(p); p = appendp(p);
p->as = AMOVW; p->as = AMOVW;
p->from.type = D_CONST; p->from.type = D_CONST;
...@@ -225,6 +232,7 @@ noops(void) ...@@ -225,6 +232,7 @@ noops(void)
p->from.offset = StackGuard; p->from.offset = StackGuard;
p->to.type = D_REG; p->to.type = D_REG;
p->to.reg = 2; p->to.reg = 2;
p->scond = C_SCOND_NE;
p = appendp(p); p = appendp(p);
p->as = ASUB; p->as = ASUB;
...@@ -232,6 +240,7 @@ noops(void) ...@@ -232,6 +240,7 @@ noops(void)
p->from.reg = 1; p->from.reg = 1;
p->to.type = D_REG; p->to.type = D_REG;
p->to.reg = 2; p->to.reg = 2;
p->scond = C_SCOND_NE;
p = appendp(p); p = appendp(p);
p->as = AMOVW; p->as = AMOVW;
...@@ -239,12 +248,14 @@ noops(void) ...@@ -239,12 +248,14 @@ noops(void)
p->from.offset = autosize + (StackGuard - StackSmall); p->from.offset = autosize + (StackGuard - StackSmall);
p->to.type = D_REG; p->to.type = D_REG;
p->to.reg = 3; p->to.reg = 3;
p->scond = C_SCOND_NE;
p = appendp(p); p = appendp(p);
p->as = ACMP; p->as = ACMP;
p->from.type = D_REG; p->from.type = D_REG;
p->from.reg = 3; p->from.reg = 3;
p->reg = 2; p->reg = 2;
p->scond = C_SCOND_NE;
} }
// MOVW.LS $autosize, R1 // MOVW.LS $autosize, R1
......
...@@ -438,6 +438,7 @@ dostkoff(void) ...@@ -438,6 +438,7 @@ dostkoff(void)
} }
q = P; q = P;
q1 = P;
if((p->from.scale & NOSPLIT) && autoffset >= StackSmall) if((p->from.scale & NOSPLIT) && autoffset >= StackSmall)
diag("nosplit func likely to overflow stack"); diag("nosplit func likely to overflow stack");
...@@ -497,6 +498,7 @@ dostkoff(void) ...@@ -497,6 +498,7 @@ dostkoff(void)
q1->pcond = p; q1->pcond = p;
} }
q1 = P;
if(autoffset <= StackSmall) { if(autoffset <= StackSmall) {
// small stack: SP <= stackguard // small stack: SP <= stackguard
// CMPQ SP, stackguard // CMPQ SP, stackguard
...@@ -519,14 +521,38 @@ dostkoff(void) ...@@ -519,14 +521,38 @@ dostkoff(void)
p->from.type = D_AX; p->from.type = D_AX;
p->to.type = D_INDIR+D_CX; p->to.type = D_INDIR+D_CX;
} else { } else {
// such a large stack we need to protect against wraparound // Such a large stack we need to protect against wraparound.
// if SP is close to zero: // If SP is close to zero:
// SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
// The +StackGuard on both sides is required to keep the left side positive: // The +StackGuard on both sides is required to keep the left side positive:
// SP is allowed to be slightly below stackguard. See stack.h. // SP is allowed to be slightly below stackguard. See stack.h.
//
// Preemption sets stackguard to StackPreempt, a very large value.
// That breaks the math above, so we have to check for that explicitly.
// MOVQ stackguard, CX
// CMPQ CX, $StackPreempt
// JEQ label-of-call-to-morestack
// LEAQ StackGuard(SP), AX // LEAQ StackGuard(SP), AX
// SUBQ stackguard, AX // SUBQ CX, AX
// CMPQ AX, $(autoffset+(StackGuard-StackSmall)) // CMPQ AX, $(autoffset+(StackGuard-StackSmall))
p = appendp(p);
p->as = AMOVQ;
p->from.type = D_INDIR+D_CX;
p->from.offset = 0;
p->to.type = D_SI;
p = appendp(p);
p->as = ACMPQ;
p->from.type = D_SI;
p->to.type = D_CONST;
p->to.offset = StackPreempt;
p = appendp(p);
p->as = AJEQ;
p->to.type = D_BRANCH;
q1 = p;
p = appendp(p); p = appendp(p);
p->as = ALEAQ; p->as = ALEAQ;
p->from.type = D_INDIR+D_SP; p->from.type = D_INDIR+D_SP;
...@@ -535,8 +561,7 @@ dostkoff(void) ...@@ -535,8 +561,7 @@ dostkoff(void)
p = appendp(p); p = appendp(p);
p->as = ASUBQ; p->as = ASUBQ;
p->from.type = D_INDIR+D_CX; p->from.type = D_SI;
p->from.offset = 0;
p->to.type = D_AX; p->to.type = D_AX;
p = appendp(p); p = appendp(p);
...@@ -550,7 +575,6 @@ dostkoff(void) ...@@ -550,7 +575,6 @@ dostkoff(void)
p = appendp(p); p = appendp(p);
p->as = AJHI; p->as = AJHI;
p->to.type = D_BRANCH; p->to.type = D_BRANCH;
p->to.offset = 4;
q = p; q = p;
// If we ask for more stack, we'll get a minimum of StackMin bytes. // If we ask for more stack, we'll get a minimum of StackMin bytes.
...@@ -627,6 +651,8 @@ dostkoff(void) ...@@ -627,6 +651,8 @@ dostkoff(void)
if(q != P) if(q != P)
q->pcond = p->link; q->pcond = p->link;
if(q1 != P)
q1->pcond = q->link;
if(autoffset) { if(autoffset) {
p = appendp(p); p = appendp(p);
......
...@@ -439,6 +439,7 @@ dostkoff(void) ...@@ -439,6 +439,7 @@ dostkoff(void)
autoffset = 0; autoffset = 0;
q = P; q = P;
q1 = P;
if(pmorestack != P) if(pmorestack != P)
if(!(p->from.scale & NOSPLIT)) { if(!(p->from.scale & NOSPLIT)) {
p = appendp(p); // load g into CX p = appendp(p); // load g into CX
...@@ -525,6 +526,7 @@ dostkoff(void) ...@@ -525,6 +526,7 @@ dostkoff(void)
p->as = ANOP; p->as = ANOP;
q1->pcond = p; q1->pcond = p;
} }
q1 = P;
if(autoffset <= StackSmall) { if(autoffset <= StackSmall) {
// small stack: SP <= stackguard // small stack: SP <= stackguard
...@@ -548,14 +550,37 @@ dostkoff(void) ...@@ -548,14 +550,37 @@ dostkoff(void)
p->from.type = D_AX; p->from.type = D_AX;
p->to.type = D_INDIR+D_CX; p->to.type = D_INDIR+D_CX;
} else { } else {
// such a large stack we need to protect against wraparound // Such a large stack we need to protect against wraparound
// if SP is close to zero. // if SP is close to zero.
// SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall) // SP-stackguard+StackGuard <= framesize + (StackGuard-StackSmall)
// The +StackGuard on both sides is required to keep the left side positive: // The +StackGuard on both sides is required to keep the left side positive:
// SP is allowed to be slightly below stackguard. See stack.h. // SP is allowed to be slightly below stackguard. See stack.h.
//
// Preemption sets stackguard to StackPreempt, a very large value.
// That breaks the math above, so we have to check for that explicitly.
// MOVL stackguard, CX
// CMPL CX, $StackPreempt
// JEQ label-of-call-to-morestack
// LEAL StackGuard(SP), AX // LEAL StackGuard(SP), AX
// SUBL stackguard, AX // SUBL stackguard, AX
// CMPL AX, $(autoffset+(StackGuard-StackSmall)) // CMPL AX, $(autoffset+(StackGuard-StackSmall))
p = appendp(p);
p->as = AMOVL;
p->from.type = D_INDIR+D_CX;
p->from.offset = 0;
p->to.type = D_SI;
p = appendp(p);
p->as = ACMPL;
p->from.type = D_SI;
p->to.type = D_CONST;
p->to.offset = (uint32)StackPreempt;
p = appendp(p);
p->as = AJEQ;
p->to.type = D_BRANCH;
q1 = p;
p = appendp(p); p = appendp(p);
p->as = ALEAL; p->as = ALEAL;
p->from.type = D_INDIR+D_SP; p->from.type = D_INDIR+D_SP;
...@@ -564,7 +589,7 @@ dostkoff(void) ...@@ -564,7 +589,7 @@ dostkoff(void)
p = appendp(p); p = appendp(p);
p->as = ASUBL; p->as = ASUBL;
p->from.type = D_INDIR+D_CX; p->from.type = D_SI;
p->from.offset = 0; p->from.offset = 0;
p->to.type = D_AX; p->to.type = D_AX;
...@@ -618,6 +643,8 @@ dostkoff(void) ...@@ -618,6 +643,8 @@ dostkoff(void)
if(q != P) if(q != P)
q->pcond = p->link; q->pcond = p->link;
if(q1 != P)
q1->pcond = q->link;
if(autoffset) { if(autoffset) {
p = appendp(p); p = appendp(p);
......
...@@ -219,6 +219,76 @@ func stackGrowthRecursive(i int) { ...@@ -219,6 +219,76 @@ func stackGrowthRecursive(i int) {
} }
} }
func TestPreemptSplitBig(t *testing.T) {
if testing.Short() {
t.Skip("skipping in -short mode")
}
defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(2))
stop := make(chan int)
go big(stop)
for i := 0; i < 3; i++ {
time.Sleep(1 * time.Microsecond) // let big start running
runtime.GC()
}
close(stop)
}
func big(stop chan int) int {
n := 0
for {
// delay so that gc is sure to have asked for a preemption
for i := int64(0); i < 1e9; i++ {
n++
}
// call bigframe, which used to miss the preemption in its prologue.
bigframe(stop)
// check if we've been asked to stop.
select {
case <-stop:
return n
}
}
}
func bigframe(stop chan int) int {
// not splitting the stack will overflow.
// small will notice that it needs a stack split and will
// catch the overflow.
var x [8192]byte
return small(stop, &x)
}
func small(stop chan int, x *[8192]byte) int {
for i := range x {
x[i] = byte(i)
}
sum := 0
for i := range x {
sum += int(x[i])
}
// keep small from being a leaf function, which might
// make it not do any stack check at all.
nonleaf(stop)
return sum
}
func nonleaf(stop chan int) bool {
// do something that won't be inlined:
select {
case <-stop:
return true
default:
return false
}
}
func poll() {
}
func TestSchedLocalQueue(t *testing.T) { func TestSchedLocalQueue(t *testing.T) {
runtime.TestSchedLocalQueue1() runtime.TestSchedLocalQueue1()
} }
......
...@@ -241,7 +241,7 @@ runtime·newstack(void) ...@@ -241,7 +241,7 @@ runtime·newstack(void)
runtime·throw("runtime: stack split argsize"); runtime·throw("runtime: stack split argsize");
} }
if(gp->stackguard0 == StackPreempt) { if(gp->stackguard0 == (uintptr)StackPreempt) {
if(gp == m->g0) if(gp == m->g0)
runtime·throw("runtime: preempt g0"); runtime·throw("runtime: preempt g0");
if(oldstatus == Grunning && (m->p == nil || m->p->status != Prunning)) if(oldstatus == Grunning && (m->p == nil || m->p->status != Prunning))
......
...@@ -109,4 +109,4 @@ enum { ...@@ -109,4 +109,4 @@ enum {
// Stored into g->stackguard0 to cause split stack check failure. // Stored into g->stackguard0 to cause split stack check failure.
// Must be greater than any real sp. // Must be greater than any real sp.
// 0xfffffade in hex. // 0xfffffade in hex.
#define StackPreempt ((uintptr)-1314) #define StackPreempt ((uint64)-1314)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment