Commit b3932bab authored by Russ Cox

runtime: fix sudog leak

The SudoG used to sit on the stack, so it was cheap to allocate
and didn't need to be cleaned up when finished.

For the conversion to Go, we had to move sudog off the stack
for a few reasons, so we added a cache of recently used sudogs
to keep allocation cheap. But we didn't add any of the necessary
cleanup before adding a SudoG to the new cache, and so the cached
SudoGs had stale pointers inside them that caused all sorts
of awful, hard-to-debug problems.
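
To make the failure mode concrete, here is a minimal sketch of the
pattern (hypothetical names, not the runtime's actual code): a free
list makes reallocation cheap, but any pointer field left set when a
node goes back on the list keeps its old referent reachable for as
long as the node sits in the cache.

	package main

	type elem struct {
		data     *int  // payload pointer, like SudoG.elem
		next     *elem // free-list link, like SudoG.next
		waitlink *elem // secondary link, like SudoG.waitlink
	}

	var cache *elem // free list of recently used nodes

	func acquire() *elem {
		if e := cache; e != nil {
			cache = e.next
			e.next = nil // don't hand the rest of the free list to the caller
			return e
		}
		return new(elem)
	}

	func release(e *elem) {
		// Scrub every pointer before caching; a field left stale here
		// is the kind of bug this CL fixes in the real sudogcache.
		e.data = nil
		e.waitlink = nil
		e.next = cache
		cache = e
	}

	func main() {
		release(acquire())
	}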

CL 155760043 made sure SudoG.elem is cleaned up.
CL 150520043 made sure SudoG.selectdone is cleaned up.

This CL makes sure SudoG.next, SudoG.prev, and SudoG.waitlink
are cleaned up. I should have done this when I did the other two
fields; instead I wasted a week tracking down a leak they caused.

A dangling SudoG.waitlink can point into a sudogcache list that
has been "forgotten" in order to let the GC collect it, but that
dangling .waitlink keeps the list from being collected.
And then the list holding the SudoG with the dangling waitlink
can find itself in the same situation, and so on. We end up
with lists of lists of unusable SudoGs that are still linked into
the object graph and never collected (given the right mix of
non-trivial selects and non-channel synchronization).
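
A standalone sketch of that pinning effect (hypothetical node type
and sizes, not the runtime's structures): one stray pointer into a
dropped list keeps every node behind it reachable until the pointer
itself dies.

	package main

	import (
		"fmt"
		"runtime"
	)

	type node struct {
		next *node
		buf  [1 << 16]byte // 64 KB per node so retention shows up in heap stats
	}

	func main() {
		var pinned, after runtime.MemStats

		head := &node{}
		n := head
		for i := 0; i < 100; i++ {
			n.next = &node{}
			n = n.next
		}
		dangling := head.next // plays the role of a stale SudoG.waitlink

		head = nil // "drop the list on the floor"
		runtime.GC()
		runtime.ReadMemStats(&pinned) // dangling alone still pins ~6 MB of nodes

		runtime.KeepAlive(dangling) // last use; the chain is unreachable after this
		runtime.GC()
		runtime.ReadMemStats(&after)

		fmt.Println(pinned.HeapInuse>>20, "MB pinned ->", after.HeapInuse>>20, "MB after")
	}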

More details in golang.org/issue/9110.

Fixes #9110.

LGTM=r
R=r
CC=dvyukov, golang-codereviews, iant, khr
https://golang.org/cl/177870043
parent 6150414c
@@ -630,6 +630,7 @@ func (q *waitq) dequeue() *sudog {
 			return nil
 		}
 		q.first = sgp.next
+		sgp.next = nil
 		if q.last == sgp {
 			q.last = nil
 		}
@@ -51,10 +51,26 @@ func clearpools() {
 		if c := p.mcache; c != nil {
 			c.tiny = nil
 			c.tinysize = 0
+
+			// disconnect cached list before dropping it on the floor,
+			// so that a dangling ref to one entry does not pin all of them.
+			var sg, sgnext *sudog
+			for sg = c.sudogcache; sg != nil; sg = sgnext {
+				sgnext = sg.next
+				sg.next = nil
+			}
 			c.sudogcache = nil
 		}
+
 		// clear defer pools
 		for i := range p.deferpool {
+			// disconnect cached list before dropping it on the floor,
+			// so that a dangling ref to one entry does not pin all of them.
+			var d, dlink *_defer
+			for d = p.deferpool[i]; d != nil; d = dlink {
+				dlink = d.link
+				d.link = nil
+			}
 			p.deferpool[i] = nil
 		}
 	}
@@ -152,6 +152,7 @@ func acquireSudog() *sudog {
 		if s.elem != nil {
 			gothrow("acquireSudog: found s.elem != nil in cache")
 		}
 		c.sudogcache = s.next
+		s.next = nil
 		return s
 	}
@@ -177,6 +178,15 @@ func releaseSudog(s *sudog) {
 	if s.selectdone != nil {
 		gothrow("runtime: sudog with non-nil selectdone")
 	}
+	if s.next != nil {
+		gothrow("runtime: sudog with non-nil next")
+	}
+	if s.prev != nil {
+		gothrow("runtime: sudog with non-nil prev")
+	}
+	if s.waitlink != nil {
+		gothrow("runtime: sudog with non-nil waitlink")
+	}
 	gp := getg()
 	if gp.param != nil {
 		gothrow("runtime: releaseSudog with non-nil gp.param")
@@ -404,6 +404,7 @@ loop:
 			}
 		}
 		sgnext = sglist.waitlink
+		sglist.waitlink = nil
 		releaseSudog(sglist)
 		sglist = sgnext
 	}
@@ -641,6 +642,7 @@ func (q *waitq) dequeueSudoG(s *sudog) {
 			if q.last == sgp {
 				q.last = prevsgp
 			}
+			s.next = nil
 			return
 		}
 		l = &sgp.next
@@ -201,6 +201,7 @@ func syncsemacquire(s *syncSema) {
 		}
 		unlock(&s.lock)
 		if wake != nil {
+			wake.next = nil
 			goready(wake.g)
 		}
 	} else {
@@ -242,6 +243,7 @@ func syncsemrelease(s *syncSema, n uint32) {
 		if wake.releasetime != 0 {
 			wake.releasetime = cputicks()
 		}
+		wake.next = nil
 		goready(wake.g)
 		n--
 	}
// run

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Scenario that used to leak arbitrarily many SudoG structs.
// See golang.org/issue/9110.

package main

import (
	"runtime"
	"runtime/debug"
	"sync"
	"time"
)

func main() {
	debug.SetGCPercent(1000000) // only GC when we ask for GC

	var stats, stats1, stats2 runtime.MemStats

	release := func() {}
	for i := 0; i < 20; i++ {
		if i == 10 {
			// Should be warmed up by now.
			runtime.ReadMemStats(&stats1)
		}

		c := make(chan int)
		for i := 0; i < 10; i++ {
			go func() {
				select {
				case <-c:
				case <-c:
				case <-c:
				}
			}()
		}
		time.Sleep(1 * time.Millisecond)
		release()

		close(c) // let select put its sudog's into the cache
		time.Sleep(1 * time.Millisecond)

		// pick up top sudog
		var cond1 sync.Cond
		var mu1 sync.Mutex
		cond1.L = &mu1
		go func() {
			mu1.Lock()
			cond1.Wait()
			mu1.Unlock()
		}()
		time.Sleep(1 * time.Millisecond)

		// pick up next sudog
		var cond2 sync.Cond
		var mu2 sync.Mutex
		cond2.L = &mu2
		go func() {
			mu2.Lock()
			cond2.Wait()
			mu2.Unlock()
		}()
		time.Sleep(1 * time.Millisecond)

		// put top sudog back
		cond1.Broadcast()
		time.Sleep(1 * time.Millisecond)

		// drop cache on floor
		runtime.GC()

		// release cond2 after select has gotten to run
		release = func() {
			cond2.Broadcast()
			time.Sleep(1 * time.Millisecond)
		}
	}

	runtime.GC()
	runtime.ReadMemStats(&stats2)

	if int(stats2.HeapObjects)-int(stats1.HeapObjects) > 20 { // normally at most 1 or 2; was 300 with leak
		print("BUG: object leak: ", stats.HeapObjects, " -> ", stats1.HeapObjects, " -> ", stats2.HeapObjects, "\n")
	}
}