Commit bef4efc8 authored by Austin Clements's avatar Austin Clements

internal/trace: add "per-P" MMU analysis

The current MMU analysis considers all Ps together, so if, for
example, one of four Ps is blocked, mutator utilization is 75%.
However, this is less useful for understanding the impact on
individual goroutines because that one blocked goroutine could be
blocked for a very long time, but we still appear to have good
utilization.

Hence, this introduces a new flag that does a "per-P" analysis where
the utilization of each P is considered independently. The MMU is then
the combination of the MMU for each P's utilization function.

Change-Id: Id67b980d4d82b511d28300cdf92ccbb5ae8f0c78
Reviewed-on: https://go-review.googlesource.com/c/60797
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: 's avatarHyang-Ah Hana Kim <hyangah@gmail.com>
parent 27920c8d
...@@ -27,12 +27,12 @@ func init() { ...@@ -27,12 +27,12 @@ func init() {
var mmuCache struct { var mmuCache struct {
init sync.Once init sync.Once
util []trace.MutatorUtil util [][]trace.MutatorUtil
mmuCurve *trace.MMUCurve mmuCurve *trace.MMUCurve
err error err error
} }
func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) { func getMMUCurve() ([][]trace.MutatorUtil, *trace.MMUCurve, error) {
mmuCache.init.Do(func() { mmuCache.init.Do(func() {
tr, err := parseTrace() tr, err := parseTrace()
if err != nil { if err != nil {
...@@ -69,7 +69,16 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) { ...@@ -69,7 +69,16 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) {
// Cover six orders of magnitude. // Cover six orders of magnitude.
xMax := xMin * 1e6 xMax := xMin * 1e6
// But no more than the length of the trace. // But no more than the length of the trace.
if maxMax := time.Duration(mu[len(mu)-1].Time - mu[0].Time); xMax > maxMax { minEvent, maxEvent := mu[0][0].Time, mu[0][len(mu[0])-1].Time
for _, mu1 := range mu[1:] {
if mu1[0].Time < minEvent {
minEvent = mu1[0].Time
}
if mu1[len(mu1)-1].Time > maxEvent {
maxEvent = mu1[len(mu1)-1].Time
}
}
if maxMax := time.Duration(maxEvent - minEvent); xMax > maxMax {
xMax = maxMax xMax = maxMax
} }
// Compute MMU curve. // Compute MMU curve.
......
...@@ -36,27 +36,64 @@ const ( ...@@ -36,27 +36,64 @@ const (
UtilAssist UtilAssist
// UtilSweep means utilization should account for sweeping. // UtilSweep means utilization should account for sweeping.
UtilSweep UtilSweep
// UtilPerProc means each P should be given a separate
// utilization function. Otherwise, there is a single function
// and each P is given a fraction of the utilization.
UtilPerProc
) )
// MutatorUtilization returns the mutator utilization function for the // MutatorUtilization returns a set of mutator utilization functions
// given trace. This function will always end with 0 utilization. The // for the given trace. Each function will always end with 0
// bounds of the function are implicit in the first and last event; // utilization. The bounds of each function are implicit in the first
// outside of these bounds the function is undefined. // and last event; outside of these bounds each function is undefined.
func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { //
// If the UtilPerProc flag is not given, this always returns a single
// utilization function. Otherwise, it returns one function per P.
func (p *Parsed) MutatorUtilization(flags UtilFlags) [][]MutatorUtil {
events := p.Events events := p.Events
if len(events) == 0 { if len(events) == 0 {
return nil return nil
} }
gomaxprocs, gcPs, stw := 1, 0, 0 type perP struct {
out := []MutatorUtil{{events[0].Ts, 1}} // gc > 0 indicates that GC is active on this P.
gc int
// series the logical series number for this P. This
// is necessary because Ps may be removed and then
// re-added, and then the new P needs a new series.
series int
}
ps := []perP{}
stw := 0
out := [][]MutatorUtil{}
assists := map[uint64]bool{} assists := map[uint64]bool{}
block := map[uint64]*Event{} block := map[uint64]*Event{}
bgMark := map[uint64]bool{} bgMark := map[uint64]bool{}
for _, ev := range events { for _, ev := range events {
switch ev.Type { switch ev.Type {
case EvGomaxprocs: case EvGomaxprocs:
gomaxprocs = int(ev.Args[0]) gomaxprocs := int(ev.Args[0])
if len(ps) > gomaxprocs {
if flags&UtilPerProc != 0 {
// End each P's series.
for _, p := range ps[gomaxprocs:] {
out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0})
}
}
ps = ps[:gomaxprocs]
}
for len(ps) < gomaxprocs {
// Start new P's series.
series := 0
if flags&UtilPerProc != 0 || len(out) == 0 {
series = len(out)
out = append(out, []MutatorUtil{{ev.Ts, 1}})
}
ps = append(ps, perP{series: series})
}
case EvGCSTWStart: case EvGCSTWStart:
if flags&UtilSTW != 0 { if flags&UtilSTW != 0 {
stw++ stw++
...@@ -67,33 +104,41 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { ...@@ -67,33 +104,41 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil {
} }
case EvGCMarkAssistStart: case EvGCMarkAssistStart:
if flags&UtilAssist != 0 { if flags&UtilAssist != 0 {
gcPs++ ps[ev.P].gc++
assists[ev.G] = true assists[ev.G] = true
} }
case EvGCMarkAssistDone: case EvGCMarkAssistDone:
if flags&UtilAssist != 0 { if flags&UtilAssist != 0 {
gcPs-- ps[ev.P].gc--
delete(assists, ev.G) delete(assists, ev.G)
} }
case EvGCSweepStart: case EvGCSweepStart:
if flags&UtilSweep != 0 { if flags&UtilSweep != 0 {
gcPs++ ps[ev.P].gc++
} }
case EvGCSweepDone: case EvGCSweepDone:
if flags&UtilSweep != 0 { if flags&UtilSweep != 0 {
gcPs-- ps[ev.P].gc--
} }
case EvGoStartLabel: case EvGoStartLabel:
if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" {
// Background mark worker. // Background mark worker.
bgMark[ev.G] = true //
gcPs++ // If we're in per-proc mode, we don't
// count dedicated workers because
// they kick all of the goroutines off
// that P, so don't directly
// contribute to goroutine latency.
if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") {
bgMark[ev.G] = true
ps[ev.P].gc++
}
} }
fallthrough fallthrough
case EvGoStart: case EvGoStart:
if assists[ev.G] { if assists[ev.G] {
// Unblocked during assist. // Unblocked during assist.
gcPs++ ps[ev.P].gc++
} }
block[ev.G] = ev.Link block[ev.G] = ev.Link
default: default:
...@@ -103,49 +148,77 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { ...@@ -103,49 +148,77 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil {
if assists[ev.G] { if assists[ev.G] {
// Blocked during assist. // Blocked during assist.
gcPs-- ps[ev.P].gc--
} }
if bgMark[ev.G] { if bgMark[ev.G] {
// Background mark worker done. // Background mark worker done.
gcPs-- ps[ev.P].gc--
delete(bgMark, ev.G) delete(bgMark, ev.G)
} }
delete(block, ev.G) delete(block, ev.G)
} }
ps := gcPs if flags&UtilPerProc == 0 {
if stw > 0 { // Compute the current average utilization.
ps = gomaxprocs if len(ps) == 0 {
} continue
mu := MutatorUtil{ev.Ts, 1 - float64(ps)/float64(gomaxprocs)}
if mu.Util == out[len(out)-1].Util {
// No change.
continue
}
if mu.Time == out[len(out)-1].Time {
// Take the lowest utilization at a time stamp.
if mu.Util < out[len(out)-1].Util {
out[len(out)-1] = mu
} }
gcPs := 0
if stw > 0 {
gcPs = len(ps)
} else {
for i := range ps {
if ps[i].gc > 0 {
gcPs++
}
}
}
mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))}
// Record the utilization change. (Since
// len(ps) == len(out), we know len(out) > 0.)
out[0] = addUtil(out[0], mu)
} else { } else {
out = append(out, mu) // Check for per-P utilization changes.
for i := range ps {
p := &ps[i]
util := 1.0
if stw > 0 || p.gc > 0 {
util = 0.0
}
out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util})
}
} }
} }
// Add final 0 utilization event. This is important to mark // Add final 0 utilization event to any remaining series. This
// the end of the trace. The exact value shouldn't matter // is important to mark the end of the trace. The exact value
// since no window should extend beyond this, but using 0 is // shouldn't matter since no window should extend beyond this,
// symmetric with the start of the trace. // but using 0 is symmetric with the start of the trace.
endTime := events[len(events)-1].Ts mu := MutatorUtil{events[len(events)-1].Ts, 0}
if out[len(out)-1].Time == endTime { for i := range ps {
out[len(out)-1].Util = 0 out[ps[i].series] = addUtil(out[ps[i].series], mu)
} else {
out = append(out, MutatorUtil{endTime, 0})
} }
return out return out
} }
func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil {
if len(util) > 0 {
if mu.Util == util[len(util)-1].Util {
// No change.
return util
}
if mu.Time == util[len(util)-1].Time {
// Take the lowest utilization at a time stamp.
if mu.Util < util[len(util)-1].Util {
util[len(util)-1] = mu
}
return util
}
}
return append(util, mu)
}
// totalUtil is total utilization, measured in nanoseconds. This is a // totalUtil is total utilization, measured in nanoseconds. This is a
// separate type primarily to distinguish it from mean utilization, // separate type primarily to distinguish it from mean utilization,
// which is also a float64. // which is also a float64.
...@@ -163,6 +236,10 @@ func (u totalUtil) mean(dur time.Duration) float64 { ...@@ -163,6 +236,10 @@ func (u totalUtil) mean(dur time.Duration) float64 {
// An MMUCurve is the minimum mutator utilization curve across // An MMUCurve is the minimum mutator utilization curve across
// multiple window sizes. // multiple window sizes.
type MMUCurve struct { type MMUCurve struct {
series []mmuSeries
}
type mmuSeries struct {
util []MutatorUtil util []MutatorUtil
// sums[j] is the cumulative sum of util[:j]. // sums[j] is the cumulative sum of util[:j].
sums []totalUtil sums []totalUtil
...@@ -188,7 +265,15 @@ type mmuBand struct { ...@@ -188,7 +265,15 @@ type mmuBand struct {
// NewMMUCurve returns an MMU curve for the given mutator utilization // NewMMUCurve returns an MMU curve for the given mutator utilization
// function. // function.
func NewMMUCurve(util []MutatorUtil) *MMUCurve { func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve {
series := make([]mmuSeries, len(utils))
for i, util := range utils {
series[i] = newMMUSeries(util)
}
return &MMUCurve{series}
}
func newMMUSeries(util []MutatorUtil) mmuSeries {
// Compute cumulative sum. // Compute cumulative sum.
sums := make([]totalUtil, len(util)) sums := make([]totalUtil, len(util))
var prev MutatorUtil var prev MutatorUtil
...@@ -218,10 +303,10 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { ...@@ -218,10 +303,10 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve {
// Compute the bands. There are numBands+1 bands in order to // Compute the bands. There are numBands+1 bands in order to
// record the final cumulative sum. // record the final cumulative sum.
bands := make([]mmuBand, numBands+1) bands := make([]mmuBand, numBands+1)
c := MMUCurve{util, sums, bands, bandDur} s := mmuSeries{util, sums, bands, bandDur}
leftSum := integrator{&c, 0} leftSum := integrator{&s, 0}
for i := range bands { for i := range bands {
startTime, endTime := c.bandTime(i) startTime, endTime := s.bandTime(i)
cumUtil := leftSum.advance(startTime) cumUtil := leftSum.advance(startTime)
predIdx := leftSum.pos predIdx := leftSum.pos
minUtil := 1.0 minUtil := 1.0
...@@ -231,16 +316,18 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { ...@@ -231,16 +316,18 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve {
bands[i] = mmuBand{minUtil, cumUtil, leftSum} bands[i] = mmuBand{minUtil, cumUtil, leftSum}
} }
return &c return s
} }
func (c *MMUCurve) bandTime(i int) (start, end int64) { func (s *mmuSeries) bandTime(i int) (start, end int64) {
start = int64(i)*c.bandDur + c.util[0].Time start = int64(i)*s.bandDur + s.util[0].Time
end = start + c.bandDur end = start + s.bandDur
return return
} }
type bandUtil struct { type bandUtil struct {
// Utilization series index
series int
// Band index // Band index
i int i int
// Lower bound of mutator utilization for all windows // Lower bound of mutator utilization for all windows
...@@ -402,12 +489,22 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { ...@@ -402,12 +489,22 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
acc.mmu = 0 acc.mmu = 0
return return
} }
util := c.util
if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max {
window = max
}
bandU := bandUtilHeap(c.mkBandUtil(window)) var bandU bandUtilHeap
windows := make([]time.Duration, len(c.series))
for i, s := range c.series {
windows[i] = window
if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max {
windows[i] = max
}
bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i]))
if bandU == nil {
bandU = bandU1
} else {
bandU = append(bandU, bandU1...)
}
}
// Process bands from lowest utilization bound to highest. // Process bands from lowest utilization bound to highest.
heap.Init(&bandU) heap.Init(&bandU)
...@@ -416,12 +513,13 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { ...@@ -416,12 +513,13 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) {
// refining the next lowest band can no longer affect the MMU // refining the next lowest band can no longer affect the MMU
// or windows. // or windows.
for len(bandU) > 0 && bandU[0].utilBound < acc.bound { for len(bandU) > 0 && bandU[0].utilBound < acc.bound {
c.bandMMU(bandU[0].i, window, acc) i := bandU[0].series
c.series[i].bandMMU(bandU[0].i, windows[i], acc)
heap.Pop(&bandU) heap.Pop(&bandU)
} }
} }
func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil {
// For each band, compute the worst-possible total mutator // For each band, compute the worst-possible total mutator
// utilization for all windows that start in that band. // utilization for all windows that start in that band.
...@@ -477,7 +575,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { ...@@ -477,7 +575,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil {
util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil
} }
bandU[i] = bandUtil{i, util.mean(window)} bandU[i] = bandUtil{series, i, util.mean(window)}
} }
return bandU return bandU
...@@ -485,7 +583,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { ...@@ -485,7 +583,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil {
// bandMMU computes the precise minimum mutator utilization for // bandMMU computes the precise minimum mutator utilization for
// windows with a left edge in band bandIdx. // windows with a left edge in band bandIdx.
func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) {
util := c.util util := c.util
// We think of the mutator utilization over time as the // We think of the mutator utilization over time as the
...@@ -542,7 +640,7 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) ...@@ -542,7 +640,7 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator)
// An integrator tracks a position in a utilization function and // An integrator tracks a position in a utilization function and
// integrates it. // integrates it.
type integrator struct { type integrator struct {
u *MMUCurve u *mmuSeries
// pos is the index in u.util of the current time's non-strict // pos is the index in u.util of the current time's non-strict
// predecessor. // predecessor.
pos int pos int
......
...@@ -29,14 +29,14 @@ func TestMMU(t *testing.T) { ...@@ -29,14 +29,14 @@ func TestMMU(t *testing.T) {
// 0.5 * * * * // 0.5 * * * *
// 0.0 ***** ***** // 0.0 ***** *****
// 0 1 2 3 4 5 // 0 1 2 3 4 5
util := []MutatorUtil{ util := [][]MutatorUtil{{
{0e9, 1}, {0e9, 1},
{1e9, 0}, {1e9, 0},
{2e9, 1}, {2e9, 1},
{3e9, 0}, {3e9, 0},
{4e9, 1}, {4e9, 1},
{5e9, 0}, {5e9, 0},
} }}
mmuCurve := NewMMUCurve(util) mmuCurve := NewMMUCurve(util)
for _, test := range []struct { for _, test := range []struct {
...@@ -90,7 +90,7 @@ func TestMMUTrace(t *testing.T) { ...@@ -90,7 +90,7 @@ func TestMMUTrace(t *testing.T) {
// Test the optimized implementation against the "obviously // Test the optimized implementation against the "obviously
// correct" implementation. // correct" implementation.
for window := time.Nanosecond; window < 10*time.Second; window *= 10 { for window := time.Nanosecond; window < 10*time.Second; window *= 10 {
want := mmuSlow(mu, window) want := mmuSlow(mu[0], window)
got := mmuCurve.MMU(window) got := mmuCurve.MMU(window)
if !aeq(want, got) { if !aeq(want, got) {
t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window) t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment