Commit 27ee719f authored by Raul Silvera, committed by Minux Ma

pprof: improve sampling for heap profiling

The current heap sampling introduces some bias that interferes
with unsampling, producing unexpected heap profiles.
The solution is to use a Poisson process to generate the
sampling points, using the formulas described at
https://en.wikipedia.org/wiki/Poisson_process

This fixes #12620

Change-Id: If2400809ed3c41de504dd6cff06be14e476ff96c
Reviewed-on: https://go-review.googlesource.com/14590
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Minux Ma <minux@golang.org>
Run-TryBot: Minux Ma <minux@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 0357c38a
......
@@ -26,6 +26,8 @@ var Xadduintptr = xadduintptr
var FuncPC = funcPC
var Fastlog2 = fastlog2
type LFNode struct {
Next uint64
Pushcnt uintptr
......
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime
import "unsafe"
// fastlog2 implements a fast approximation to the base 2 log of a
// float64. This is used to compute a geometric distribution for heap
// sampling, without introducing dependences into package math. This
// uses a very rough approximation using the float64 exponent and the
// first 25 bits of the mantissa. The top 5 bits of the mantissa are
// used to load limits from a table of constants and the rest are used
// to scale linearly between them.
func fastlog2(x float64) float64 {
const fastlogScaleBits = 20
const fastlogScaleRatio = 1.0 / (1 << fastlogScaleBits)
xBits := float64bits(x)
// Extract the exponent from the IEEE float64, and index a constant
// table with the top fastlogNumBits (5) bits of the mantissa.
xExp := int64((xBits>>52)&0x7FF) - 1023
xManIndex := (xBits >> (52 - fastlogNumBits)) % (1 << fastlogNumBits)
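// The next fastlogScaleBits bits of the mantissa select where to interpolate linearly between the two adjacent table entries.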
xManScale := (xBits >> (52 - fastlogNumBits - fastlogScaleBits)) % (1 << fastlogScaleBits)
low, high := fastlog2Table[xManIndex], fastlog2Table[xManIndex+1]
return float64(xExp) + low + (high-low)*float64(xManScale)*fastlogScaleRatio
}
// float64bits returns the IEEE 754 binary representation of f.
// Taken from math.Float64bits to avoid dependences into package math.
func float64bits(f float64) uint64 { return *(*uint64)(unsafe.Pointer(&f)) }
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runtime_test
import (
"math"
"runtime"
"testing"
)
func TestFastLog2(t *testing.T) {
// Compute the Euclidean distance between math.Log2 and the Fastlog2
// implementation over the range of interest for heap sampling.
const randomBitCount = 26
var e float64
for i := 1; i < 1<<randomBitCount; i++ {
l, fl := math.Log2(float64(i)), runtime.Fastlog2(float64(i))
d := l - fl
e += d * d
}
e = math.Sqrt(e)
if e > 1.0 {
t.Fatalf("imprecision on fastlog2 implementation, want <=1.0, got %f", e)
}
}
// AUTO-GENERATED by mkfastlog2table.go
// Run go generate from src/runtime to update.
// See mkfastlog2table.go for comments.
package runtime
const fastlogNumBits = 5
var fastlog2Table = [1<<fastlogNumBits + 1]float64{
0,
0.0443941193584535,
0.08746284125033943,
0.12928301694496647,
0.16992500144231248,
0.2094533656289499,
0.24792751344358555,
0.28540221886224837,
0.3219280948873623,
0.3575520046180837,
0.39231742277876036,
0.4262647547020979,
0.4594316186372973,
0.4918530963296748,
0.5235619560570128,
0.5545888516776374,
0.5849625007211563,
0.6147098441152082,
0.6438561897747247,
0.6724253419714956,
0.7004397181410922,
0.7279204545631992,
0.7548875021634686,
0.7813597135246596,
0.8073549220576042,
0.8328900141647417,
0.8579809951275721,
0.8826430493618412,
0.9068905956085185,
0.9307373375628862,
0.9541963103868752,
0.9772799234999164,
1,
}
......
@@ -792,28 +792,45 @@ func rawmem(size uintptr) unsafe.Pointer {
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
-c := mp.mcache
-rate := MemProfileRate
-if size < uintptr(rate) {
-// pick next profile time
-// If you change this, also change allocmcache.
-if rate > 0x3fffffff { // make 2*rate not overflow
-rate = 0x3fffffff
-}
-next := int32(fastrand1()) % (2 * int32(rate))
-// Subtract the "remainder" of the current allocation.
-// Otherwise objects that are close in size to sampling rate
-// will be under-sampled, because we consistently discard this remainder.
-next -= (int32(size) - c.next_sample)
-if next < 0 {
-next = 0
-}
-c.next_sample = next
-}
+mp.mcache.next_sample = nextSample()
mProf_Malloc(x, size)
}
// nextSample returns the next sampling point for heap profiling.
// It produces a random variable with a geometric distribution and
// mean MemProfileRate. This is done by generating a uniformly
// distributed random number and applying the cumulative distribution
// function for an exponential.
func nextSample() int32 {
period := MemProfileRate
// make nextSample not overflow. Maximum possible step is
// -ln(1/(1<<randomBitCount)) * period, approximately 20 * period.
switch {
case period > 0x7000000:
period = 0x7000000
case period == 0:
return 0
}
// Let m be the sample rate (so period = 1/m is the mean distance
// between samples, i.e. MemProfileRate).
// The probability density function is m*exp(-mx), so the CDF is
// p = 1 - exp(-mx), so
// q = 1 - p == exp(-mx)
// log_e(q) = -mx
// -log_e(q)/m = x
// x = -log_e(q) * period
// x = log_2(q) * (-log_e(2)) * period ; using log_2 for efficiency
const randomBitCount = 26
q := uint32(fastrand1())%(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
if qlog > 0 {
qlog = 0
}
const minusLog2 = -0.6931471805599453 // -ln(2)
return int32(qlog*(minusLog2*float64(period))) + 1
}
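For intuition, here is a minimal, self-contained sketch (not part of this change) of the same inverse-CDF construction that nextSample uses, with math.Log2 and math/rand standing in for the runtime-internal fastlog2 and fastrand1; the sampleDistance helper and the driver below are illustrative assumptions only. Averaged over many draws, the sampled distances should have a mean close to the configured period (MemProfileRate).

package main

import (
	"fmt"
	"math"
	"math/rand"
)

// sampleDistance is a hypothetical user-space analogue of nextSample: it
// draws the distance to the next sampling point from an exponential
// distribution with the given mean, via the inverse CDF.
func sampleDistance(period float64) int32 {
	const randomBitCount = 26
	q := rand.Intn(1<<randomBitCount) + 1          // uniform integer in [1, 2^26]
	qlog := math.Log2(float64(q)) - randomBitCount // log2 of a uniform value in (0, 1]
	if qlog > 0 {
		qlog = 0
	}
	const minusLog2 = -0.6931471805599453 // -ln(2)
	return int32(qlog*(minusLog2*period)) + 1
}

func main() {
	const period = 512 * 1024 // e.g. the default MemProfileRate
	const n = 1000000
	var sum float64
	for i := 0; i < n; i++ {
		sum += float64(sampleDistance(period))
	}
	fmt.Printf("mean sample distance ~%.0f bytes (want ~%d)\n", sum/n, period)
}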
type persistentAlloc struct {
base unsafe.Pointer
off uintptr
......
......
@@ -69,16 +69,7 @@ func allocmcache() *mcache {
for i := 0; i < _NumSizeClasses; i++ {
c.alloc[i] = &emptymspan
}
-// Set first allocation sample size.
-rate := MemProfileRate
-if rate > 0x3fffffff { // make 2*rate not overflow
-rate = 0x3fffffff
-}
-if rate != 0 {
-c.next_sample = int32(int(fastrand1()) % (2 * rate))
-}
+c.next_sample = nextSample()
return c
}
......
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// fastlog2Table contains log2 approximations for 5 binary digits.
// This is used to implement fastlog2, which is used for heap sampling.
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"math"
)
func main() {
var buf bytes.Buffer
fmt.Fprintln(&buf, "// AUTO-GENERATED by mkfastlog2table.go")
fmt.Fprintln(&buf, "// Run go generate from src/runtime to update.")
fmt.Fprintln(&buf, "// See mkfastlog2table.go for comments.")
fmt.Fprintln(&buf)
fmt.Fprintln(&buf, "package runtime")
fmt.Fprintln(&buf)
fmt.Fprintln(&buf, "const fastlogNumBits =", fastlogNumBits)
fmt.Fprintln(&buf)
fmt.Fprintln(&buf, "var fastlog2Table = [1<<fastlogNumBits + 1]float64{")
table := computeTable()
for _, t := range table {
fmt.Fprintf(&buf, "\t%v,\n", t)
}
fmt.Fprintln(&buf, "}")
if err := ioutil.WriteFile("fastlog2table.go", buf.Bytes(), 0644); err != nil {
log.Fatalln(err)
}
}
const fastlogNumBits = 5
func computeTable() []float64 {
fastlog2Table := make([]float64, 1<<fastlogNumBits+1)
for i := 0; i <= (1 << fastlogNumBits); i++ {
fastlog2Table[i] = math.Log2(1.0 + float64(i)/(1<<fastlogNumBits))
}
return fastlog2Table
}
......
@@ -8,6 +8,7 @@ import _ "unsafe" // for go:linkname
//go:generate go run wincallback.go
//go:generate go run mkduff.go
//go:generate go run mkfastlog2table.go
var ticks struct {
lock mutex
......
// run
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Test heap sampling logic.
package main
import (
"fmt"
"math"
"runtime"
)
var a16 *[16]byte
var a512 *[512]byte
var a256 *[256]byte
var a1k *[1024]byte
var a64k *[64 * 1024]byte
// This test checks that heap sampling produces reasonable
// results. Note that heap sampling uses randomization, so the results
// vary from run to run. This test only checks that the resulting
// values appear reasonable.
func main() {
const countInterleaved = 10000
allocInterleaved(countInterleaved)
checkAllocations(getMemProfileRecords(), "main.allocInterleaved", countInterleaved, []int64{256 * 1024, 1024, 256 * 1024, 512, 256 * 1024, 256})
const count = 100000
alloc(count)
checkAllocations(getMemProfileRecords(), "main.alloc", count, []int64{1024, 512, 256})
}
// allocInterleaved stress-tests the heap sampling logic by
// interleaving large and small allocations.
func allocInterleaved(n int) {
for i := 0; i < n; i++ {
// Test verification depends on these lines being contiguous.
a64k = new([64 * 1024]byte)
a1k = new([1024]byte)
a64k = new([64 * 1024]byte)
a512 = new([512]byte)
a64k = new([64 * 1024]byte)
a256 = new([256]byte)
}
}
// alloc performs only small allocations for sanity testing.
func alloc(n int) {
for i := 0; i < n; i++ {
// Test verification depends on these lines being contiguous.
a1k = new([1024]byte)
a512 = new([512]byte)
a256 = new([256]byte)
}
}
// checkAllocations validates that the profile records collected for
// the named function are consistent with count contiguous allocations
// of the specified sizes.
func checkAllocations(records []runtime.MemProfileRecord, fname string, count int64, size []int64) {
a := allocObjects(records, fname)
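// Determine the lowest source line among the recorded allocation sites;
// the expected sizes are then matched against consecutive lines starting there.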
firstLine := 0
for ln := range a {
if firstLine == 0 || firstLine > ln {
firstLine = ln
}
}
var totalcount int64
for i, w := range size {
ln := firstLine + i
s := a[ln]
checkValue(fname, ln, "objects", count, s.objects)
checkValue(fname, ln, "bytes", count*w, s.bytes)
totalcount += s.objects
}
// Check the total number of allocations, to ensure some sampling occurred.
if totalwant := count * int64(len(size)); totalcount <= 0 || totalcount > totalwant*1024 {
panic(fmt.Sprintf("%s want total count > 0 && <= %d, got %d", fname, totalwant*1024, totalcount))
}
}
// checkValue checks an unsampled value against a range.
func checkValue(fname string, ln int, name string, want, got int64) {
if got < 0 || got > 1024*want {
panic(fmt.Sprintf("%s:%d want %s >= 0 && <= %d, got %d", fname, ln, name, 1024*want, got))
}
}
func getMemProfileRecords() []runtime.MemProfileRecord {
// Find out how many records there are (MemProfile(nil, true)),
// allocate that many records, and get the data.
// There's a race—more records might be added between
// the two calls—so allocate a few extra records for safety
// and also try again if we're very unlucky.
// The loop should only execute one iteration in the common case.
var p []runtime.MemProfileRecord
n, ok := runtime.MemProfile(nil, true)
for {
// Allocate room for a slightly bigger profile,
// in case a few more entries have been added
// since the call to MemProfile.
p = make([]runtime.MemProfileRecord, n+50)
n, ok = runtime.MemProfile(p, true)
if ok {
p = p[0:n]
break
}
// Profile grew; try again.
}
return p
}
type allocStat struct {
bytes, objects int64
}
// allocObjects examines the profile records for the named function
// and returns the allocation stats aggregated by source line number.
func allocObjects(records []runtime.MemProfileRecord, function string) map[int]allocStat {
a := make(map[int]allocStat)
for _, r := range records {
for _, s := range r.Stack0 {
if s == 0 {
break
}
if f := runtime.FuncForPC(s); f != nil {
name := f.Name()
_, line := f.FileLine(s)
if name == function {
allocStat := a[line]
allocStat.bytes += r.AllocBytes
allocStat.objects += r.AllocObjects
a[line] = allocStat
}
}
}
}
for line, stats := range a {
objects, bytes := scaleHeapSample(stats.objects, stats.bytes, int64(runtime.MemProfileRate))
a[line] = allocStat{bytes, objects}
}
return a
}
// scaleHeapSample unsamples heap allocations.
// Taken from src/cmd/pprof/internal/profile/legacy_profile.go
func scaleHeapSample(count, size, rate int64) (int64, int64) {
if count == 0 || size == 0 {
return 0, 0
}
if rate <= 1 {
// if rate==1 all samples were collected so no adjustment is needed.
// if rate<1 treat as unknown and skip scaling.
return count, size
}
avgSize := float64(size) / float64(count)
scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
return int64(float64(count) * scale), int64(float64(size) * scale)
}
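As a rough worked example with illustrative numbers (not taken from the test itself): at rate = 512*1024 bytes and a single sampled 1024-byte object, avgSize = 1024 and scale = 1/(1-exp(-1024/524288)), which is about 512.5, so that one sample unsamples to roughly 512 objects and about 525 KB of allocated bytes.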