Commit 1a680a90 authored by Russ Cox

runtime/pprof: use new profile buffers for CPU profiling

This doesn't change the functionality of the current code,
but it sets us up for exporting the profiling labels into the profile.

The old code had a hash table of profile samples maintained
during the signal handler, with evictions going into a log.
The new code just logs every sample directly, leaving the
hash-based deduplication to an ordinary goroutine.

The new code also avoids storing the entire profile in two
forms in memory, an unfortunate regression introduced
when binary profile support was added. After this CL the
entire profile is only stored once in memory. We'd still like
to get back down to storing it zero times (streaming it to
the underlying io.Writer).

Change-Id: I0893a1788267c564aa1af17970d47377b2a43457
Reviewed-on: https://go-review.googlesource.com/36712
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
parent a1261b8b
This diff is collapsed.
...@@ -83,6 +83,7 @@ import ( ...@@ -83,6 +83,7 @@ import (
"sync" "sync"
"text/tabwriter" "text/tabwriter"
"time" "time"
"unsafe"
) )
// BUG(rsc): Profiles are only as good as the kernel support used to generate them. // BUG(rsc): Profiles are only as good as the kernel support used to generate them.
...@@ -696,30 +697,33 @@ func StartCPUProfile(w io.Writer) error { ...@@ -696,30 +697,33 @@ func StartCPUProfile(w io.Writer) error {
return nil return nil
} }
// readProfile, provided by the runtime, returns the next chunk of
// binary CPU profiling stack trace data, blocking until data is available.
// If profiling is turned off and all the profile data accumulated while it was
// on has been returned, readProfile returns eof=true.
// The caller must save the returned data and tags before calling readProfile again.
func readProfile() (data []uint64, tags []unsafe.Pointer, eof bool)
func profileWriter(w io.Writer) { func profileWriter(w io.Writer) {
startTime := time.Now() b := newProfileBuilder()
// This will buffer the entire profile into buf and then var err error
// translate it into a profile.Profile structure. This will
// create two copies of all the data in the profile in memory.
// TODO(matloob): Convert each chunk of the proto output and
// stream it out instead of converting the entire profile.
var buf bytes.Buffer
for { for {
data := runtime.CPUProfile() time.Sleep(100 * time.Millisecond)
if data == nil { data, _, eof := readProfile()
if e := b.addCPUData(data); e != nil && err == nil {
err = e
}
if eof {
break break
} }
buf.Write(data)
} }
p := b.build()
profile, err := translateCPUProfile(buf.Bytes(), startTime)
if err != nil { if err != nil {
// The runtime should never produce an invalid or truncated profile. // The runtime should never produce an invalid or truncated profile.
// It drops records that can't fit into its log buffers. // It drops records that can't fit into its log buffers.
panic(fmt.Errorf("could not translate binary profile to proto format: %v", err)) panic("runtime/pprof: converting profile: " + err.Error())
} }
p.Write(w)
profile.Write(w)
cpu.done <- true cpu.done <- true
} }
......
...@@ -15,50 +15,125 @@ import ( ...@@ -15,50 +15,125 @@ import (
"internal/pprof/profile" "internal/pprof/profile"
) )
// translateCPUProfile parses binary CPU profiling stack trace data // lostProfileEvent is the function to which lost profiling
// generated by runtime.CPUProfile() into a profile struct. // events are attributed.
func translateCPUProfile(b []byte, startTime time.Time) (*profile.Profile, error) { // (The name shows up in the pprof graphs.)
const wordSize = unsafe.Sizeof(uintptr(0)) func lostProfileEvent() { lostProfileEvent() }
const minRawProfile = 5 * wordSize // Need a minimum of 5 words.
if uintptr(len(b)) < minRawProfile { // funcPC returns the PC for the func value f.
return nil, fmt.Errorf("truncated profile") func funcPC(f interface{}) uintptr {
} return *(*[2]*uintptr)(unsafe.Pointer(&f))[1]
n := int(uintptr(len(b)) / wordSize) }
data := ((*[1 << 28]uintptr)(unsafe.Pointer(&b[0])))[:n:n]
period := data[3]
data = data[5:] // skip header
// profile initialization taken from pprof tool // A profileBuilder builds a profile.Profile incrementally from a
// stream of profile samples delivered by the runtime.
// TODO(rsc,matloob): In the long term, we'd like to avoid
// storing the entire profile.Profile in memory, instead streaming
// the encoded form out to an underlying writer.
// Even so, this one copy is a step forward from Go 1.8,
// which had two full copies of the data in memory.
type profileBuilder struct {
p *profile.Profile
start time.Time
havePeriod bool
locs map[uint64]*profile.Location
samples map[sampleKey]*profile.Sample
}
// A sampleKey is the key for the map from stack to profile.Sample.
// It is an unbounded array of profile.Location, broken into
// fixed-size chunks. The chunks are chained by the next field,
// which is an interface{} holding a sampleKey so that the default
// Go equality will consider the whole array contents.
// (In contrast, if next were *sampleKey or the interface{} held a
// *sampleKey, equality would only look at the pointer, not the values
// in the next sampleKey in the chain.)
// This is a bit of a hack, but it has the right effect and is expedient.
// At some point we will want to do a better job, so that lookups
// of large stacks need not allocate just to build a key.
type sampleKey struct {
loc [8]*profile.Location
i int
next interface{}
}
// newProfileBuilder returns a new profileBuilder.
// CPU profiling data obtained from the runtime can be added
// by calling b.addCPUData, and then the eventual profile
// can be obtained by calling b.build.
func newProfileBuilder() *profileBuilder {
start := time.Now()
p := &profile.Profile{ p := &profile.Profile{
Period: int64(period) * 1000,
PeriodType: &profile.ValueType{Type: "cpu", Unit: "nanoseconds"}, PeriodType: &profile.ValueType{Type: "cpu", Unit: "nanoseconds"},
SampleType: []*profile.ValueType{ SampleType: []*profile.ValueType{
{Type: "samples", Unit: "count"}, {Type: "samples", Unit: "count"},
{Type: "cpu", Unit: "nanoseconds"}, {Type: "cpu", Unit: "nanoseconds"},
}, },
TimeNanos: int64(startTime.UnixNano()), TimeNanos: int64(start.UnixNano()),
DurationNanos: time.Since(startTime).Nanoseconds(), }
return &profileBuilder{
p: p,
start: start,
locs: make(map[uint64]*profile.Location),
samples: make(map[sampleKey]*profile.Sample),
} }
}
// addCPUData adds the CPU profiling data to the profile.
// The data must be a whole number of records,
// as delivered by the runtime.
func (b *profileBuilder) addCPUData(data []uint64) error {
p := b.p
if !b.havePeriod {
// first record is period
if len(data) < 3 {
return fmt.Errorf("truncated profile")
}
if data[0] != 3 || data[2] == 0 {
return fmt.Errorf("malformed profile")
}
period := int64(data[2])
p.Period = period * 1000
data = data[3:]
b.havePeriod = true
}
// Parse CPU samples from the profile. // Parse CPU samples from the profile.
locs := make(map[uint64]*profile.Location) // Each sample is 3+n uint64s:
for len(b) > 0 { // data[0] = 3+n
if len(data) < 2 || uintptr(len(data)) < 2+data[1] { // data[1] = time stamp (ignored)
return nil, fmt.Errorf("truncated profile") // data[2] = count
// data[3:3+n] = stack
// If the count is 0 and the stack has length 1,
// that's an overflow record inserted by the runtime
// to indicate that stack[0] samples were lost.
// Otherwise the count is usually 1,
// but in a few special cases like lost non-Go samples
// there can be larger counts.
// Because many samples with the same stack arrive,
// we want to deduplicate immediately, which we do
// using the b.samples map.
for len(data) > 0 {
if len(data) < 3 || data[0] > uint64(len(data)) {
return fmt.Errorf("truncated profile")
} }
count := data[0] if data[0] < 3 {
nstk := data[1] return fmt.Errorf("malformed profile")
if uintptr(len(data)) < 2+nstk {
return nil, fmt.Errorf("truncated profile")
} }
stk := data[2 : 2+nstk] count := data[2]
data = data[2+nstk:] stk := data[3:data[0]]
data = data[data[0]:]
if count == 0 && nstk == 1 && stk[0] == 0 { if count == 0 && len(stk) == 1 {
// end of data marker // overflow record
break count = uint64(stk[0])
stk = []uint64{
uint64(funcPC(lostProfileEvent)),
}
} }
sloc := make([]*profile.Location, len(stk)) sloc := make([]*profile.Location, len(stk))
skey := sampleKey{}
for i, addr := range stk { for i, addr := range stk {
addr := uint64(addr) addr := uint64(addr)
// Addresses from stack traces point to the next instruction after // Addresses from stack traces point to the next instruction after
...@@ -67,40 +142,57 @@ func translateCPUProfile(b []byte, startTime time.Time) (*profile.Profile, error ...@@ -67,40 +142,57 @@ func translateCPUProfile(b []byte, startTime time.Time) (*profile.Profile, error
if i > 0 { if i > 0 {
addr-- addr--
} }
loc := locs[addr] loc := b.locs[addr]
if loc == nil { if loc == nil {
loc = &profile.Location{ loc = &profile.Location{
ID: uint64(len(p.Location) + 1), ID: uint64(len(p.Location) + 1),
Address: addr, Address: addr,
} }
locs[addr] = loc b.locs[addr] = loc
p.Location = append(p.Location, loc) p.Location = append(p.Location, loc)
} }
sloc[i] = loc sloc[i] = loc
if skey.i == len(skey.loc) {
skey = sampleKey{next: skey}
}
skey.loc[skey.i] = loc
skey.i++
} }
p.Sample = append(p.Sample, &profile.Sample{ s := b.samples[skey]
Value: []int64{int64(count), int64(count) * int64(p.Period)}, if s == nil {
Location: sloc, s = &profile.Sample{
}) Value: []int64{0, 0},
Location: sloc,
}
b.samples[skey] = s
p.Sample = append(p.Sample, s)
}
s.Value[0] += int64(count)
s.Value[1] += int64(count) * int64(p.Period)
} }
return nil
}
// build completes and returns the constructed profile.
func (b *profileBuilder) build() *profile.Profile {
b.p.DurationNanos = time.Since(b.start).Nanoseconds()
if runtime.GOOS == "linux" { if runtime.GOOS == "linux" {
if err := addMappings(p); err != nil { addMappings(b.p)
return nil, err
}
} }
symbolize(p) symbolize(b.p)
return p, nil return b.p
} }
func addMappings(p *profile.Profile) error { // addMappings adds information from /proc/self/maps
// to the profile if possible.
func addMappings(p *profile.Profile) {
// Parse memory map from /proc/self/maps // Parse memory map from /proc/self/maps
f, err := os.Open("/proc/self/maps") f, err := os.Open("/proc/self/maps")
if err != nil { if err != nil {
return err return
} }
defer f.Close() p.ParseMemoryMap(f)
return p.ParseMemoryMap(f) f.Close()
} }
type function interface { type function interface {
......
...@@ -6,80 +6,50 @@ package pprof ...@@ -6,80 +6,50 @@ package pprof
import ( import (
"bytes" "bytes"
"fmt" "encoding/json"
"internal/pprof/profile" "internal/pprof/profile"
"io/ioutil" "io/ioutil"
"reflect" "reflect"
"runtime" "runtime"
"testing" "testing"
"time"
"unsafe"
) )
// Helper function to initialize empty cpu profile with sampling period provided. // translateCPUProfile parses binary CPU profiling stack trace data
func createEmptyProfileWithPeriod(t *testing.T, periodMs uint64) bytes.Buffer { // generated by runtime.CPUProfile() into a profile struct.
// Mock the sample header produced by cpu profiler. Write a sample // This is only used for testing. Real conversions stream the
// period of 2000 microseconds, followed by no samples. // data into the profileBuilder as it becomes available.
buf := new(bytes.Buffer) func translateCPUProfile(data []uint64) (*profile.Profile, error) {
// Profile header is as follows: b := newProfileBuilder()
// The first, third and fifth words are 0. The second word is 3. if err := b.addCPUData(data); err != nil {
// The fourth word is the period. return nil, err
// EOD marker: }
// The sixth word -- count is initialized to 0 above. return b.build(), nil
// The code below sets the seventh word -- nstk to 1
// The eighth word -- addr is initialized to 0 above.
words := []int{0, 3, 0, int(periodMs), 0, 0, 1, 0}
n := int(unsafe.Sizeof(0)) * len(words)
data := ((*[1 << 29]byte)(unsafe.Pointer(&words[0])))[:n:n]
if _, err := buf.Write(data); err != nil {
t.Fatalf("createEmptyProfileWithPeriod failed: %v", err)
}
return *buf
} }
// Helper function to initialize cpu profile with two sample values. // fmtJSON returns a pretty-printed JSON form for x.
func createProfileWithTwoSamples(t *testing.T, periodMs uintptr, count1 uintptr, count2 uintptr, // It works reasonably well for printing protocol-buffer
address1 uintptr, address2 uintptr) bytes.Buffer { // data structures like profile.Profile.
// Mock the sample header produced by cpu profiler. Write a sample func fmtJSON(x interface{}) string {
// period of 2000 microseconds, followed by no samples. js, _ := json.MarshalIndent(x, "", "\t")
buf := new(bytes.Buffer) return string(js)
words := []uintptr{0, 3, 0, uintptr(periodMs), 0, uintptr(count1), 2,
uintptr(address1), uintptr(address1 + 2),
uintptr(count2), 2, uintptr(address2), uintptr(address2 + 2),
0, 1, 0}
for _, n := range words {
var err error
switch unsafe.Sizeof(int(0)) {
case 8:
_, err = buf.Write((*[8]byte)(unsafe.Pointer(&n))[:8:8])
case 4:
_, err = buf.Write((*[4]byte)(unsafe.Pointer(&n))[:4:4])
}
if err != nil {
t.Fatalf("createProfileWithTwoSamples failed: %v", err)
}
}
return *buf
} }
// Tests translateCPUProfile parses correct sampling period in an otherwise empty cpu profile. func TestConvertCPUProfileEmpty(t *testing.T) {
func TestTranlateCPUProfileSamplingPeriod(t *testing.T) {
// A test server with mock cpu profile data. // A test server with mock cpu profile data.
var buf bytes.Buffer var buf bytes.Buffer
startTime := time.Now() b := []uint64{3, 0, 2000} // empty profile with 2000µs sample period
b := createEmptyProfileWithPeriod(t, 2000) p, err := translateCPUProfile(b)
p, err := translateCPUProfile(b.Bytes(), startTime)
if err != nil { if err != nil {
t.Fatalf("translate failed: %v", err) t.Fatalf("translateCPUProfile: %v", err)
} }
if err := p.Write(&buf); err != nil { if err := p.Write(&buf); err != nil {
t.Fatalf("write failed: %v", err) t.Fatalf("writing profile: %v", err)
} }
p, err = profile.Parse(&buf) p, err = profile.Parse(&buf)
if err != nil { if err != nil {
t.Fatalf("Could not parse Profile profile: %v", err) t.Fatalf("profile.Parse: %v", err)
} }
// Expected PeriodType and SampleType. // Expected PeriodType and SampleType.
...@@ -94,79 +64,89 @@ func TestTranlateCPUProfileSamplingPeriod(t *testing.T) { ...@@ -94,79 +64,89 @@ func TestTranlateCPUProfileSamplingPeriod(t *testing.T) {
} }
} }
func getSampleAsString(sample []*profile.Sample) string { func f1() { f1() }
var str string func f2() { f2() }
for _, x := range sample {
for _, y := range x.Location { // testPCs returns two PCs and two corresponding memory mappings
if y.Mapping != nil { // to use in test profiles.
str += fmt.Sprintf("Mapping:%v\n", *y.Mapping) func testPCs(t *testing.T) (addr1, addr2 uint64, map1, map2 *profile.Mapping) {
} if runtime.GOOS == "linux" {
str += fmt.Sprintf("Location:%v\n", y) // Figure out two addresses from /proc/self/maps.
mmap, err := ioutil.ReadFile("/proc/self/maps")
if err != nil {
t.Fatal(err)
} }
str += fmt.Sprintf("Sample:%v\n", *x) mprof := &profile.Profile{}
} if err = mprof.ParseMemoryMap(bytes.NewReader(mmap)); err != nil {
return str t.Fatalf("parsing /proc/self/maps: %v", err)
}
if len(mprof.Mapping) < 2 {
// It is possible for a binary to only have 1 executable
// region of memory.
t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping))
}
addr1 = mprof.Mapping[0].Start
map1 = mprof.Mapping[0]
addr2 = mprof.Mapping[1].Start
map2 = mprof.Mapping[1]
} else {
addr1 = uint64(funcPC(f1))
addr2 = uint64(funcPC(f2))
}
return
} }
// Tests translateCPUProfile parses a cpu profile with sample values present. func TestConvertCPUProfile(t *testing.T) {
func TestTranslateCPUProfileWithSamples(t *testing.T) { addr1, addr2, map1, map2 := testPCs(t)
if runtime.GOOS != "linux" {
t.Skip("test requires a system with /proc/self/maps")
}
// Figure out two addresses from /proc/self/maps.
mmap, err := ioutil.ReadFile("/proc/self/maps")
if err != nil {
t.Fatal("Cannot read /proc/self/maps")
}
rd := bytes.NewReader(mmap)
mprof := &profile.Profile{}
if err = mprof.ParseMemoryMap(rd); err != nil {
t.Fatalf("Cannot parse /proc/self/maps")
}
if len(mprof.Mapping) < 2 {
// It is possible for a binary to only have 1 executable
// region of memory.
t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping))
}
address1 := mprof.Mapping[0].Start
address2 := mprof.Mapping[1].Start
// A test server with mock cpu profile data.
startTime := time.Now()
b := createProfileWithTwoSamples(t, 2000, 20, 40, uintptr(address1), uintptr(address2))
p, err := translateCPUProfile(b.Bytes(), startTime)
b := []uint64{
3, 0, 2000, // period = 2000 (microseconds)
5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
5, 0, 40, uint64(addr2), uint64(addr2 + 2), // 40 samples in addr2
5, 0, 10, uint64(addr1), uint64(addr1 + 2), // 10 samples in addr1
}
p, err := translateCPUProfile(b)
if err != nil { if err != nil {
t.Fatalf("Could not parse Profile profile: %v", err) t.Fatalf("translating profile: %v", err)
} }
// Expected PeriodType, SampleType and Sample. period := int64(2000 * 1000)
expectedPeriodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"} periodType := &profile.ValueType{Type: "cpu", Unit: "nanoseconds"}
expectedSampleType := []*profile.ValueType{ sampleType := []*profile.ValueType{
{Type: "samples", Unit: "count"}, {Type: "samples", Unit: "count"},
{Type: "cpu", Unit: "nanoseconds"}, {Type: "cpu", Unit: "nanoseconds"},
} }
expectedSample := []*profile.Sample{ samples := []*profile.Sample{
{Value: []int64{20, 20 * 2000 * 1000}, Location: []*profile.Location{ {Value: []int64{20, 20 * 2000 * 1000}, Location: []*profile.Location{
{ID: 1, Mapping: mprof.Mapping[0], Address: address1}, {ID: 1, Mapping: map1, Address: addr1},
{ID: 2, Mapping: mprof.Mapping[0], Address: address1 + 1}, {ID: 2, Mapping: map1, Address: addr1 + 1},
}}, }},
{Value: []int64{40, 40 * 2000 * 1000}, Location: []*profile.Location{ {Value: []int64{40, 40 * 2000 * 1000}, Location: []*profile.Location{
{ID: 3, Mapping: mprof.Mapping[1], Address: address2}, {ID: 3, Mapping: map2, Address: addr2},
{ID: 4, Mapping: mprof.Mapping[1], Address: address2 + 1}, {ID: 4, Mapping: map2, Address: addr2 + 1},
}}, }},
} }
if p.Period != 2000*1000 { checkProfile(t, p, period, periodType, sampleType, samples)
t.Fatalf("Sampling periods do not match") }
func checkProfile(t *testing.T, p *profile.Profile, period int64, periodType *profile.ValueType, sampleType []*profile.ValueType, samples []*profile.Sample) {
if p.Period != period {
t.Fatalf("p.Period = %d, want %d", p.Period, period)
}
if !reflect.DeepEqual(p.PeriodType, periodType) {
t.Fatalf("p.PeriodType = %v\nwant = %v", fmtJSON(p.PeriodType), fmtJSON(periodType))
} }
if !reflect.DeepEqual(p.PeriodType, expectedPeriodType) { if !reflect.DeepEqual(p.SampleType, sampleType) {
t.Fatalf("Period types do not match") t.Fatalf("p.SampleType = %v\nwant = %v", fmtJSON(p.SampleType), fmtJSON(sampleType))
} }
if !reflect.DeepEqual(p.SampleType, expectedSampleType) { // Clear line info since it is not in the expected samples.
t.Fatalf("Sample types do not match") // If we used f1 and f2 above, then the samples will have line info.
for _, s := range p.Sample {
for _, l := range s.Location {
l.Line = nil
}
} }
if !reflect.DeepEqual(p.Sample, expectedSample) { if !reflect.DeepEqual(p.Sample, samples) {
t.Fatalf("Samples do not match: Expected: %v, Got:%v", getSampleAsString(expectedSample), t.Fatalf("p.Sample = %v\nwant = %v", fmtJSON(p.Sample), fmtJSON(samples))
getSampleAsString(p.Sample))
} }
} }
...@@ -179,7 +159,7 @@ type fakeFunc struct { ...@@ -179,7 +159,7 @@ type fakeFunc struct {
func (f *fakeFunc) Name() string { func (f *fakeFunc) Name() string {
return f.name return f.name
} }
func (f *fakeFunc) FileLine(_ uintptr) (string, int) { func (f *fakeFunc) FileLine(uintptr) (string, int) {
return f.file, f.lineno return f.file, f.lineno
} }
......
...@@ -7,98 +7,54 @@ package pprof ...@@ -7,98 +7,54 @@ package pprof
import ( import (
"bytes" "bytes"
"internal/pprof/profile" "internal/pprof/profile"
"io/ioutil"
"reflect"
"runtime" "runtime"
"testing" "testing"
"time" "time"
) )
// TestSampledHeapAllocProfile tests encoding of a memory profile from func TestConvertMemProfile(t *testing.T) {
// runtime.MemProfileRecord data. addr1, addr2, map1, map2 := testPCs(t)
func TestSampledHeapAllocProfile(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("Test requires a system with /proc/self/maps")
}
// Figure out two addresses from /proc/self/maps.
mmap, err := ioutil.ReadFile("/proc/self/maps")
if err != nil {
t.Fatal("Cannot read /proc/self/maps")
}
rd := bytes.NewReader(mmap)
mprof := &profile.Profile{}
if err = mprof.ParseMemoryMap(rd); err != nil {
t.Fatalf("Cannot parse /proc/self/maps")
}
if len(mprof.Mapping) < 2 {
// It is possible for a binary to only have 1 executable
// region of memory.
t.Skipf("need 2 or more mappings, got %v", len(mprof.Mapping))
}
address1 := mprof.Mapping[0].Start
address2 := mprof.Mapping[1].Start
var buf bytes.Buffer var buf bytes.Buffer
a1, a2 := uintptr(addr1), uintptr(addr2)
rate := int64(512 * 1024)
rec := []runtime.MemProfileRecord{
{AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{a1, a2}},
{AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{a2 + 1, a2 + 2}},
{AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{a1 + 1, a1 + 2, a2 + 3}},
}
rec, rate := testMemRecords(address1, address2)
p := encodeMemProfile(rec, rate, time.Now()) p := encodeMemProfile(rec, rate, time.Now())
if err := p.Write(&buf); err != nil { if err := p.Write(&buf); err != nil {
t.Fatalf("Failed to write profile: %v", err) t.Fatalf("writing profile: %v", err)
} }
p, err = profile.Parse(&buf) p, err := profile.Parse(&buf)
if err != nil { if err != nil {
t.Fatalf("Could not parse Profile profile: %v", err) t.Fatalf("profile.Parse: %v", err)
} }
// Expected PeriodType, SampleType and Sample. periodType := &profile.ValueType{Type: "space", Unit: "bytes"}
expectedPeriodType := &profile.ValueType{Type: "space", Unit: "bytes"} sampleType := []*profile.ValueType{
expectedSampleType := []*profile.ValueType{
{Type: "alloc_objects", Unit: "count"}, {Type: "alloc_objects", Unit: "count"},
{Type: "alloc_space", Unit: "bytes"}, {Type: "alloc_space", Unit: "bytes"},
{Type: "inuse_objects", Unit: "count"}, {Type: "inuse_objects", Unit: "count"},
{Type: "inuse_space", Unit: "bytes"}, {Type: "inuse_space", Unit: "bytes"},
} }
// Expected samples, with values unsampled according to the profiling rate. samples := []*profile.Sample{
expectedSample := []*profile.Sample{
{Value: []int64{2050, 2099200, 1537, 1574400}, Location: []*profile.Location{ {Value: []int64{2050, 2099200, 1537, 1574400}, Location: []*profile.Location{
{ID: 1, Mapping: mprof.Mapping[0], Address: address1}, {ID: 1, Mapping: map1, Address: addr1},
{ID: 2, Mapping: mprof.Mapping[1], Address: address2}, {ID: 2, Mapping: map2, Address: addr2},
}}, }},
{Value: []int64{1, 829411, 1, 829411}, Location: []*profile.Location{ {Value: []int64{1, 829411, 1, 829411}, Location: []*profile.Location{
{ID: 3, Mapping: mprof.Mapping[1], Address: address2 + 1}, {ID: 3, Mapping: map2, Address: addr2 + 1},
{ID: 4, Mapping: mprof.Mapping[1], Address: address2 + 2}, {ID: 4, Mapping: map2, Address: addr2 + 2},
}}, }},
{Value: []int64{1, 829411, 0, 0}, Location: []*profile.Location{ {Value: []int64{1, 829411, 0, 0}, Location: []*profile.Location{
{ID: 5, Mapping: mprof.Mapping[0], Address: address1 + 1}, {ID: 5, Mapping: map1, Address: addr1 + 1},
{ID: 6, Mapping: mprof.Mapping[0], Address: address1 + 2}, {ID: 6, Mapping: map1, Address: addr1 + 2},
{ID: 7, Mapping: mprof.Mapping[1], Address: address2 + 3}, {ID: 7, Mapping: map2, Address: addr2 + 3},
}}, }},
} }
checkProfile(t, p, rate, periodType, sampleType, samples)
if p.Period != 512*1024 {
t.Fatalf("Sampling periods do not match")
}
if !reflect.DeepEqual(p.PeriodType, expectedPeriodType) {
t.Fatalf("Period types do not match")
}
if !reflect.DeepEqual(p.SampleType, expectedSampleType) {
t.Fatalf("Sample types do not match")
}
if !reflect.DeepEqual(p.Sample, expectedSample) {
t.Fatalf("Samples do not match: Expected: %v, Got:%v", getSampleAsString(expectedSample),
getSampleAsString(p.Sample))
}
}
func testMemRecords(a1, a2 uint64) ([]runtime.MemProfileRecord, int64) {
addr1, addr2 := uintptr(a1), uintptr(a2)
rate := int64(512 * 1024)
rec := []runtime.MemProfileRecord{
{AllocBytes: 4096, FreeBytes: 1024, AllocObjects: 4, FreeObjects: 1, Stack0: [32]uintptr{addr1, addr2}},
{AllocBytes: 512 * 1024, FreeBytes: 0, AllocObjects: 1, FreeObjects: 0, Stack0: [32]uintptr{addr2 + 1, addr2 + 2}},
{AllocBytes: 512 * 1024, FreeBytes: 512 * 1024, AllocObjects: 1, FreeObjects: 1, Stack0: [32]uintptr{addr1 + 1, addr1 + 2, addr2 + 3}},
}
return rec, rate
} }
...@@ -3137,13 +3137,14 @@ func mcount() int32 { ...@@ -3137,13 +3137,14 @@ func mcount() int32 {
} }
var prof struct { var prof struct {
lock uint32 signalLock uint32
hz int32 hz int32
} }
func _System() { _System() } func _System() { _System() }
func _ExternalCode() { _ExternalCode() } func _ExternalCode() { _ExternalCode() }
func _GC() { _GC() } func _LostExternalCode() { _LostExternalCode() }
func _GC() { _GC() }
// Called if we receive a SIGPROF signal. // Called if we receive a SIGPROF signal.
// Called by the signal handler, may run during STW. // Called by the signal handler, may run during STW.
...@@ -3279,14 +3280,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) { ...@@ -3279,14 +3280,7 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
} }
if prof.hz != 0 { if prof.hz != 0 {
// Simple cas-lock to coordinate with setcpuprofilerate. cpuprof.add(gp, stk[:n])
for !atomic.Cas(&prof.lock, 0, 1) {
osyield()
}
if prof.hz != 0 {
cpuprof.add(stk[:n])
}
atomic.Store(&prof.lock, 0)
} }
getg().m.mallocing-- getg().m.mallocing--
} }
...@@ -3309,15 +3303,7 @@ func sigprofNonGo() { ...@@ -3309,15 +3303,7 @@ func sigprofNonGo() {
for n < len(sigprofCallers) && sigprofCallers[n] != 0 { for n < len(sigprofCallers) && sigprofCallers[n] != 0 {
n++ n++
} }
cpuprof.addNonGo(sigprofCallers[:n])
// Simple cas-lock to coordinate with setcpuprofilerate.
for !atomic.Cas(&prof.lock, 0, 1) {
osyield()
}
if prof.hz != 0 {
cpuprof.addNonGo(sigprofCallers[:n])
}
atomic.Store(&prof.lock, 0)
} }
atomic.Store(&sigprofCallersUse, 0) atomic.Store(&sigprofCallersUse, 0)
...@@ -3330,19 +3316,11 @@ func sigprofNonGo() { ...@@ -3330,19 +3316,11 @@ func sigprofNonGo() {
//go:nowritebarrierrec //go:nowritebarrierrec
func sigprofNonGoPC(pc uintptr) { func sigprofNonGoPC(pc uintptr) {
if prof.hz != 0 { if prof.hz != 0 {
pc := []uintptr{ stk := []uintptr{
pc, pc,
funcPC(_ExternalCode) + sys.PCQuantum, funcPC(_ExternalCode) + sys.PCQuantum,
} }
cpuprof.addNonGo(stk)
// Simple cas-lock to coordinate with setcpuprofilerate.
for !atomic.Cas(&prof.lock, 0, 1) {
osyield()
}
if prof.hz != 0 {
cpuprof.addNonGo(pc)
}
atomic.Store(&prof.lock, 0)
} }
} }
...@@ -3370,8 +3348,9 @@ func setsSP(pc uintptr) bool { ...@@ -3370,8 +3348,9 @@ func setsSP(pc uintptr) bool {
return false return false
} }
// Arrange to call fn with a traceback hz times a second. // setcpuprofilerate sets the CPU profiling rate to hz times per second.
func setcpuprofilerate_m(hz int32) { // If hz <= 0, setcpuprofilerate turns off CPU profiling.
func setcpuprofilerate(hz int32) {
// Force sane arguments. // Force sane arguments.
if hz < 0 { if hz < 0 {
hz = 0 hz = 0
...@@ -3387,14 +3366,14 @@ func setcpuprofilerate_m(hz int32) { ...@@ -3387,14 +3366,14 @@ func setcpuprofilerate_m(hz int32) {
// it would deadlock. // it would deadlock.
setThreadCPUProfiler(0) setThreadCPUProfiler(0)
for !atomic.Cas(&prof.lock, 0, 1) { for !atomic.Cas(&prof.signalLock, 0, 1) {
osyield() osyield()
} }
if prof.hz != hz { if prof.hz != hz {
setProcessCPUProfiler(hz) setProcessCPUProfiler(hz)
prof.hz = hz prof.hz = hz
} }
atomic.Store(&prof.lock, 0) atomic.Store(&prof.signalLock, 0)
lock(&sched.lock) lock(&sched.lock)
sched.profilehz = hz sched.profilehz = hz
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment