Commit 6eb251f2 authored by Russ Cox

runtime: malloc sampling, pprof interface

R=r
CC=golang-dev
https://golang.org/cl/719041
parent 6b6c3993
...@@ -83,6 +83,7 @@ DIRS=\ ...@@ -83,6 +83,7 @@ DIRS=\
hash/crc32\ hash/crc32\
hash/crc64\ hash/crc64\
http\ http\
http/pprof\
image\ image\
image/jpeg\ image/jpeg\
image/png\ image/png\
...@@ -103,6 +104,7 @@ DIRS=\ ...@@ -103,6 +104,7 @@ DIRS=\
regexp\ regexp\
rpc\ rpc\
runtime\ runtime\
runtime/pprof\
scanner\ scanner\
sort\ sort\
strconv\ strconv\
...@@ -130,10 +132,12 @@ NOTEST=\ ...@@ -130,10 +132,12 @@ NOTEST=\
go/doc\ go/doc\
go/token\ go/token\
hash\ hash\
http/pprof\
image\ image\
image/jpeg\ image/jpeg\
rand\ rand\
runtime\ runtime\
runtime/pprof\
syscall\ syscall\
testing/iotest\ testing/iotest\
xgb\ xgb\
......
# Copyright 2010 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../../Make.$(GOARCH)
TARG=http/pprof
GOFILES=\
pprof.go\
include ../../../Make.pkg
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pprof serves via its HTTP server runtime profiling data
// in the format expected by the pprof visualization tool.
// For more information about pprof, see
// http://code.google.com/p/google-perftools/.
//
// The package is typically only imported for the side effect of
// registering its HTTP handlers.
// The handled paths all begin with /debug/pprof/.
//
// To use pprof, link this package into your program:
// import _ "http/pprof"
//
// Then use the pprof tool to look at the heap profile:
//
// pprof http://localhost:6060/debug/pprof/heap
//
package pprof
import (
"bufio"
"fmt"
"http"
"os"
"runtime"
"runtime/pprof"
"strconv"
"strings"
)
// init installs the package's handlers on the default HTTP mux,
// one per /debug/pprof/ endpoint.
func init() {
	register := func(path string, handler func(*http.Conn, *http.Request)) {
		http.Handle(path, http.HandlerFunc(handler))
	}
	register("/debug/pprof/cmdline", Cmdline)
	register("/debug/pprof/heap", Heap)
	register("/debug/pprof/symbol", Symbol)
}
// Cmdline responds with the running program's
// command line, with arguments separated by NUL bytes.
// The package initialization registers it as /debug/pprof/cmdline.
func Cmdline(c *http.Conn, r *http.Request) {
	c.SetHeader("content-type", "text/plain; charset=utf-8")
	// The joined arguments are data, not a format string: write them
	// with Fprint so that a '%' inside an argument is sent verbatim
	// instead of being interpreted as a formatting verb.
	fmt.Fprint(c, strings.Join(os.Args, "\x00"))
}
// Heap writes the current heap profile, in pprof's text format,
// as the response body.
// The package initialization registers it as /debug/pprof/heap.
func Heap(c *http.Conn, r *http.Request) {
	const contentType = "text/plain; charset=utf-8"
	c.SetHeader("content-type", contentType)
	pprof.WriteHeapProfile(c)
}
// Symbol resolves the program counters named in the request and
// replies with one "address name" line per counter that maps to a
// known function. Counters are separated by '+' and are read from
// the request body for POST requests, or from the raw query string
// otherwise.
// The package initialization registers it as /debug/pprof/symbol.
func Symbol(c *http.Conn, r *http.Request) {
	c.SetHeader("content-type", "text/plain; charset=utf-8")

	// The real symbol count is unknown, but symbols are available.
	// Pprof only distinguishes 0 (no symbols) from a positive count.
	fmt.Fprintf(c, "num_symbols: 1\n")

	var in *bufio.Reader
	if r.Method != "POST" {
		in = bufio.NewReader(strings.NewReader(r.URL.RawQuery))
	} else {
		in = bufio.NewReader(r.Body)
	}

	for done := false; !done; {
		tok, err := in.ReadSlice('+')
		if err != nil {
			// The final counter has no trailing '+', so the read that
			// returns it also reports an error. Process the token
			// first and stop after this iteration.
			done = true
		} else {
			tok = tok[0 : len(tok)-1] // drop the '+' delimiter
		}

		addr, _ := strconv.Btoui64(string(tok), 0)
		if addr == 0 {
			continue
		}
		if fn := runtime.FuncForPC(uintptr(addr)); fn != nil {
			fmt.Fprintf(c, "%#x %s\n", addr, fn.Name())
		}
	}
}
...@@ -33,6 +33,64 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool) ...@@ -33,6 +33,64 @@ func Caller(skip int) (pc uintptr, file string, line int, ok bool)
// It returns the number of entries written to pc. // It returns the number of entries written to pc.
func Callers(skip int, pc []int) int func Callers(skip int, pc []int) int
// FuncForPC returns a *Func describing the function that contains the
// given program counter address, or else nil.
//
// The declaration has no body in this file, so the implementation
// must be supplied from outside it.
func FuncForPC(pc uintptr) *Func
// NOTE(rsc): Func must match struct Func in runtime.h

// Func records information about a function in the program,
// in particular the mapping from program counters to source
// line numbers within that function.
type Func struct {
	name   string
	typ    string
	src    string  // source file name; reported by FileLine
	pcln   []byte  // encoded pc/line table; decoded by FileLine
	entry  uintptr // entry address; reported by Entry
	pc0    uintptr // program counter at which the pcln table starts
	ln0    int32   // line number in effect at pc0
	frame  int32
	args   int32
	locals int32
}

// Name returns the name of the function.
func (f *Func) Name() string { return f.name }

// Entry returns the entry address of the function.
func (f *Func) Entry() uintptr { return f.entry }

// FileLine returns the file name and line number of the
// source code corresponding to the program counter pc.
// The result will not be accurate if pc is not a program
// counter within f.
//
// The line is found by replaying f's pc/line table from (pc0, ln0)
// until the table's program counter passes pc. Each table byte b
// is interpreted as follows:
//
//	b == 0:        the next four bytes are a big-endian line delta
//	1 <= b <= 64:  the line advances by b
//	65 <= b <= 128: the line goes back by b-64
//	b >= 129:      the program counter advances by PcQuant*(b-129)
//
// and after every entry the program counter advances by one more
// PcQuant. If the table ends in the middle of a four-byte delta,
// decoding stops and the line computed so far is returned.
func (f *Func) FileLine(pc uintptr) (file string, line int) {
	// NOTE(rsc): If you edit this function, also edit
	// symtab.c:/^funcline.
	const PcQuant = 1

	p := f.pcln
	pc1 := f.pc0
	line = int(f.ln0)
	file = f.src
	for i := 0; i < len(p) && pc1 <= pc; i++ {
		switch b := p[i]; {
		case b == 0:
			if i+4 >= len(p) {
				// Truncated delta: do not index past the table.
				return
			}
			// Widen each byte before shifting; shifting a byte by 8 or
			// more bits would discard it entirely. The assembled value
			// is treated as a signed 32-bit delta.
			delta := int32(uint32(p[i+1])<<24 | uint32(p[i+2])<<16 |
				uint32(p[i+3])<<8 | uint32(p[i+4]))
			line += int(delta)
			i += 4
		case b <= 64:
			line += int(b)
		case b <= 128:
			line -= int(b - 64)
		default:
			pc1 += PcQuant * uintptr(b-129)
		}
		// Advance the table's own program counter, not the target pc;
		// otherwise the loop condition could never become false.
		pc1 += PcQuant
	}
	return
}
// mid returns the current os thread (m) id. // mid returns the current os thread (m) id.
func mid() uint32 func mid() uint32
...@@ -175,18 +233,55 @@ func GOROOT() string { ...@@ -175,18 +233,55 @@ func GOROOT() string {
// at the time of the build. // at the time of the build.
func Version() string { return defaultVersion } func Version() string { return defaultVersion }
// MemProfileKind specifies how frequently to record // MemProfileRate controls the fraction of memory allocations
// memory allocations in the memory profiler. // that are recorded and reported in the memory profile.
type MemProfileKind int // The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
var MemProfileRate int = 512 * 1024
// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
//
// Stack0 has room for at most 32 program counters; a shorter
// trace is terminated by a zero entry.
type MemProfileRecord struct {
AllocBytes, FreeBytes int64 // number of bytes allocated, freed
AllocObjects, FreeObjects int64 // number of objects allocated, freed
Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}
const ( // InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
MemProfileNone MemProfileKind = iota // no profiling func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }
MemProfileSample // profile random sample
MemProfileAll // profile every allocation
)
// SetMemProfileKind sets the fraction of memory allocations // InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
// that are recorded and reported in the memory profile. func (r *MemProfileRecord) InUseObjects() int64 {
// Profiling an allocation has a small overhead, so the default return r.AllocObjects - r.FreeObjects
// is to profile only a random sample, weighted by block size. }
func SetMemProfileKind(kind MemProfileKind)
// Stack returns the record's stack trace: the leading entries of
// r.Stack0 up to, but not including, the first zero entry, or all
// of r.Stack0 if it contains no zero entry.
func (r *MemProfileRecord) Stack() []uintptr {
	n := 0
	for n < len(r.Stack0) && r.Stack0[n] != 0 {
		n++
	}
	return r.Stack0[0:n]
}
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The declaration has no body in this file, so the implementation
// must be supplied from outside it.
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool)
...@@ -15,6 +15,8 @@ package runtime ...@@ -15,6 +15,8 @@ package runtime
MHeap mheap; MHeap mheap;
MStats mstats; MStats mstats;
extern volatile int32 ·MemProfileRate;
// Same algorithm from chan.c, but a different // Same algorithm from chan.c, but a different
// instance of the static uint32 x. // instance of the static uint32 x.
// Not protected by a lock - let the threads use // Not protected by a lock - let the threads use
...@@ -36,7 +38,7 @@ fastrand1(void) ...@@ -36,7 +38,7 @@ fastrand1(void)
void* void*
mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_depth) mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_depth)
{ {
int32 sizeclass; int32 sizeclass, rate;
MCache *c; MCache *c;
uintptr npages; uintptr npages;
MSpan *s; MSpan *s;
...@@ -91,19 +93,19 @@ mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_dept ...@@ -91,19 +93,19 @@ mallocgc(uintptr size, uint32 refflag, int32 dogc, int32 zeroed, int32 skip_dept
m->mallocing = 0; m->mallocing = 0;
if(!(refflag & RefNoProfiling) && malloc_profile != MProf_None) { if(!(refflag & RefNoProfiling) && (rate = ·MemProfileRate) > 0) {
switch(malloc_profile) { if(size >= rate)
case MProf_Sample: goto profile;
if(m->mcache->next_sample > size) { if(m->mcache->next_sample > size)
m->mcache->next_sample -= size; m->mcache->next_sample -= size;
break; else {
} // pick next profile time
m->mcache->next_sample = fastrand1() & (256*1024 - 1); // sample every 128 kB allocated, on average if(rate > 0x3fffffff) // make 2*rate not overflow
// fall through rate = 0x3fffffff;
case MProf_All: m->mcache->next_sample = fastrand1() % (2*rate);
profile:
*ref |= RefProfiled; *ref |= RefProfiled;
MProf_Malloc(skip_depth+1, v, size); MProf_Malloc(skip_depth+1, v, size);
break;
} }
} }
......
...@@ -11,8 +11,6 @@ package runtime ...@@ -11,8 +11,6 @@ package runtime
#include "defs.h" #include "defs.h"
#include "type.h" #include "type.h"
int32 malloc_profile = MProf_None; // no sampling during bootstrap
// NOTE(rsc): Everything here could use cas if contention became an issue. // NOTE(rsc): Everything here could use cas if contention became an issue.
static Lock proflock; static Lock proflock;
...@@ -58,7 +56,7 @@ stkbucket(uintptr *stk, int32 nstk) ...@@ -58,7 +56,7 @@ stkbucket(uintptr *stk, int32 nstk)
} }
h += h<<3; h += h<<3;
h ^= h>>11; h ^= h>>11;
i = h%BuckHashSize; i = h%BuckHashSize;
for(b = buckhash[i]; b; b=b->next) for(b = buckhash[i]; b; b=b->next)
if(b->hash == h && b->nstk == nstk && if(b->hash == h && b->nstk == nstk &&
...@@ -162,7 +160,7 @@ getaddrbucket(uintptr addr) ...@@ -162,7 +160,7 @@ getaddrbucket(uintptr addr)
AddrHash *ah; AddrHash *ah;
AddrEntry *e, **l; AddrEntry *e, **l;
Bucket *b; Bucket *b;
h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits); h = (uint32)((addr>>20)*HashMultiplier) >> (32-AddrHashBits);
for(ah=addrhash[h]; ah; ah=ah->next) for(ah=addrhash[h]; ah; ah=ah->next)
if(ah->addr == (addr>>20)) if(ah->addr == (addr>>20))
...@@ -191,6 +189,10 @@ MProf_Malloc(int32 skip, void *p, uintptr size) ...@@ -191,6 +189,10 @@ MProf_Malloc(int32 skip, void *p, uintptr size)
uintptr stk[32]; uintptr stk[32];
Bucket *b; Bucket *b;
if(m->nomemprof > 0)
return;
m->nomemprof++;
nstk = callers(1+skip, stk, 32); nstk = callers(1+skip, stk, 32);
lock(&proflock); lock(&proflock);
b = stkbucket(stk, nstk); b = stkbucket(stk, nstk);
...@@ -198,6 +200,7 @@ MProf_Malloc(int32 skip, void *p, uintptr size) ...@@ -198,6 +200,7 @@ MProf_Malloc(int32 skip, void *p, uintptr size)
b->alloc_bytes += size; b->alloc_bytes += size;
setaddrbucket((uintptr)p, b); setaddrbucket((uintptr)p, b);
unlock(&proflock); unlock(&proflock);
m->nomemprof--;
} }
// Called when freeing a profiled block. // Called when freeing a profiled block.
...@@ -206,6 +209,10 @@ MProf_Free(void *p, uintptr size) ...@@ -206,6 +209,10 @@ MProf_Free(void *p, uintptr size)
{ {
Bucket *b; Bucket *b;
if(m->nomemprof > 0)
return;
m->nomemprof++;
lock(&proflock); lock(&proflock);
b = getaddrbucket((uintptr)p); b = getaddrbucket((uintptr)p);
if(b != nil) { if(b != nil) {
...@@ -213,13 +220,53 @@ MProf_Free(void *p, uintptr size) ...@@ -213,13 +220,53 @@ MProf_Free(void *p, uintptr size)
b->free_bytes += size; b->free_bytes += size;
} }
unlock(&proflock); unlock(&proflock);
m->nomemprof--;
} }
// Go interface to profile data. (Declared in extern.go) // Go interface to profile data. (Declared in extern.go)
// Assumes Go sizeof(int) == sizeof(int32) // Assumes Go sizeof(int) == sizeof(int32)
func SetMemProfileKind(kind int32) { // Must match MemProfileRecord in extern.go.
malloc_profile = kind; typedef struct Record Record;
// Record is the element type that MemProfile writes into the
// caller's slice: byte and object totals for one bucket plus that
// bucket's stack trace, zero-padded to 32 entries by record().
struct Record {
int64 alloc_bytes, free_bytes;
int64 alloc_objects, free_objects;
uintptr stk[32];
};
// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
int32 i;
r->alloc_bytes = b->alloc_bytes;
r->free_bytes = b->free_bytes;
r->alloc_objects = b->allocs;
r->free_objects = b->frees;
for(i=0; i<b->nstk && i<nelem(r->stk); i++)
r->stk[i] = b->stk[i];
for(; i<nelem(r->stk); i++)
r->stk[i] = 0;
} }
// MemProfile reports the current profile buckets to the caller.
// n is set to the number of buckets that pass the filter; if the
// caller's slice p has room for all of them (n <= p.len), each one
// is written into p via record() and ok is set to true. Otherwise
// p is left untouched and ok is false.
// A bucket passes the filter when include_inuse_zero is set or its
// allocated and freed byte counts differ.
func MemProfile(p Slice, include_inuse_zero bool) (n int32, ok bool) {
Bucket *b;
Record *r;
// Hold proflock across both passes so the count and the copy
// walk the same bucket list.
lock(&proflock);
// First pass: count the buckets that will be reported.
n = 0;
for(b=buckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
n++;
ok = false;
if(n <= p.len) {
ok = true;
// Second pass: same filter as above, writing consecutive records
// into the slice's backing array.
r = (Record*)p.array;
for(b=buckets; b; b=b->allnext)
if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
record(r++, b);
}
unlock(&proflock);
}
# Copyright 2010 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
include ../../../Make.$(GOARCH)
TARG=runtime/pprof
GOFILES=\
pprof.go\
include ../../../Make.pkg
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pprof writes runtime profiling data in the format expected
// by the pprof visualization tool.
// For more information about pprof, see
// http://code.google.com/p/google-perftools/.
package pprof
import (
"bufio"
"fmt"
"io"
"os"
"runtime"
)
// WriteHeapProfile writes the current heap profile to w in the
// text format understood by pprof.
// Output is buffered; the value returned is the result of the
// final flush, which is nil unless a write to w failed.
func WriteHeapProfile(w io.Writer) os.Error {
	// Fetching the profile takes two steps: ask how many records
	// exist, then hand MemProfile a slice big enough to hold them.
	// New records can appear between the two calls, so leave some
	// headroom and retry in the unlikely case that it was not
	// enough. Normally the loop body runs exactly once.
	var recs []runtime.MemProfileRecord
	count, _ := runtime.MemProfile(nil, false)
	for {
		// Headroom for entries added since the previous call.
		recs = make([]runtime.MemProfileRecord, count+50)
		var fits bool
		count, fits = runtime.MemProfile(recs, false)
		if fits {
			recs = recs[0:count]
			break
		}
		// The profile outgrew the headroom; go around again.
	}

	// Sum every record for the header line.
	var sum runtime.MemProfileRecord
	for i := range recs {
		rec := &recs[i]
		sum.AllocBytes += rec.AllocBytes
		sum.AllocObjects += rec.AllocObjects
		sum.FreeBytes += rec.FreeBytes
		sum.FreeObjects += rec.FreeObjects
	}

	// The true sampling rate is MemProfileRate, but early versions
	// of the C++ heap profiler printed twice the rate, and pprof
	// has come to expect that convention.
	out := bufio.NewWriter(w)
	fmt.Fprintf(out, "heap profile: %d: %d [%d: %d] @ heap/%d\n",
		sum.InUseObjects(), sum.InUseBytes(),
		sum.AllocObjects, sum.AllocBytes,
		2*runtime.MemProfileRate)

	// One line per record: in-use and total counts, then the stack.
	for i := range recs {
		rec := &recs[i]
		fmt.Fprintf(out, "%d: %d [%d: %d] @",
			rec.InUseObjects(), rec.InUseBytes(),
			rec.AllocObjects, rec.AllocBytes)
		for _, addr := range rec.Stack() {
			fmt.Fprintf(out, " %#x", addr)
		}
		fmt.Fprintf(out, "\n")
	}
	return out.Flush()
}
...@@ -102,6 +102,7 @@ schedinit(void) ...@@ -102,6 +102,7 @@ schedinit(void)
byte *p; byte *p;
allm = m; allm = m;
m->nomemprof++;
mallocinit(); mallocinit();
goargs(); goargs();
...@@ -118,6 +119,8 @@ schedinit(void) ...@@ -118,6 +119,8 @@ schedinit(void)
sched.mcpumax = sched.gomaxprocs; sched.mcpumax = sched.gomaxprocs;
sched.mcount = 1; sched.mcount = 1;
sched.predawn = 1; sched.predawn = 1;
m->nomemprof--;
} }
// Called after main·init_function; main·main will be called on return. // Called after main·init_function; main·main will be called on return.
......
...@@ -210,7 +210,7 @@ void ...@@ -210,7 +210,7 @@ void
·getgoroot(String out) ·getgoroot(String out)
{ {
byte *p; byte *p;
p = getenv("GOROOT"); p = getenv("GOROOT");
out = gostring(p); out = gostring(p);
FLUSH(&out); FLUSH(&out);
...@@ -475,7 +475,7 @@ nanotime(void) ...@@ -475,7 +475,7 @@ nanotime(void)
{ {
int64 sec; int64 sec;
int32 usec; int32 usec;
sec = 0; sec = 0;
usec = 0; usec = 0;
gettime(&sec, &usec); gettime(&sec, &usec);
...@@ -507,3 +507,10 @@ void ...@@ -507,3 +507,10 @@ void
retn = callers(skip, (uintptr*)pc.array, pc.len); retn = callers(skip, (uintptr*)pc.array, pc.len);
FLUSH(&retn); FLUSH(&retn);
} }
// ·FuncForPC looks up pc with findfunc and stores the result in
// the return slot retf.
// NOTE(review): FLUSH presumably forces the assignment to retf to be
// written back so the caller sees it; confirm against its definition.
void
·FuncForPC(uintptr pc, void *retf)
{
retf = findfunc(pc);
FLUSH(&retf);
}
...@@ -212,6 +212,7 @@ struct M ...@@ -212,6 +212,7 @@ struct M
int32 mallocing; int32 mallocing;
int32 gcing; int32 gcing;
int32 locks; int32 locks;
int32 nomemprof;
int32 waitnextg; int32 waitnextg;
Note havenextg; Note havenextg;
G* nextg; G* nextg;
...@@ -259,20 +260,18 @@ enum ...@@ -259,20 +260,18 @@ enum
SigQueue = 1<<3, SigQueue = 1<<3,
}; };
// (will be) shared with go; edit ../cmd/6g/sys.go too. // NOTE(rsc): keep in sync with extern.go:/type.Func.
// should move out of sys.go eventually. // Eventually, the loaded symbol table should be closer to this form.
// also eventually, the loaded symbol table should
// be closer to this form.
struct Func struct Func
{ {
String name; String name;
String type; // go type string String type; // go type string
String src; // src file name String src; // src file name
uint64 entry; // entry pc
int64 frame; // stack frame size
Slice pcln; // pc/ln tab for this func Slice pcln; // pc/ln tab for this func
int64 pc0; // starting pc, ln for table uintptr entry; // entry pc
uintptr pc0; // starting pc, ln for table
int32 ln0; int32 ln0;
int32 frame; // stack frame size
int32 args; // number of 32-bit in/out args int32 args; // number of 32-bit in/out args
int32 locals; // number of 32-bit locals int32 locals; // number of 32-bit locals
}; };
......
...@@ -299,6 +299,7 @@ splitpcln(void) ...@@ -299,6 +299,7 @@ splitpcln(void)
// Return actual file line number for targetpc in func f. // Return actual file line number for targetpc in func f.
// (Source file is f->src.) // (Source file is f->src.)
// NOTE(rsc): If you edit this function, also edit extern.go:/FileLine
int32 int32
funcline(Func *f, uint64 targetpc) funcline(Func *f, uint64 targetpc)
{ {
...@@ -333,6 +334,12 @@ buildfuncs(void) ...@@ -333,6 +334,12 @@ buildfuncs(void)
if(func != nil) if(func != nil)
return; return;
// Memory profiling uses this code;
// can deadlock if the profiler ends
// up back here.
m->nomemprof++;
// count funcs, fnames // count funcs, fnames
nfunc = 0; nfunc = 0;
nfname = 0; nfname = 0;
...@@ -350,6 +357,8 @@ buildfuncs(void) ...@@ -350,6 +357,8 @@ buildfuncs(void)
// record src file and line info for each func // record src file and line info for each func
walksymtab(dosrcline); walksymtab(dosrcline);
m->nomemprof--;
} }
Func* Func*
......
...@@ -30,8 +30,9 @@ func bigger() { ...@@ -30,8 +30,9 @@ func bigger() {
} }
func main() { func main() {
flag.Parse() runtime.MemProfileRate = 0 // disable profiler
runtime.MemStats.Alloc = 0 // ignore stacks runtime.MemStats.Alloc = 0 // ignore stacks
flag.Parse()
for i := 0; i < 1<<7; i++ { for i := 0; i < 1<<7; i++ {
for j := 1; j <= 1<<22; j <<= 1 { for j := 1; j <= 1<<22; j <<= 1 {
if i == 0 && *chatty { if i == 0 && *chatty {
......
...@@ -92,6 +92,7 @@ func atoi(s string) int { ...@@ -92,6 +92,7 @@ func atoi(s string) int {
} }
func main() { func main() {
runtime.MemProfileRate = 0 // disable profiler
flag.Parse() flag.Parse()
b = make([]*byte, 10000) b = make([]*byte, 10000)
if flag.NArg() > 0 { if flag.NArg() > 0 {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment