Commit 937caef8 authored by Rob Pike's avatar Rob Pike

add pprof output format to prof.

amd64 linux only.

R=rsc
CC=golang-dev
https://golang.org/cl/186077
parent 3c753410
......@@ -19,10 +19,15 @@ statistics.
Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]
The formats (default -h) are:
The output modes (default -h) are:
-P file.prof:
Write the profile information to file.prof, in the format used by pprof.
At the moment, this only works on Linux amd64 binaries and requires that the
binary be written using 6l -e to produce ELF debug info.
See http://code.google.com/p/google-perftools for details.
-h: histograms
How many times a sample occurred at each location
How many times a sample occurred at each location.
-f: dynamic functions
At each sample period, print the name of the executing function.
-l: dynamic file and line numbers
......
......@@ -20,7 +20,6 @@ char* file = "6.out";
static Fhdr fhdr;
int have_syms;
int fd;
Map *symmap;
struct Ureg_amd64 ureg_amd64;
struct Ureg_x86 ureg_x86;
int total_sec = 0;
......@@ -28,7 +27,17 @@ int delta_msec = 100;
int nsample;
int nsamplethread;
// pprof data, stored as sequences of N followed by N PC values.
// See http://code.google.com/p/google-perftools .
uvlong *ppdata; // traces
Biobuf* pproffd; // file descriptor to write trace info
long ppstart; // start position of current trace
long nppdata; // length of data
long ppalloc; // size of allocated data
char ppmapdata[10*1024]; // the map information for the output file
// output formats
int pprof; // print pprof output to named file
int functions; // print functions
int histograms; // print histograms
int linenums; // print file and line numbers rather than function names
......@@ -46,6 +55,7 @@ Usage(void)
{
fprint(2, "Usage: prof -p pid [-t total_secs] [-d delta_msec] [6.out args ...]\n");
fprint(2, "\tformats (default -h):\n");
fprint(2, "\t\t-c file.prof: write [c]pprof output to file.prof\n");
fprint(2, "\t\t-h: histograms\n");
fprint(2, "\t\t-f: dynamic functions\n");
fprint(2, "\t\t-l: dynamic file and line numbers\n");
......@@ -79,6 +89,7 @@ struct Arch {
int (*getSP)(Map*);
uvlong (*uregPC)(void);
uvlong (*uregSP)(void);
void (*ppword)(uvlong w);
};
void
......@@ -116,24 +127,39 @@ int
amd64_getregs(Map *map)
{
int i;
union {
uvlong regs[1];
struct Ureg_amd64 ureg;
} u;
for(i = 0; i < sizeof ureg_amd64; i+=8) {
if(get8(map, (uvlong)i, &((uvlong*)&ureg_amd64)[i/4]) < 0)
if(get8(map, (uvlong)i, &u.regs[i/8]) < 0)
return -1;
}
ureg_amd64 = u.ureg;
return 0;
}
int
amd64_getPC(Map *map)
{
return get8(map, offsetof(struct Ureg_amd64, ip), (uvlong*)&ureg_amd64.ip);
uvlong x;
int r;
r = get8(map, offsetof(struct Ureg_amd64, ip), &x);
ureg_amd64.ip = x;
return r;
}
int
amd64_getSP(Map *map)
{
return get8(map, offsetof(struct Ureg_amd64, sp), (uvlong*)&ureg_amd64.sp);
uvlong x;
int r;
r = get8(map, offsetof(struct Ureg_amd64, sp), &x);
ureg_amd64.sp = x;
return r;
}
uvlong
......@@ -147,6 +173,22 @@ amd64_uregSP(void) {
return ureg_amd64.sp;
}
void
amd64_ppword(uvlong w)
{
uchar buf[8];
buf[0] = w;
buf[1] = w >> 8;
buf[2] = w >> 16;
buf[3] = w >> 24;
buf[4] = w >> 32;
buf[5] = w >> 40;
buf[6] = w >> 48;
buf[7] = w >> 56;
Bwrite(pproffd, buf, 8);
}
void
x86_regprint(void)
{
......@@ -204,6 +246,18 @@ x86_uregSP(void)
return (uvlong)ureg_x86.sp;
}
void
x86_ppword(uvlong w)
{
uchar buf[4];
buf[0] = w;
buf[1] = w >> 8;
buf[2] = w >> 16;
buf[3] = w >> 24;
Bwrite(pproffd, buf, 4);
}
Arch archtab[] = {
{
"amd64",
......@@ -213,6 +267,7 @@ Arch archtab[] = {
amd64_getSP,
amd64_uregPC,
amd64_uregSP,
amd64_ppword,
},
{
"386",
......@@ -222,6 +277,7 @@ Arch archtab[] = {
x86_getSP,
x86_uregPC,
x86_uregSP,
x86_ppword,
},
{
nil
......@@ -345,6 +401,36 @@ addtohistogram(uvlong pc, uvlong callerpc, uvlong sp)
counters[h] = x;
}
void
addppword(uvlong pc)
{
if(pc == 0) {
return;
}
if(nppdata == ppalloc) {
ppalloc = (1000+nppdata)*2;
ppdata = realloc(ppdata, ppalloc * sizeof ppdata[0]);
if(ppdata == nil) {
fprint(2, "prof: realloc failed: %r\n");
exit(2);
}
}
ppdata[nppdata++] = pc;
}
void
startpptrace()
{
ppstart = nppdata;
addppword(~0);
}
void
endpptrace()
{
ppdata[ppstart] = nppdata-ppstart-1;
}
uvlong nextpc;
void
......@@ -357,9 +443,10 @@ xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
}
if(histograms)
addtohistogram(nextpc, pc, sp);
if(!histograms || stacks > 1) {
if(!histograms || stacks > 1 || pprof) {
if(nextpc == 0)
nextpc = sym->value;
if(stacks){
fprint(2, "%s(", sym->name);
fprint(2, ")");
if(nextpc != sym->value)
......@@ -369,6 +456,10 @@ xptrace(Map *map, uvlong pc, uvlong sp, Symbol *sym)
}
fprint(2, "\n");
}
if (pprof) {
addppword(nextpc);
}
}
nextpc = pc;
}
......@@ -376,15 +467,21 @@ void
stacktracepcsp(Map *map, uvlong pc, uvlong sp)
{
nextpc = pc;
if(pprof){
startpptrace();
}
if(machdata->ctrace==nil)
fprint(2, "no machdata->ctrace\n");
else if(machdata->ctrace(map, pc, sp, 0, xptrace) <= 0)
fprint(2, "no stack frame: pc=%#p sp=%#p\n", pc, sp);
else {
addtohistogram(nextpc, 0, sp);
if(!histograms || stacks > 1)
if(stacks)
fprint(2, "\n");
}
if(pprof){
endpptrace();
}
}
void
......@@ -399,7 +496,7 @@ printpc(Map *map, uvlong pc, uvlong sp)
symoff(buf, sizeof(buf), pc, CANY);
fprint(2, "%s\n", buf);
}
if(stacks){
if(stacks || pprof){
stacktracepcsp(map, pc, sp);
}
else if(histograms){
......@@ -407,14 +504,56 @@ printpc(Map *map, uvlong pc, uvlong sp)
}
}
void
ppmaps(void)
{
int fd, n;
char tmp[100];
Seg *seg;
// If it's Linux, the info is in /proc/$pid/maps
snprint(tmp, sizeof tmp, "/proc/%d/maps", pid);
fd = open(tmp, 0);
if(fd >= 0) {
n = read(fd, ppmapdata, sizeof ppmapdata - 1);
close(fd);
if(n < 0) {
fprint(2, "prof: can't read %s: %r\n", tmp);
exit(2);
}
ppmapdata[n] = 0;
return;
}
// It's probably a mac. Synthesize an entry for the text file.
// The register segment may come first but it has a zero offset, so grab the first non-zero offset segment.
for(n = 0; n < 3; n++){
seg = &map[0]->seg[n];
if(seg->b == 0) {
continue;
}
snprint(ppmapdata, sizeof ppmapdata,
"%.16x-%.16x r-xp %d 00:00 34968549 %s\n",
seg->b, seg->e, seg->f, "/home/r/6.out"
);
return;
}
fprint(2, "prof: no text segment in maps for %s\n", file);
exit(2);
}
void
samples(void)
{
int i, pid, msec;
struct timespec req;
int getmaps;
req.tv_sec = delta_msec/1000;
req.tv_nsec = 1000000*(delta_msec % 1000);
getmaps = 0;
if(pprof)
getmaps= 1;
for(msec = 0; total_sec <= 0 || msec < 1000*total_sec; msec += delta_msec) {
nsample++;
nsamplethread += nthread;
......@@ -433,6 +572,10 @@ samples(void)
getthreads();
if(nthread == 0)
break;
if(getmaps) {
getmaps = 0;
ppmaps();
}
}
}
......@@ -534,6 +677,106 @@ dumphistogram()
}
}
typedef struct Trace Trace;
struct Trace {
int count;
int npc;
uvlong *pc;
Trace *next;
};
void
dumppprof()
{
uvlong i, n, *p, *e;
int ntrace;
Trace *trace, *tp, *up, *prev;
if(!pprof)
return;
e = ppdata + nppdata;
// Create list of traces. First, count the traces
ntrace = 0;
for(p = ppdata; p < e;) {
n = *p++;
p += n;
if(n == 0)
continue;
ntrace++;
}
print("%d traces\n", ntrace);
if(ntrace <= 0)
return;
// Allocate and link the traces together.
trace = malloc(ntrace * sizeof(Trace));
tp = trace;
for(p = ppdata; p < e;) {
n = *p++;
if(n == 0)
continue;
tp->count = 1;
tp->npc = n;
tp->pc = p;
tp->next = tp+1;
tp++;
p += n;
}
trace[ntrace-1].next = nil;
if(0)
for(tp = trace; tp != nil; tp = tp->next) {
print("%d: ", tp->npc);
for(i = 0; i < tp->npc; i++) {
print("%llx ", tp->pc[i]);
}
print("\n");
}
// Eliminate duplicates. Lousy algorithm, although not as bad as it looks because
// the list collapses fast.
for(tp = trace; tp != nil; tp = tp->next) {
prev = tp;
for(up = tp->next; up != nil; up = up->next) {
if(up->npc == tp->npc && memcmp(up->pc, tp->pc, up->npc*sizeof up->pc[0]) == 0) {
tp->count++;
prev->next = up->next;
} else {
prev = up;
}
}
}
for(tp = trace; tp != nil; tp = tp->next) {
print("[%d] %d: ", tp->count, tp->npc);
for(i = 0; i < tp->npc; i++) {
print("%llx ", tp->pc[i]);
}
print("\n");
}
// Write file.
// See http://code.google.com/p/google-perftools/source/browse/trunk/doc/cpuprofile-fileformat.html
// BUG: assumes little endian.
// 1) Header
arch->ppword(0); // must be zero
arch->ppword(3); // 3 words follow in header
arch->ppword(0); // must be zero
arch->ppword(delta_msec * 1000); // sampling period in microseconds
arch->ppword(0); // must be zero (padding)
// 2) One record for each trace.
for(tp = trace; tp != nil; tp = tp->next) {
arch->ppword(tp->count);
arch->ppword(tp->npc);
for(i = 0; i < tp->npc; i++) {
arch->ppword(tp->pc[i]);
}
}
// 3) Binary trailer
arch->ppword(0); // must be zero
arch->ppword(1); // must be one
arch->ppword(0); // must be zero
// 4) Mapped objects.
Bwrite(pproffd, ppmapdata, strlen(ppmapdata));
// 5) That's it.
Bterm(pproffd);
}
int
startprocess(char **argv)
{
......@@ -574,8 +817,18 @@ int
main(int argc, char *argv[])
{
int i;
char *ppfile;
ARGBEGIN{
case 'P':
pprof =1;
ppfile = EARGF(Usage());
pproffd = Bopen(ppfile, OWRITE);
if(pproffd == nil) {
fprint(2, "prof: cannot open %s: %r\n", ppfile);
exit(2);
}
break;
case 'd':
delta_msec = atoi(EARGF(Usage()));
break;
......@@ -600,10 +853,12 @@ main(int argc, char *argv[])
case 's':
stacks++;
break;
default:
Usage();
}ARGEND
if(pid <= 0 && argc == 0)
Usage();
if(functions+linenums+registers+stacks == 0)
if(functions+linenums+registers+stacks+pprof == 0)
histograms = 1;
if(!machbyname("amd64")) {
fprint(2, "prof: no amd64 support\n", pid);
......@@ -651,5 +906,6 @@ main(int argc, char *argv[])
samples();
detach();
dumphistogram();
dumppprof();
exit(0);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment