mirror of
git://git.9front.org/plan9front/plan9front
synced 2025-01-12 11:10:06 +00:00
prof: increase precision of measurements
Currently we use millisecond ticks for time spent in each function. This is not good enough for modern machines where fast functions could be completed in a handful of nanoseconds. Instead let us just use the raw ticks and store the cyclecfreq in the output prof data. This requires that we enlargen the time section in the data to 8 bytes, which broke the assumptions for struct allignment and required a slight refactor of the code used to read it in prof(1). Since this is a breaking change, we've devised a small format to communicate the version for future revision of this format and this patch includes a modification to file(1) for recognizing this format. Additionally some minor improvements were made across the board.
This commit is contained in:
parent
0fcab2a6e8
commit
c8544c91d9
4 changed files with 94 additions and 136 deletions
|
@ -6,12 +6,8 @@ prof, tprof, kprof \- display profiling data
|
|||
[
|
||||
.B -dr
|
||||
]
|
||||
[
|
||||
.I program
|
||||
]
|
||||
[
|
||||
.I profile
|
||||
]
|
||||
.PP
|
||||
.B tprof
|
||||
.I pid
|
||||
|
@ -26,16 +22,11 @@ interprets files produced automatically by programs loaded using the
|
|||
option of
|
||||
.IR 2l (1)
|
||||
or other loader.
|
||||
The symbol table in the
|
||||
named program file
|
||||
.RL ( 2.out
|
||||
etc., according to
|
||||
.BR $objtype ,
|
||||
by default)
|
||||
is read and correlated with the
|
||||
profile file
|
||||
.RL ( prof.out
|
||||
by default).
|
||||
The symbol table in the named
|
||||
.I program
|
||||
file is read and correlated with the
|
||||
.I profile
|
||||
file.
|
||||
For each symbol, the percentage
|
||||
of time (in seconds) spent executing between that symbol
|
||||
and the next
|
||||
|
@ -59,7 +50,7 @@ where
|
|||
.I symbol
|
||||
is the entry point of the call,
|
||||
.I time
|
||||
is in milliseconds,
|
||||
is in seconds,
|
||||
and
|
||||
.I ncall
|
||||
is the number of times that entry point was called at that
|
||||
|
|
|
@ -918,6 +918,7 @@ struct FILE_STRING
|
|||
"MThd", "midi audio", 4, "audio/midi",
|
||||
"MUS\x1a", "mus audio", 4, "audio/mus",
|
||||
"Creative Voice File\x1a", "voc audio", 20, "audio/x-voc",
|
||||
"pr\x0f", "Plan 9 profile data", 3, OCTET,
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
|
|
|
@ -13,8 +13,9 @@ struct Data
|
|||
ushort right;
|
||||
ulong pc;
|
||||
ulong count;
|
||||
ulong time;
|
||||
uvlong time;
|
||||
};
|
||||
enum { Datasz = 2+2+4+4+8 };
|
||||
|
||||
struct Pc
|
||||
{
|
||||
|
@ -26,13 +27,13 @@ struct Acc
|
|||
{
|
||||
char *name;
|
||||
ulong pc;
|
||||
ulong ms;
|
||||
ulong calls;
|
||||
uvlong ticks;
|
||||
uvlong calls;
|
||||
};
|
||||
|
||||
Data* data;
|
||||
Acc* acc;
|
||||
ulong ms;
|
||||
uvlong ticks;
|
||||
long nsym;
|
||||
long ndata;
|
||||
int dflag;
|
||||
|
@ -40,6 +41,7 @@ int rflag;
|
|||
Biobuf bout;
|
||||
int tabstop = 4;
|
||||
int verbose;
|
||||
uvlong cyclefreq;
|
||||
|
||||
void syms(char*);
|
||||
void datas(char*);
|
||||
|
@ -68,18 +70,15 @@ main(int argc, char *argv[])
|
|||
rflag = 1;
|
||||
break;
|
||||
default:
|
||||
usage:
|
||||
fprint(2, "usage: prof [-dr] [8.out] [prof.out]\n");
|
||||
exits("usage");
|
||||
}ARGEND
|
||||
Binit(&bout, 1, OWRITE);
|
||||
if(argc > 0)
|
||||
syms(argv[0]);
|
||||
else
|
||||
syms(defaout());
|
||||
if(argc > 1)
|
||||
datas(argv[1]);
|
||||
else
|
||||
datas("prof.out");
|
||||
if(argc < 2)
|
||||
goto usage;
|
||||
syms(argv[0]);
|
||||
datas(argv[1]);
|
||||
if(ndata){
|
||||
if(dflag)
|
||||
graph(0, data[0].down, 0);
|
||||
|
@ -89,26 +88,16 @@ main(int argc, char *argv[])
|
|||
exits(0);
|
||||
}
|
||||
|
||||
void
|
||||
swapdata(Data *dp)
|
||||
{
|
||||
dp->down = beswab(dp->down);
|
||||
dp->right = beswab(dp->right);
|
||||
dp->pc = beswal(dp->pc);
|
||||
dp->count = beswal(dp->count);
|
||||
dp->time = beswal(dp->time);
|
||||
}
|
||||
|
||||
int
|
||||
acmp(void *va, void *vb)
|
||||
{
|
||||
Acc *a, *b;
|
||||
ulong ua, ub;
|
||||
uvlong ua, ub;
|
||||
|
||||
a = va;
|
||||
b = vb;
|
||||
ua = a->ms;
|
||||
ub = b->ms;
|
||||
ua = a->ticks;
|
||||
ub = b->ticks;
|
||||
|
||||
if(ua > ub)
|
||||
return 1;
|
||||
|
@ -123,31 +112,29 @@ syms(char *cout)
|
|||
Fhdr f;
|
||||
int fd;
|
||||
|
||||
if((fd = open(cout, 0)) < 0){
|
||||
perror(cout);
|
||||
exits("open");
|
||||
}
|
||||
if (!crackhdr(fd, &f)) {
|
||||
fprint(2, "can't read text file header\n");
|
||||
exits("read");
|
||||
}
|
||||
if (f.type == FNONE) {
|
||||
fprint(2, "text file not an a.out\n");
|
||||
exits("file type");
|
||||
}
|
||||
if (syminit(fd, &f) < 0) {
|
||||
fprint(2, "syminit: %r\n");
|
||||
exits("syms");
|
||||
}
|
||||
if((fd = open(cout, 0)) < 0)
|
||||
sysfatal("%r");
|
||||
if(!crackhdr(fd, &f))
|
||||
sysfatal("can't read text file header: %r");
|
||||
if(f.type == FNONE)
|
||||
sysfatal("text file not an a.out");
|
||||
if(syminit(fd, &f) < 0)
|
||||
sysfatal("syminit: %r");
|
||||
close(fd);
|
||||
}
|
||||
|
||||
#define GET2(p) (u16int)(p)[1] | (u16int)(p)[0]<<8
|
||||
#define GET4(p) (u32int)(p)[3] | (u32int)(p)[2]<<8 | (u32int)(p)[1]<<16 | (u32int)(p)[0]<<24
|
||||
#define GET8(p) (u64int)(p)[7] | (u64int)(p)[6]<<8 | (u64int)(p)[5]<<16 | (u64int)(p)[4]<<24 | \
|
||||
(u64int)(p)[3]<<32 | (u64int)(p)[2]<<40 | (u64int)(p)[1]<<48 | (u64int)(p)[0]<<56
|
||||
|
||||
void
|
||||
datas(char *dout)
|
||||
{
|
||||
int fd;
|
||||
Dir *d;
|
||||
int i;
|
||||
uchar hdr[3+1+8], *buf, *p;
|
||||
|
||||
if((fd = open(dout, 0)) < 0){
|
||||
perror(dout);
|
||||
|
@ -158,20 +145,29 @@ datas(char *dout)
|
|||
perror(dout);
|
||||
exits("stat");
|
||||
}
|
||||
ndata = d->length/sizeof(data[0]);
|
||||
d->length -= sizeof hdr;
|
||||
ndata = d->length/Datasz;
|
||||
data = malloc(ndata*sizeof(Data));
|
||||
if(data == 0){
|
||||
fprint(2, "prof: can't malloc data\n");
|
||||
exits("data malloc");
|
||||
}
|
||||
if(read(fd, data, d->length) != d->length){
|
||||
fprint(2, "prof: can't read data file\n");
|
||||
exits("data read");
|
||||
buf = malloc(d->length);
|
||||
if(buf == 0 || data == 0)
|
||||
sysfatal("malloc");
|
||||
if(read(fd, hdr, sizeof hdr) != sizeof hdr)
|
||||
sysfatal("read data header: %r");
|
||||
if(memcmp(hdr, "pr\x0f", 3) != 0)
|
||||
sysfatal("bad magic");
|
||||
cyclefreq = GET8(hdr+4);
|
||||
if(readn(fd, buf, d->length) != d->length)
|
||||
sysfatal("data file read: %r");
|
||||
for(p = buf, i = 0; i < ndata; i++){
|
||||
data[i].down = GET2(p); p += 2;
|
||||
data[i].right = GET2(p); p += 2;
|
||||
data[i].pc = GET4(p); p += 4;
|
||||
data[i].count = GET4(p); p += 4;
|
||||
data[i].time = GET8(p); p += 8;
|
||||
}
|
||||
free(buf);
|
||||
free(d);
|
||||
close(fd);
|
||||
for (i = 0; i < ndata; i++)
|
||||
swapdata(data+i);
|
||||
}
|
||||
|
||||
char*
|
||||
|
@ -189,7 +185,8 @@ name(ulong pc)
|
|||
void
|
||||
graph(int ind, ulong i, Pc *pc)
|
||||
{
|
||||
long time, count, prgm;
|
||||
long count, prgm;
|
||||
vlong time;
|
||||
Pc lpc;
|
||||
|
||||
if(i >= ndata){
|
||||
|
@ -205,9 +202,9 @@ graph(int ind, ulong i, Pc *pc)
|
|||
graph(ind, data[i].right, pc);
|
||||
indent(ind);
|
||||
if(count == 1)
|
||||
Bprint(&bout, "%s:%lud\n", name(prgm), time);
|
||||
Bprint(&bout, "%s:%.9f\n", name(prgm), (double)time/cyclefreq);
|
||||
else
|
||||
Bprint(&bout, "%s:%lud/%lud\n", name(prgm), time, count);
|
||||
Bprint(&bout, "%s:%.9f/%lud\n", name(prgm), (double)time/cyclefreq, count);
|
||||
if(data[i].down == 0xFFFF)
|
||||
return;
|
||||
lpc.next = pc;
|
||||
|
@ -246,10 +243,11 @@ symind(ulong pc)
|
|||
return -1;
|
||||
}
|
||||
|
||||
ulong
|
||||
double
|
||||
sum(ulong i)
|
||||
{
|
||||
long j, dtime, time;
|
||||
long j;
|
||||
vlong dtime, time;
|
||||
int k;
|
||||
static indent;
|
||||
|
||||
|
@ -264,7 +262,7 @@ sum(ulong i)
|
|||
if (verbose){
|
||||
for(k = 0; k < indent; k++)
|
||||
print(" ");
|
||||
print("%lud: %ld/%lud", i, data[i].time, data[i].count);
|
||||
print("%lud: %llud/%.9f/%lud", i, data[i].time, (double)data[i].time/cyclefreq, data[i].count);
|
||||
if (j >= 0)
|
||||
print(" %s\n", acc[j].name);
|
||||
else
|
||||
|
@ -278,8 +276,8 @@ sum(ulong i)
|
|||
}
|
||||
j = symind(data[i].pc);
|
||||
if (j >= 0) {
|
||||
acc[j].ms += time - dtime;
|
||||
ms += time - dtime;
|
||||
acc[j].ticks += time - dtime;
|
||||
ticks += time - dtime;
|
||||
acc[j].calls += data[i].count;
|
||||
}
|
||||
if(data[i].right == 0xFFFF)
|
||||
|
@ -300,18 +298,18 @@ plot(void)
|
|||
}
|
||||
acc[nsym].name = s.name;
|
||||
acc[nsym].pc = s.value;
|
||||
acc[nsym].calls = acc[nsym].ms = 0;
|
||||
acc[nsym].calls = acc[nsym].ticks = 0;
|
||||
}
|
||||
sum(data[0].down);
|
||||
qsort(acc, nsym, sizeof(Acc), acmp);
|
||||
Bprint(&bout, " %% Time Calls Name\n");
|
||||
if(ms == 0)
|
||||
ms = 1;
|
||||
Bprint(&bout, " %% Time Calls Name\n");
|
||||
if(ticks == 0)
|
||||
ticks = 1;
|
||||
while (--nsym >= 0) {
|
||||
if(acc[nsym].calls)
|
||||
Bprint(&bout, "%4.1f %8.3f %8lud\t%s\n",
|
||||
(100.0*acc[nsym].ms)/ms,
|
||||
acc[nsym].ms/1000.0,
|
||||
Bprint(&bout, "%4.1f %.9f %8llud\t%s\n",
|
||||
(100.0*acc[nsym].ticks)/ticks,
|
||||
(double)acc[nsym].ticks/cyclefreq,
|
||||
acc[nsym].calls,
|
||||
acc[nsym].name);
|
||||
}
|
||||
|
@ -330,30 +328,3 @@ indent(int ind)
|
|||
if(j)
|
||||
Bwrite(&bout, ". ", j);
|
||||
}
|
||||
|
||||
char* trans[] =
|
||||
{
|
||||
"386", "8.out",
|
||||
"68020", "2.out",
|
||||
"amd64", "6.out",
|
||||
"arm", "5.out",
|
||||
"mips", "v.out",
|
||||
"power", "q.out",
|
||||
"sparc", "k.out",
|
||||
"spim", "0.out",
|
||||
0,0
|
||||
};
|
||||
|
||||
char*
|
||||
defaout(void)
|
||||
{
|
||||
char *p;
|
||||
int i;
|
||||
|
||||
p = getenv("objtype");
|
||||
if(p)
|
||||
for(i=0; trans[i]; i+=2)
|
||||
if(strcmp(p, trans[i]) == 0)
|
||||
return trans[i+1];
|
||||
return trans[1];
|
||||
}
|
||||
|
|
|
@ -6,9 +6,7 @@ extern uintptr _callpc(void**);
|
|||
extern uintptr _saveret(void);
|
||||
extern uintptr _savearg(void);
|
||||
|
||||
static ulong khz;
|
||||
static ulong perr;
|
||||
static int havecycles;
|
||||
|
||||
typedef struct Plink Plink;
|
||||
struct Plink
|
||||
|
@ -19,7 +17,6 @@ struct Plink
|
|||
long pc;
|
||||
long count;
|
||||
vlong time;
|
||||
uint rec;
|
||||
};
|
||||
|
||||
#pragma profile off
|
||||
|
@ -45,11 +42,6 @@ _profin(void)
|
|||
pp = _tos->prof.pp;
|
||||
if(pp == 0 || (_tos->prof.pid && _tos->pid != _tos->prof.pid))
|
||||
return _restore(arg, ret);
|
||||
if(pc == pp->pc){
|
||||
pp->rec++;
|
||||
p = pp;
|
||||
goto out;
|
||||
}
|
||||
for(p=pp->down; p; p=p->link)
|
||||
if(p->pc == pc)
|
||||
goto out;
|
||||
|
@ -119,10 +111,7 @@ _profout(void)
|
|||
p->time = p->time + _tos->clock;
|
||||
break;
|
||||
}
|
||||
if(p->rec)
|
||||
p->rec--;
|
||||
else
|
||||
_tos->prof.pp = p->old;
|
||||
_tos->prof.pp = p->old;
|
||||
return _restore(arg, ret);
|
||||
}
|
||||
|
||||
|
@ -130,10 +119,11 @@ void
|
|||
_profdump(void)
|
||||
{
|
||||
int f;
|
||||
long n;
|
||||
vlong n;
|
||||
Plink *p;
|
||||
char *vp;
|
||||
char filename[64];
|
||||
uchar hdr[3+1+8] = {'p', 'r', 0x0f, 0x2};
|
||||
|
||||
if (_tos->prof.what == 0)
|
||||
return; /* No profiling */
|
||||
|
@ -168,8 +158,17 @@ _profdump(void)
|
|||
_tos->prof.first->time = _tos->clock;
|
||||
break;
|
||||
}
|
||||
vp = (char*)_tos->prof.first;
|
||||
hdr[4+0] = _tos->cyclefreq>>56;
|
||||
hdr[4+1] = _tos->cyclefreq>>48;
|
||||
hdr[4+2] = _tos->cyclefreq>>40;
|
||||
hdr[4+3] = _tos->cyclefreq>>32;
|
||||
hdr[4+4] = _tos->cyclefreq>>24;
|
||||
hdr[4+5] = _tos->cyclefreq>>16;
|
||||
hdr[4+6] = _tos->cyclefreq>>8;
|
||||
hdr[4+7] = _tos->cyclefreq;
|
||||
write(f, hdr, sizeof hdr);
|
||||
|
||||
vp = (char*)_tos->prof.first;
|
||||
for(p = _tos->prof.first; p <= _tos->prof.next; p++) {
|
||||
|
||||
/*
|
||||
|
@ -214,16 +213,16 @@ _profdump(void)
|
|||
/*
|
||||
* vlong time
|
||||
*/
|
||||
if (havecycles){
|
||||
n = (vlong)(p->time / (vlong)khz);
|
||||
}else
|
||||
n = p->time;
|
||||
|
||||
vp[0] = n>>24;
|
||||
vp[1] = n>>16;
|
||||
vp[2] = n>>8;
|
||||
vp[3] = n;
|
||||
vp += 4;
|
||||
n = p->time;
|
||||
vp[0] = n>>56;
|
||||
vp[1] = n>>48;
|
||||
vp[2] = n>>40;
|
||||
vp[3] = n>>32;
|
||||
vp[4] = n>>24;
|
||||
vp[5] = n>>16;
|
||||
vp[6] = n>>8;
|
||||
vp[7] = n;
|
||||
vp += 8;
|
||||
}
|
||||
write(f, (char*)_tos->prof.first, vp - (char*)_tos->prof.first);
|
||||
close(f);
|
||||
|
@ -250,10 +249,6 @@ _profmain(int argc, char **argv)
|
|||
int n, f;
|
||||
|
||||
n = 256*1024;
|
||||
if (_tos->cyclefreq != 0LL){
|
||||
khz = _tos->cyclefreq / 1000; /* Report times in milliseconds */
|
||||
havecycles = 1;
|
||||
}
|
||||
f = open("/env/profsize", OREAD|OCEXEC);
|
||||
if(f >= 0) {
|
||||
memset(ename, 0, sizeof(ename));
|
||||
|
|
Loading…
Reference in a new issue