prof: increase precision of measurements

Currently we use millisecond ticks for time spent in each function.
This is not good enough for modern machines where fast functions could
be completed in a handful of nanoseconds.  Instead let us just use the
raw ticks and store the cyclecfreq in the output prof data.  This
requires that we enlargen the time section in the data to 8 bytes,
which broke the assumptions for struct allignment and required a
slight refactor of the code used to read it in prof(1).  Since this is
a breaking change, we've devised a small format to communicate the
version for future revision of this format and this patch includes a
modification to file(1) for recognizing this format.  Additionally
some minor improvements were made across the board.
This commit is contained in:
Jacob Moody 2024-05-08 17:20:22 +00:00
parent 0fcab2a6e8
commit c8544c91d9
4 changed files with 94 additions and 136 deletions

View file

@ -6,12 +6,8 @@ prof, tprof, kprof \- display profiling data
[
.B -dr
]
[
.I program
]
[
.I profile
]
.PP
.B tprof
.I pid
@ -26,16 +22,11 @@ interprets files produced automatically by programs loaded using the
option of
.IR 2l (1)
or other loader.
The symbol table in the
named program file
.RL ( 2.out
etc., according to
.BR $objtype ,
by default)
is read and correlated with the
profile file
.RL ( prof.out
by default).
The symbol table in the named
.I program
file is read and correlated with the
.I profile
file.
For each symbol, the percentage
of time (in seconds) spent executing between that symbol
and the next
@ -59,7 +50,7 @@ where
.I symbol
is the entry point of the call,
.I time
is in milliseconds,
is in seconds,
and
.I ncall
is the number of times that entry point was called at that

View file

@ -918,6 +918,7 @@ struct FILE_STRING
"MThd", "midi audio", 4, "audio/midi",
"MUS\x1a", "mus audio", 4, "audio/mus",
"Creative Voice File\x1a", "voc audio", 20, "audio/x-voc",
"pr\x0f", "Plan 9 profile data", 3, OCTET,
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\x00\x00\x00\xbb\x11\x22\x00\x44\xff\xff\xff\xff\xff\xff\xff\xff"

View file

@ -13,8 +13,9 @@ struct Data
ushort right;
ulong pc;
ulong count;
ulong time;
uvlong time;
};
enum { Datasz = 2+2+4+4+8 };
struct Pc
{
@ -26,13 +27,13 @@ struct Acc
{
char *name;
ulong pc;
ulong ms;
ulong calls;
uvlong ticks;
uvlong calls;
};
Data* data;
Acc* acc;
ulong ms;
uvlong ticks;
long nsym;
long ndata;
int dflag;
@ -40,6 +41,7 @@ int rflag;
Biobuf bout;
int tabstop = 4;
int verbose;
uvlong cyclefreq;
void syms(char*);
void datas(char*);
@ -68,18 +70,15 @@ main(int argc, char *argv[])
rflag = 1;
break;
default:
usage:
fprint(2, "usage: prof [-dr] [8.out] [prof.out]\n");
exits("usage");
}ARGEND
Binit(&bout, 1, OWRITE);
if(argc > 0)
syms(argv[0]);
else
syms(defaout());
if(argc > 1)
datas(argv[1]);
else
datas("prof.out");
if(argc < 2)
goto usage;
syms(argv[0]);
datas(argv[1]);
if(ndata){
if(dflag)
graph(0, data[0].down, 0);
@ -89,26 +88,16 @@ main(int argc, char *argv[])
exits(0);
}
void
swapdata(Data *dp)
{
dp->down = beswab(dp->down);
dp->right = beswab(dp->right);
dp->pc = beswal(dp->pc);
dp->count = beswal(dp->count);
dp->time = beswal(dp->time);
}
int
acmp(void *va, void *vb)
{
Acc *a, *b;
ulong ua, ub;
uvlong ua, ub;
a = va;
b = vb;
ua = a->ms;
ub = b->ms;
ua = a->ticks;
ub = b->ticks;
if(ua > ub)
return 1;
@ -123,31 +112,29 @@ syms(char *cout)
Fhdr f;
int fd;
if((fd = open(cout, 0)) < 0){
perror(cout);
exits("open");
}
if (!crackhdr(fd, &f)) {
fprint(2, "can't read text file header\n");
exits("read");
}
if (f.type == FNONE) {
fprint(2, "text file not an a.out\n");
exits("file type");
}
if (syminit(fd, &f) < 0) {
fprint(2, "syminit: %r\n");
exits("syms");
}
if((fd = open(cout, 0)) < 0)
sysfatal("%r");
if(!crackhdr(fd, &f))
sysfatal("can't read text file header: %r");
if(f.type == FNONE)
sysfatal("text file not an a.out");
if(syminit(fd, &f) < 0)
sysfatal("syminit: %r");
close(fd);
}
#define GET2(p) (u16int)(p)[1] | (u16int)(p)[0]<<8
#define GET4(p) (u32int)(p)[3] | (u32int)(p)[2]<<8 | (u32int)(p)[1]<<16 | (u32int)(p)[0]<<24
#define GET8(p) (u64int)(p)[7] | (u64int)(p)[6]<<8 | (u64int)(p)[5]<<16 | (u64int)(p)[4]<<24 | \
(u64int)(p)[3]<<32 | (u64int)(p)[2]<<40 | (u64int)(p)[1]<<48 | (u64int)(p)[0]<<56
void
datas(char *dout)
{
int fd;
Dir *d;
int i;
uchar hdr[3+1+8], *buf, *p;
if((fd = open(dout, 0)) < 0){
perror(dout);
@ -158,20 +145,29 @@ datas(char *dout)
perror(dout);
exits("stat");
}
ndata = d->length/sizeof(data[0]);
d->length -= sizeof hdr;
ndata = d->length/Datasz;
data = malloc(ndata*sizeof(Data));
if(data == 0){
fprint(2, "prof: can't malloc data\n");
exits("data malloc");
}
if(read(fd, data, d->length) != d->length){
fprint(2, "prof: can't read data file\n");
exits("data read");
buf = malloc(d->length);
if(buf == 0 || data == 0)
sysfatal("malloc");
if(read(fd, hdr, sizeof hdr) != sizeof hdr)
sysfatal("read data header: %r");
if(memcmp(hdr, "pr\x0f", 3) != 0)
sysfatal("bad magic");
cyclefreq = GET8(hdr+4);
if(readn(fd, buf, d->length) != d->length)
sysfatal("data file read: %r");
for(p = buf, i = 0; i < ndata; i++){
data[i].down = GET2(p); p += 2;
data[i].right = GET2(p); p += 2;
data[i].pc = GET4(p); p += 4;
data[i].count = GET4(p); p += 4;
data[i].time = GET8(p); p += 8;
}
free(buf);
free(d);
close(fd);
for (i = 0; i < ndata; i++)
swapdata(data+i);
}
char*
@ -189,7 +185,8 @@ name(ulong pc)
void
graph(int ind, ulong i, Pc *pc)
{
long time, count, prgm;
long count, prgm;
vlong time;
Pc lpc;
if(i >= ndata){
@ -205,9 +202,9 @@ graph(int ind, ulong i, Pc *pc)
graph(ind, data[i].right, pc);
indent(ind);
if(count == 1)
Bprint(&bout, "%s:%lud\n", name(prgm), time);
Bprint(&bout, "%s:%.9f\n", name(prgm), (double)time/cyclefreq);
else
Bprint(&bout, "%s:%lud/%lud\n", name(prgm), time, count);
Bprint(&bout, "%s:%.9f/%lud\n", name(prgm), (double)time/cyclefreq, count);
if(data[i].down == 0xFFFF)
return;
lpc.next = pc;
@ -246,10 +243,11 @@ symind(ulong pc)
return -1;
}
ulong
double
sum(ulong i)
{
long j, dtime, time;
long j;
vlong dtime, time;
int k;
static indent;
@ -264,7 +262,7 @@ sum(ulong i)
if (verbose){
for(k = 0; k < indent; k++)
print(" ");
print("%lud: %ld/%lud", i, data[i].time, data[i].count);
print("%lud: %llud/%.9f/%lud", i, data[i].time, (double)data[i].time/cyclefreq, data[i].count);
if (j >= 0)
print(" %s\n", acc[j].name);
else
@ -278,8 +276,8 @@ sum(ulong i)
}
j = symind(data[i].pc);
if (j >= 0) {
acc[j].ms += time - dtime;
ms += time - dtime;
acc[j].ticks += time - dtime;
ticks += time - dtime;
acc[j].calls += data[i].count;
}
if(data[i].right == 0xFFFF)
@ -300,18 +298,18 @@ plot(void)
}
acc[nsym].name = s.name;
acc[nsym].pc = s.value;
acc[nsym].calls = acc[nsym].ms = 0;
acc[nsym].calls = acc[nsym].ticks = 0;
}
sum(data[0].down);
qsort(acc, nsym, sizeof(Acc), acmp);
Bprint(&bout, " %% Time Calls Name\n");
if(ms == 0)
ms = 1;
Bprint(&bout, " %% Time Calls Name\n");
if(ticks == 0)
ticks = 1;
while (--nsym >= 0) {
if(acc[nsym].calls)
Bprint(&bout, "%4.1f %8.3f %8lud\t%s\n",
(100.0*acc[nsym].ms)/ms,
acc[nsym].ms/1000.0,
Bprint(&bout, "%4.1f %.9f %8llud\t%s\n",
(100.0*acc[nsym].ticks)/ticks,
(double)acc[nsym].ticks/cyclefreq,
acc[nsym].calls,
acc[nsym].name);
}
@ -330,30 +328,3 @@ indent(int ind)
if(j)
Bwrite(&bout, ". ", j);
}
char* trans[] =
{
"386", "8.out",
"68020", "2.out",
"amd64", "6.out",
"arm", "5.out",
"mips", "v.out",
"power", "q.out",
"sparc", "k.out",
"spim", "0.out",
0,0
};
char*
defaout(void)
{
char *p;
int i;
p = getenv("objtype");
if(p)
for(i=0; trans[i]; i+=2)
if(strcmp(p, trans[i]) == 0)
return trans[i+1];
return trans[1];
}

View file

@ -6,9 +6,7 @@ extern uintptr _callpc(void**);
extern uintptr _saveret(void);
extern uintptr _savearg(void);
static ulong khz;
static ulong perr;
static int havecycles;
typedef struct Plink Plink;
struct Plink
@ -19,7 +17,6 @@ struct Plink
long pc;
long count;
vlong time;
uint rec;
};
#pragma profile off
@ -45,11 +42,6 @@ _profin(void)
pp = _tos->prof.pp;
if(pp == 0 || (_tos->prof.pid && _tos->pid != _tos->prof.pid))
return _restore(arg, ret);
if(pc == pp->pc){
pp->rec++;
p = pp;
goto out;
}
for(p=pp->down; p; p=p->link)
if(p->pc == pc)
goto out;
@ -119,10 +111,7 @@ _profout(void)
p->time = p->time + _tos->clock;
break;
}
if(p->rec)
p->rec--;
else
_tos->prof.pp = p->old;
_tos->prof.pp = p->old;
return _restore(arg, ret);
}
@ -130,10 +119,11 @@ void
_profdump(void)
{
int f;
long n;
vlong n;
Plink *p;
char *vp;
char filename[64];
uchar hdr[3+1+8] = {'p', 'r', 0x0f, 0x2};
if (_tos->prof.what == 0)
return; /* No profiling */
@ -168,8 +158,17 @@ _profdump(void)
_tos->prof.first->time = _tos->clock;
break;
}
vp = (char*)_tos->prof.first;
hdr[4+0] = _tos->cyclefreq>>56;
hdr[4+1] = _tos->cyclefreq>>48;
hdr[4+2] = _tos->cyclefreq>>40;
hdr[4+3] = _tos->cyclefreq>>32;
hdr[4+4] = _tos->cyclefreq>>24;
hdr[4+5] = _tos->cyclefreq>>16;
hdr[4+6] = _tos->cyclefreq>>8;
hdr[4+7] = _tos->cyclefreq;
write(f, hdr, sizeof hdr);
vp = (char*)_tos->prof.first;
for(p = _tos->prof.first; p <= _tos->prof.next; p++) {
/*
@ -214,16 +213,16 @@ _profdump(void)
/*
* vlong time
*/
if (havecycles){
n = (vlong)(p->time / (vlong)khz);
}else
n = p->time;
vp[0] = n>>24;
vp[1] = n>>16;
vp[2] = n>>8;
vp[3] = n;
vp += 4;
n = p->time;
vp[0] = n>>56;
vp[1] = n>>48;
vp[2] = n>>40;
vp[3] = n>>32;
vp[4] = n>>24;
vp[5] = n>>16;
vp[6] = n>>8;
vp[7] = n;
vp += 8;
}
write(f, (char*)_tos->prof.first, vp - (char*)_tos->prof.first);
close(f);
@ -250,10 +249,6 @@ _profmain(int argc, char **argv)
int n, f;
n = 256*1024;
if (_tos->cyclefreq != 0LL){
khz = _tos->cyclefreq / 1000; /* Report times in milliseconds */
havecycles = 1;
}
f = open("/env/profsize", OREAD|OCEXEC);
if(f >= 0) {
memset(ename, 0, sizeof(ename));