From 873e5f5094b404c7ac62830341eca4eacf8b4afa Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Fri, 24 Sep 2010 12:25:25 -0400 Subject: [PATCH] libmach: more mach-o support R=rsc http://codereview.appspot.com/2277041 --- src/libmach/LICENSE | 1 + src/libmach/crackmacho.c | 4 +- src/libmach/macho.c | 244 +++++++++++++++++++++++++++++++++++++-- src/libmach/macho.h | 126 ++++++++++++++++---- src/libmach/mkfile | 3 + 5 files changed, 343 insertions(+), 35 deletions(-) diff --git a/src/libmach/LICENSE b/src/libmach/LICENSE index 916fb128..bf30f8ea 100644 --- a/src/libmach/LICENSE +++ b/src/libmach/LICENSE @@ -11,6 +11,7 @@ file such as NOTICE, LICENCE or COPYING. Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). Revisions Copyright © 2000-2004 Lucent Technologies Inc. and others. Portions Copyright © 2001-2007 Russ Cox. + Portions Copyright © 2008-2010 Google Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/src/libmach/crackmacho.c b/src/libmach/crackmacho.c index c19d81e9..bb521fad 100644 --- a/src/libmach/crackmacho.c +++ b/src/libmach/crackmacho.c @@ -115,8 +115,8 @@ crackmacho(int fd, Fhdr *fp) if(m->cmd[i].type == MachoCmdSymtab) break; if(i < m->ncmd){ - fp->stabs.stabbase = load(fp->fd, m->cmd[i].sym.symoff, m->cmd[i].sym.nsyms*16); - fp->stabs.stabsize = m->cmd[i].sym.nsyms*16; + fp->stabs.stabbase = load(fp->fd, m->cmd[i].sym.symoff, m->cmd[i].sym.nsym*16); + fp->stabs.stabsize = m->cmd[i].sym.nsym*16; fp->stabs.strbase = (char*)load(fp->fd, m->cmd[i].sym.stroff, m->cmd[i].sym.strsize); if(fp->stabs.stabbase == nil || fp->stabs.strbase == nil){ fp->stabs.stabbase = nil; diff --git a/src/libmach/macho.c b/src/libmach/macho.c index 9d9a1232..7f12735a 100644 --- a/src/libmach/macho.c +++ b/src/libmach/macho.c @@ -6,6 +6,27 @@ /* http://www.channelu.com/NeXT/NeXTStep/3.3/nd/DevTools/14_MachO/MachO.htmld/ */ +static 
long +preadn(int fd, void *vdata, uint32 ulen, uint64 offset) +{ + long n; + uchar *data; + long len; + + len = ulen; + data = vdata; +/* fprint(2, "readn 0x%llux 0x%ux\n", offset, ulen); */ + while(len > 0){ + n = pread(fd, data, len, offset); + if(n <= 0) + break; + data += n; + offset += n; + len -= n; + } + return data-(uchar*)vdata; +} + Macho* machoopen(char *name) @@ -22,11 +43,15 @@ machoopen(char *name) } static int -unpackseg(uchar *p, Macho *m, MachoCmd *c, uint type, uint sz) +unpackcmd(uchar *p, Macho *m, MachoCmd *c, uint type, uint sz) { - u32int (*e4)(uchar*); + uint32 (*e4)(uchar*); + uint64 (*e8)(uchar*); + MachoSect *s; + int i; e4 = m->e4; + e8 = m->e8; c->type = type; c->size = sz; @@ -45,59 +70,257 @@ unpackseg(uchar *p, Macho *m, MachoCmd *c, uint type, uint sz) c->seg.initprot = e4(p+44); c->seg.nsect = e4(p+48); c->seg.flags = e4(p+52); + c->seg.sect = mallocz(c->seg.nsect * sizeof c->seg.sect[0], 1); + if(c->seg.sect == nil) + return -1; + if(sz < 56+c->seg.nsect*68) + return -1; + p += 56; + for(i=0; i<c->seg.nsect; i++) { + s = &c->seg.sect[i]; + strecpy(s->name, s->name+sizeof s->name, (char*)p+0); + strecpy(s->segname, s->segname+sizeof s->segname, (char*)p+16); + s->addr = e4(p+32); + s->size = e4(p+36); + s->offset = e4(p+40); + s->align = e4(p+44); + s->reloff = e4(p+48); + s->nreloc = e4(p+52); + s->flags = e4(p+56); + // p+60 and p+64 are reserved + p += 68; + } + break; + case MachoCmdSegment64: + if(sz < 72) + return -1; + strecpy(c->seg.name, c->seg.name+sizeof c->seg.name, (char*)p+8); + c->seg.vmaddr = e8(p+24); + c->seg.vmsize = e8(p+32); + c->seg.fileoff = e8(p+40); + c->seg.filesz = e8(p+48); + c->seg.maxprot = e4(p+56); + c->seg.initprot = e4(p+60); + c->seg.nsect = e4(p+64); + c->seg.flags = e4(p+68); + c->seg.sect = mallocz(c->seg.nsect * sizeof c->seg.sect[0], 1); + if(c->seg.sect == nil) + return -1; + if(sz < 72+c->seg.nsect*80) + return -1; + p += 72; + for(i=0; i<c->seg.nsect; i++) { + s = &c->seg.sect[i]; + 
strecpy(s->name, s->name+sizeof s->name, (char*)p+0); + strecpy(s->segname, s->segname+sizeof s->segname, (char*)p+16); + s->addr = e8(p+32); + s->size = e8(p+40); + s->offset = e4(p+48); + s->align = e4(p+52); + s->reloff = e4(p+56); + s->nreloc = e4(p+60); + s->flags = e4(p+64); + // p+68, p+72, and p+76 are reserved + p += 80; + } break; case MachoCmdSymtab: if(sz < 24) return -1; c->sym.symoff = e4(p+8); - c->sym.nsyms = e4(p+12); + c->sym.nsym = e4(p+12); c->sym.stroff = e4(p+16); c->sym.strsize = e4(p+20); break; + case MachoCmdDysymtab: + if(sz < 80) + return -1; + c->dsym.ilocalsym = e4(p+8); + c->dsym.nlocalsym = e4(p+12); + c->dsym.iextdefsym = e4(p+16); + c->dsym.nextdefsym = e4(p+20); + c->dsym.iundefsym = e4(p+24); + c->dsym.nundefsym = e4(p+28); + c->dsym.tocoff = e4(p+32); + c->dsym.ntoc = e4(p+36); + c->dsym.modtaboff = e4(p+40); + c->dsym.nmodtab = e4(p+44); + c->dsym.extrefsymoff = e4(p+48); + c->dsym.nextrefsyms = e4(p+52); + c->dsym.indirectsymoff = e4(p+56); + c->dsym.nindirectsyms = e4(p+60); + c->dsym.extreloff = e4(p+64); + c->dsym.nextrel = e4(p+68); + c->dsym.locreloff = e4(p+72); + c->dsym.nlocrel = e4(p+76); + break; } return 0; } +int +macholoadrel(Macho *m, MachoSect *sect) +{ + MachoRel *rel, *r; + uchar *buf, *p; + int i, n; + uint32 v; + + if(sect->rel != nil || sect->nreloc == 0) + return 0; + rel = mallocz(sect->nreloc * sizeof r[0], 1); + if(rel == nil) + return -1; + n = sect->nreloc * 8; + buf = mallocz(n, 1); + if(buf == nil) { + free(rel); + return -1; + } + if(seek(m->fd, sect->reloff, 0) < 0 || readn(m->fd, buf, n) != n) { + free(rel); + free(buf); + return -1; + } + for(i=0; i<sect->nreloc; i++) { + r = &rel[i]; + p = buf+i*8; + r->addr = m->e4(p); + + // TODO(rsc): Wrong interpretation for big-endian bitfields? 
+ v = m->e4(p+4); + r->symnum = v & 0xFFFFFF; + v >>= 24; + r->pcrel = v&1; + v >>= 1; + r->length = 1<<(v&3); + v >>= 2; + r->extrn = v&1; + v >>= 1; + r->type = v; + } + sect->rel = rel; + free(buf); + return 0; +} + +int +macholoadsym(Macho *m, MachoSymtab *symtab) +{ + char *strbuf; + uchar *symbuf, *p; + int i, n, symsize; + MachoSym *sym, *s; + uint32 v; + + if(symtab->sym != nil) + return 0; + + strbuf = mallocz(symtab->strsize, 1); + if(strbuf == nil) + return -1; + if(seek(m->fd, symtab->stroff, 0) < 0 || readn(m->fd, strbuf, symtab->strsize) != symtab->strsize) { + free(strbuf); + return -1; + } + + symsize = 12; + if(m->is64) + symsize = 16; + n = symtab->nsym * symsize; + symbuf = mallocz(n, 1); + if(symbuf == nil) { + free(strbuf); + return -1; + } + if(seek(m->fd, symtab->symoff, 0) < 0 || readn(m->fd, symbuf, n) != n) { + free(strbuf); + free(symbuf); + return -1; + } + sym = mallocz(symtab->nsym * sizeof sym[0], 1); + if(sym == nil) { + free(strbuf); + free(symbuf); + return -1; + } + p = symbuf; + for(i=0; i<symtab->nsym; i++) { + s = &sym[i]; + v = m->e4(p); + if(v >= symtab->strsize) { + free(strbuf); + free(symbuf); + free(sym); + return -1; + } + s->name = strbuf + v; + s->type = p[4]; + s->sectnum = p[5]; + s->desc = m->e2(p+6); + if(m->is64) + s->value = m->e8(p+8); + else + s->value = m->e4(p+8); + p += symsize; + } + symtab->str = strbuf; + symtab->sym = sym; + free(symbuf); + return 0; +} Macho* machoinit(int fd) { - int i; + int i, is64; uchar hdr[7*4], *cmdp; - u32int (*e4)(uchar*); + uchar tmp[4]; + uint16 (*e2)(uchar*); + uint32 (*e4)(uchar*); + uint64 (*e8)(uchar*); ulong ncmd, cmdsz, ty, sz, off; Macho *m; if(seek(fd, 0, 0) < 0 || readn(fd, hdr, sizeof hdr) != sizeof hdr) return nil; - if(beload4(hdr) == 0xFEEDFACE) + if((beload4(hdr)&~1) == 0xFEEDFACE){ + e2 = beload2; e4 = beload4; - else if(leload4(hdr) == 0xFEEDFACE) + e8 = beload8; + }else if((leload4(hdr)&~1) == 0xFEEDFACE){ + e2 = leload2; e4 = leload4; - else{ + e8 = leload8; + }else{ 
werrstr("bad magic - not mach-o file"); return nil; } - + is64 = e4(hdr) == 0xFEEDFACF; ncmd = e4(hdr+4*4); cmdsz = e4(hdr+5*4); if(ncmd > 0x10000 || cmdsz >= 0x01000000){ werrstr("implausible mach-o header ncmd=%lud cmdsz=%lud", ncmd, cmdsz); return nil; } + if(is64) + readn(fd, tmp, 4); // skip reserved word in header m = mallocz(sizeof(*m)+ncmd*sizeof(MachoCmd)+cmdsz, 1); if(m == nil) return nil; m->fd = fd; + m->e2 = e2; m->e4 = e4; + m->e8 = e8; m->cputype = e4(hdr+1*4); m->subcputype = e4(hdr+2*4); m->filetype = e4(hdr+3*4); m->ncmd = ncmd; m->flags = e4(hdr+6*4); + m->is64 = is64; m->cmd = (MachoCmd*)(m+1); off = sizeof hdr; @@ -112,11 +335,10 @@ machoinit(int fd) ty = e4(cmdp); sz = e4(cmdp+4); m->cmd[i].off = off; - unpackseg(cmdp, m, &m->cmd[i], ty, sz); + unpackcmd(cmdp, m, &m->cmd[i], ty, sz); cmdp += sz; off += sz; } - return m; } diff --git a/src/libmach/macho.h b/src/libmach/macho.h index d2a1a2e8..2b449f04 100644 --- a/src/libmach/macho.h +++ b/src/libmach/macho.h @@ -1,11 +1,18 @@ typedef struct Macho Macho; typedef struct MachoCmd MachoCmd; +typedef struct MachoSeg MachoSeg; +typedef struct MachoSect MachoSect; +typedef struct MachoRel MachoRel; +typedef struct MachoSymtab MachoSymtab; +typedef struct MachoSym MachoSym; +typedef struct MachoDysymtab MachoDysymtab; enum { MachoCpuVax = 1, MachoCpu68000 = 6, MachoCpu386 = 7, + MachoCpuAmd64 = 0x1000007, MachoCpuMips = 8, MachoCpu98000 = 10, MachoCpuHppa = 11, @@ -20,6 +27,8 @@ enum MachoCmdSymtab = 2, MachoCmdSymseg = 3, MachoCmdThread = 4, + MachoCmdDysymtab = 11, + MachoCmdSegment64 = 25, MachoFileObject = 1, MachoFileExecutable = 2, @@ -28,40 +37,111 @@ enum MachoFilePreload = 5 }; +struct MachoSeg +{ + char name[16+1]; + uint64 vmaddr; + uint64 vmsize; + uint32 fileoff; + uint32 filesz; + uint32 maxprot; + uint32 initprot; + uint32 nsect; + uint32 flags; + MachoSect *sect; +}; + +struct MachoSect +{ + char name[16+1]; + char segname[16+1]; + uint64 addr; + uint64 size; + uint32 offset; + uint32 
align; + uint32 reloff; + uint32 nreloc; + uint32 flags; + + MachoRel *rel; +}; + +struct MachoRel +{ + uint32 addr; + uint32 symnum; + uint8 pcrel; + uint8 length; + uint8 extrn; + uint8 type; +}; + +struct MachoSymtab +{ + uint32 symoff; + uint32 nsym; + uint32 stroff; + uint32 strsize; + + char *str; + MachoSym *sym; +}; + +struct MachoSym +{ + char *name; + uint8 type; + uint8 sectnum; + uint16 desc; + char kind; + uint64 value; +}; + +struct MachoDysymtab +{ + uint32 ilocalsym; + uint32 nlocalsym; + uint32 iextdefsym; + uint32 nextdefsym; + uint32 iundefsym; + uint32 nundefsym; + uint32 tocoff; + uint32 ntoc; + uint32 modtaboff; + uint32 nmodtab; + uint32 extrefsymoff; + uint32 nextrefsyms; + uint32 indirectsymoff; + uint32 nindirectsyms; + uint32 extreloff; + uint32 nextrel; + uint32 locreloff; + uint32 nlocrel; +}; + struct MachoCmd { int type; - ulong off; - ulong size; - struct { - char name[16+1]; - ulong vmaddr; - ulong vmsize; - ulong fileoff; - ulong filesz; - ulong maxprot; - ulong initprot; - ulong nsect; - ulong flags; - } seg; - struct { - ulong symoff; - ulong nsyms; - ulong stroff; - ulong strsize; - } sym; + uint32 off; + uint32 size; + MachoSeg seg; + MachoSymtab sym; + MachoDysymtab dsym; }; struct Macho { int fd; + int is64; uint cputype; uint subcputype; - ulong filetype; - ulong flags; + uint32 filetype; + uint32 flags; MachoCmd *cmd; uint ncmd; - u32int (*e4)(uchar*); + uint16 (*e2)(uchar*); + uint32 (*e4)(uchar*); + uint64 (*e8)(uchar*); int (*coreregs)(Macho*, uchar**); }; @@ -69,3 +149,5 @@ Macho *machoopen(char*); Macho *machoinit(int); void machoclose(Macho*); int coreregsmachopower(Macho*, uchar**); +int macholoadrel(Macho*, MachoSect*); +int macholoadsym(Macho*, MachoSymtab*); diff --git a/src/libmach/mkfile b/src/libmach/mkfile index c218c2cf..3d3a6780 100644 --- a/src/libmach/mkfile +++ b/src/libmach/mkfile @@ -71,6 +71,9 @@ elfnm: elfnm.o $LIBDIR/$LIB demangler: demangler.o $LIBDIR/$LIB $LD -o $target $prereq -l9 +machodump: 
machodump.o $LIBDIR/$LIB + $LD -o $target $prereq -l9 + SunOS.$O: nosys.c Darwin.$O: nosys.c