/*
 * troff2html (name taken from the usage message): reads troff output
 * directives (x, H, V, C, p, s, v, w ... as handled in the directive switch
 * near the end of the file) and writes text with HTML markup to bout.
 *
 * NOTE(review): this copy looks as if it went through a markup stripper.
 * Text between a less-than sign and the following greater-than sign is
 * missing in many places (loop conditions in main, getline and lookup,
 * shift expressions, most of the tag strings), the #include names are gone,
 * and line breaks survive only inside a few string literals and comments,
 * so a // comment now hides the rest of its line.  The code below is kept
 * exactly as found; restore a pristine copy before changing any logic.
 */
#include #include #include enum{ Nfont = 11, Wid = 20, /* tmac.anhtml sets page width to 20" so we can recognize .nf text */ }; typedef ulong Char; typedef struct Troffchar Troffchar; typedef struct Htmlchar Htmlchar; typedef struct Font Font; typedef struct HTMLfont HTMLfont; /* a Char is 32 bits. low 16 bits are the rune. higher are attributes */ enum { Italic = 16, Bold, CW, Indent1, Indent2, Indent3, Heading = 25, Anchor = 26, /* must be last */ }; enum /* magic emissions */ { Estring = 0, Epp = 1<<16, }; int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW }; int nest[10]; int nnest; struct Troffchar { char *name; char *value; }; struct Htmlchar { char *utf; char *name; int value; }; #include "chars.h" struct Font{ char *name; HTMLfont *htmlfont; }; struct HTMLfont{ char *name; char *htmlname; int bit; }; /* R must be first; it's the default representation for fonts we don't recognize */ HTMLfont htmlfonts[] = { "R", nil, 0, "LucidaSans", nil, 0, "I", "i", Italic, "LucidaSansI", "i", Italic, "CW", "tt", CW, "LucidaCW", "tt", CW, nil, nil, }; #define TABLE "" /* onattr and offattr are indexed by attribute bit number; offattr entries are written by the iputs(&bout, offattr[nest[j]]) call further down and onattr presumably mirrors that for switching on - TODO confirm. NOTE(review): several entries are empty strings in this copy, apparently because their markup was stripped */ char* onattr[8*sizeof(ulong)] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", /* italic */ "", /* bold */ "", /* cw */ "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent1 */ "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent2 */ "<+table border=0 cellpadding=0 cellspacing=0>
\n", /* indent3 */ 0, 0, 0, "

", /* heading 25 */ "", /* anchor 26 */ }; char* offattr[8*sizeof(ulong)] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "", /* italic */ "", /* bold */ "", /* cw */ "<-/table>", /* indent1 */ "<-/table>", /* indent2 */ "<-/table>", /* indent3 */ 0, 0, 0, "", /* heading 25 */ "", /* anchor 26 */ }; Font *font[Nfont]; Biobuf bout; int debug = 0; /* troff state */ int page = 1; int ft = 1; int vp = 0; int hp = 0; int ps = 1; int res = 720; int didP = 0; int atnewline = 1; int prevlineH = 0; ulong attr = 0; /* or'ed into each Char */ Char *chars; int nchars; int nalloc; char** anchors; /* allocated in order */ int nanchors; char *pagename; char *section; char *filename; int cno; char buf[8192]; char *title = "Plan 9 man page"; void process(Biobuf*, char*); void mountfont(int, char*); void switchfont(int); void header(char*); void flush(void); void trailer(void); /* emalloc: malloc n bytes; calls sysfatal when malloc returns nil */ void* emalloc(ulong n) { void *p; p = malloc(n); if(p == nil) sysfatal("malloc failed: %r"); return p; } /* erealloc: realloc p to n bytes; calls sysfatal when realloc returns nil */ void* erealloc(void *p, ulong n) { p = realloc(p, n); if(p == nil) sysfatal("realloc failed: %r"); return p; } /* estrdup: strdup s; calls sysfatal when strdup returns nil */ char* estrdup(char *s) { char *t; t = strdup(s); if(t == nil) sysfatal("strdup failed: %r"); return t; } /* usage: print the command synopsis on fd 2, then call exits with the status string usage */ void usage(void) { fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n"); exits("usage"); } /* hccmp: comparison function over two Htmlchar pointers; returns a->value minus b->value */ int hccmp(const void *va, const void *vb) { Htmlchar *a, *b; a = (Htmlchar*)va; b = (Htmlchar*)vb; return a->value - b->value; } /* main: NOTE(review) most of this body is missing from this copy; the text that follows the declarations runs straight into the tail of another function */ void main(int argc, char *argv[]) { int i; Biobuf in, *inp; Rune r; for(i=0; i"); }else{ for(i=0; i 0){ if(c == 0x2018 && (chars[nchars-1]&0xFFFF) == 0x2018 && a==(chars[nchars-1]&~0xFFFF)){ chars[nchars-1] = (ul&~0xFFFF) | 0x201C; return; } if(c == 0x2019 && (chars[nchars-1]&0xFFFF) == 0x2019 && a==(chars[nchars-1]&~0xFFFF)){ chars[nchars-1] = (ul&~0xFFFF) | 0x201D; return; } } } chars[nchars++] = ul; } void emit(Rune r) { emitul(r | attr, 0); /* * Close man page references early, so that * .IR proof (1), * doesn't make the comma part of the link. 
*/ if(r == ')') attr &= ~(1< 60 && r == ' ') r = '\n'; if(r >= 0x80) Bprint(b, "&#%d;", r); else Bputrune(b, r); if(r == '\n'){ for(i=0; i=i; j--) iputs(&bout, offattr[nest[j]]); /* turn on everything we just turned off but didn't want to */ for(j=i; j

"); iputrune(&bout, '\n'); continue; } a = c & ~0xFFFF; c &= 0xFFFF; /* * If we're going to turn something off after a space, * let's just turn it off before. */ if(c==' ' && i= 32) a ^= a & ~chars[i+1]; setattr(a); if(c == Estring){ /* next word is string to print */ /* NOTE(review): a char pointer is recovered from a Char (ulong) here, which assumes a pointer fits in ulong - confirm for the target */ iputs(&bout, (char*)chars[++i]); continue; } iputrune(&bout, c & 0xFFFF); } } /* header: write the opening part of the page to bout; s is printed through a %s format and, when both pagename and section are set, each is printed twice in one line. NOTE(review): the surrounding markup in these strings is missing from this copy */ void header(char *s) { Bprint(&bout, "\n"); Bprint(&bout, "%s\n", s); Bprint(&bout, "\n"); Bprint(&bout, "\n"); Bprint(&bout, "\n"); Bprint(&bout, "\n"); Bprint(&bout, "
\n"); Bprint(&bout, "
\n"); if(pagename && section){ Bprint(&bout, "
%s(%s)%s(%s)\n", pagename, section, pagename, section); } Bprint(&bout, "
\n"); } /* trailer: write the closing part of the page to bout; when LUCENT is defined it also prints a copyright line carrying the current local year */ void trailer(void) { Bprint(&bout, "\n"); Bprint(&bout, "
\n"); Bprint(&bout, "
\n"); #ifdef LUCENT { Tm *t; t = localtime(time(nil)); Bprint(&bout, TABLE "\n"); Bprint(&bout, "\n"); Bprint(&bout, "Portions Copyright © %d Lucent Technologies. All rights reserved.\n", t->year+1900); } #endif Bprint(&bout, "\n"); Bprint(&bout, "\n"); } /* getc: read one rune from b with Bgetrune, counting it in cno (cno is reported in the unknown-directive diagnostic) */ int getc(Biobuf *b) { cno++; return Bgetrune(b); } /* ungetc: push the last rune back with Bungetrune and undo the cno count */ void ungetc(Biobuf *b) { cno--; Bungetrune(b); } char* getline(Biobuf *b) { int i, c; for(i=0; i 2) fprint(2, "set %s = %d\n", name, i); if(min<=i && i"); return; case 's': /* stop */ return; case 't': /* trailer */ return; case 'T': if(nfld!=2 || strcmp(fld[1], "utf")!=0) sysfatal("output for unknown typesetter type %s", fld[1]); return; case 'X': if(nfld<3 || strcmp(fld[1], "html")!=0) break; /* is it a man reference of the form cp(1)? */ /* X manref start/end cp (1) */ if(nfld==6 && strcmp(fld[2], "manref")==0){ /* was the right macro; is it the right form? */ if(strlen(fld[5])>=3 && fld[5][0]=='(' && fld[5][2]==')' && '0'<=fld[5][1] && fld[5][1]<='9'){ if(strcmp(fld[3], "start") == 0){ /* set anchor attribute and remember string */ attr |= (1<", fld[5][1], fld[4]); nanchors++; anchors = erealloc(anchors, nanchors*sizeof(char*)); anchors[nanchors-1] = estrdup(buf); }else if(strcmp(fld[3], "end") == 0) attr &= ~(1<=5 && strcmp(fld[2], "manhead") == 0){ /* NOTE(review): plain strdup here and the results are not checked, whereas estrdup is used for the anchors above - confirm intent */ pagename = strdup(fld[3]); section = strdup(fld[4]); }else if(nfld<4 || strcmp(fld[2], "manref")!=0){ if(nfld>2 && strcmp(fld[2], "

")==0){ /* avoid triggering extra
*/ didP = 1; /* clear all font attributes before paragraph */ emitul(' ' | (attr & ~(0xFFFF|((1<"); /* next emittec char will turn font attributes back on */ }else if(nfld>2 && strcmp(fld[2], "

")==0) attr |= (1<2 && strcmp(fld[2], "

")==0) attr &= ~(1< tab[mid].value) low = mid + 1; else return mid; } return -1; /* no match */ } void emithtmlchar(int r) { int i; i = lookup(r, htmlchars, nelem(htmlchars)); if(i >= 0) emitstr(htmlchars[i].name); else emit(r); } char* troffchar(char *s) { int i; for(i=0; troffchars[i].name!=nil; i++) if(strcmp(s, troffchars[i].name) == 0) return troffchars[i].value; return "??"; } void indent(void) { int nind; didP = 0; if(atnewline){ if(hp != prevlineH){ prevlineH = hp; /* these most peculiar numbers appear in the troff -man output */ nind = ((prevlineH-1*res)+323)/324; attr &= ~((1<= 1) attr |= (1<= 2) attr |= (1<= 3) attr |= (1<2*72) for(i=0; i"); emit('\n'); break; case 'p': /* NOTE(review): the label passed to setnum is ps although the result is stored in page - confirm */ page = setnum(b, "ps", -10000, 10000); break; case 's': ps = setnum(b, "ps", 1, 1000); break; case 'v': vp += setnum(b, "vpos", -10000, 10000); /* BUG: ignore motion */ break; case 'x': xcmd(b); break; case 'w': emit(' '); break; case 'C': indent(); p = getstr(b); emitstr(troffchar(p)); break; case 'H': hp = setnum(b, "hpos", 0, 20000); //Bprint(&bout, " H=%d ", hp); break; case 'V': vp = setnum(b, "vpos", 0, 10000); break; default: fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno); return; } } } /* htmlfont: look name up in htmlfonts; names not found map to htmlfonts[0], the R entry */ HTMLfont* htmlfont(char *name) { int i; for(i=0; htmlfonts[i].name!=nil; i++) if(strcmp(name, htmlfonts[i].name) == 0) return &htmlfonts[i]; return &htmlfonts[0]; } /* mountfont: install a copy of name as the font at position pos, freeing any font already mounted there. NOTE(review): pos indexes font[] with no range check against Nfont in this function - confirm callers validate it */ void mountfont(int pos, char *name) { if(debug) fprint(2, "mount font %s on %d\n", name, pos); if(font[pos] != nil){ free(font[pos]->name); free(font[pos]); } font[pos] = emalloc(sizeof(Font)); font[pos]->name = estrdup(name); font[pos]->htmlfont = htmlfont(name); } /* switchfont: make pos the current font ft, updating attr from the bit field of the old and the new htmlfont when that field is nonzero. NOTE(review): the shift expressions are damaged in this copy */ void switchfont(int pos) { HTMLfont *hf; if(debug) fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name); if(pos == ft) return; hf = font[ft]->htmlfont; if(hf->bit != 0) attr &= ~(1<bit); ft = pos; hf = font[ft]->htmlfont; if(hf->bit != 0) attr |= (1<bit); }