plan9port/src/cmd/deroff.c
2005-05-07 22:42:06 +00:00

969 lines
14 KiB
C

#include <u.h>
#include <libc.h>
#include <bio.h>
/*
* Deroff command -- strip troff, eqn, and tbl sequences from
* a file. Flag arguments:
* -w causes output of one word per line
* rather than in the original format.
* -mm (or -ms) causes the corresponding macros to be interpreted
* so that just sentences are output.
* -ml also gets rid of lists.
* -i causes deroff to ignore .so and .nx commands.
* Deroff follows .so and .nx commands, removes contents of macro
* definitions, equations (both .EQ ... .EN and $...$),
* Tbl command sequences, and Troff backslash constructions.
*
* All input is through the C macro; the most recently read character is in c.
*/
/*
#define C ((c = Bgetrune(infile)) < 0?\
eof():\
((c == ldelim) && (filesp == files)?\
skeqn():\
(c == '\n'?\
(linect++,c):\
c)))
#define C1 ((c = Bgetrune(infile)) == Beof?\
eof():\
(c == '\n'?\
(linect++,c):\
c))
*/
/* lose those macros! */
/*
 * Input primitives.  C and C1 each read one rune into the global c by
 * calling fC() and fC1() respectively.  fC() additionally hands off to
 * skeqn() when the rune equals ldelim while reading a top-level file.
 */
#define C fC()
#define C1 fC1()
/*
 * Loop headers without a body: "SKIP;" reads runes up to and including
 * the next newline (SKIP1 does the same using C1).
 */
#define SKIP while(C != '\n')
#define SKIP1 while(C1 != '\n')
/*
 * Read through two newlines, then keep reading until a '.' whose
 * preceding rune (pc) was a newline is followed by a rune <= 'Z'.
 * Expands to several statements, so it must be used inside braces
 * and followed by a semicolon.
 */
#define SKIP_TO_COM SKIP;\
SKIP;\
pc=c;\
while(C != '.' || pc != '\n' || C > 'Z')\
pc=c
/* boolean flag values */
#define YES 1
#define NO 0
/* values of mac: which macro package -m selected */
#define MS 0
#define MM 1
/* values passed as the vconst argument of regline()/putmac() */
#define ONE 1
#define TWO 2
/* initial value of ldelim/rdelim: no eqn delimiter defined */
#define NOCHAR -2
/* character classes: chars[] holds SPECIAL..LETTER; charclass() may also return EXTENDED */
#define EXTENDED -1 /* All runes above 0x7F */
#define SPECIAL 0
#define APOS 1
#define PUNCT 2
#define DIGIT 3
#define LETTER 4
/*
 * Global state shared by all routines.
 */
int linect = 0;			/* newlines read so far; counted in fC/fC1, reset by opn */
int wordflag= NO;		/* -w or -_: output one word per line */
int underscoreflag = NO;	/* -_: treat '_' as part of a word */
int msflag = NO;		/* -m given: interpret macro package */
int iflag = NO;			/* -i: ignore .so and .nx */
int mac = MM;			/* MM or MS, chosen by -mm / -ms */
int disp = 0;			/* set by -ml */
int inmacro = NO;		/* set by macro(), adjusted in comline() */
int intable = NO;		/* set by tbl(), cleared on .TE */
int eqnflag = 0;		/* set by skeqn() under -m; consumed by regline() */
#define MAX_ASCII 0X80
char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
Rune line[30000];		/* text of the line being assembled */
Rune* lp;			/* cursor into line, used by regline() and backsl() */
long c;				/* most recently read rune */
long pc;			/* previous rune, maintained by SKIP_TO_COM and stbl() */
int ldelim = NOCHAR;		/* eqn inline delimiters set by "delim"; NOCHAR if none */
int rdelim = NOCHAR;
char** argv;			/* argument vector; advanced as input files are opened */
char fname[50];			/* file name filled in by getfname(); "" means none */
Biobuf* files[15];		/* stack of inputs: files[0] is the top-level file, .so pushes */
Biobuf**filesp;			/* top of the files stack */
Biobuf* infile;			/* input currently being read */
char* devnull = "/dev/null";	/* substituted by opn() for a file that cannot be opened */
Biobuf bout;			/* buffered output on file descriptor 1 */
/*
 * Forward declarations for the routines defined below.
 * fC() and fC1() are defined before their first use and so are not listed.
 */
long skeqn(void);
Biobuf* opn(char *p);
int eof(void);
int charclass(int);
void getfname(void);
void fatal(char *s, char *p);
void usage(void);
void work(void);
void putmac(Rune *rp, int vconst);
void regline(int macline, int vconst);
void putwords(void);
void comline(void);
void macro(void);
void eqn(void);
void tbl(void);
void stbl(void);
void sdis(char a1, char a2);
void sce(void);
void backsl(void);
char* copys(char *s);
void refer(int c1);
void inpic(void);
/*
 * Read the next rune from infile into the global c and return it.
 * On end of input control passes to eof(); a rune equal to ldelim while
 * reading the top-level file passes control to skeqn().  Newlines bump
 * linect.
 */
int
fC(void)
{
	long r;

	r = Bgetrune(infile);
	c = r;
	if(r < 0)
		return eof();
	if(filesp == files && r == ldelim)
		return skeqn();
	if(r == '\n')
		linect += 1;
	return r;
}
/*
 * Like fC() but without the eqn-delimiter check: read one rune from
 * infile into c, switch input via eof() on Beof, count newlines in
 * linect, and return the rune.
 */
int
fC1(void)
{
	long r;

	r = Bgetrune(infile);
	c = r;
	if(r == Beof)
		return eof();
	if(r == '\n')
		linect += 1;
	return r;
}
void
main(int argc, char *av[])
{
int i;
char *f;
argv = av;
Binit(&bout, 1, OWRITE);
ARGBEGIN{
case 'w':
wordflag = YES;
break;
case '_':
wordflag = YES;
underscoreflag = YES;
break;
case 'm':
msflag = YES;
if(f = ARGF())
switch(*f)
{
case 'm': mac = MM; break;
case 's': mac = MS; break;
case 'l': disp = 1; break;
default: usage();
}
else
usage();
break;
case 'i':
iflag = YES;
break;
default:
usage();
}ARGEND
if(*argv)
infile = opn(*argv++);
else{
infile = malloc(sizeof(Biobuf));
Binit(infile, 0, OREAD);
}
files[0] = infile;
filesp = &files[0];
for(i='a'; i<='z' ; ++i)
chars[i] = LETTER;
for(i='A'; i<='Z'; ++i)
chars[i] = LETTER;
for(i='0'; i<='9'; ++i)
chars[i] = DIGIT;
chars['\''] = APOS;
chars['&'] = APOS;
chars['\b'] = APOS;
chars['.'] = PUNCT;
chars[','] = PUNCT;
chars[';'] = PUNCT;
chars['?'] = PUNCT;
chars[':'] = PUNCT;
work();
}
/*
 * Skip inline eqn text up to the closing delimiter rdelim.
 * A backslash hides the rune after it; inside double quotes the
 * delimiter is not recognised and backslash again hides one rune.
 * Under -m, eqnflag is raised so the caller can mark the spot.
 * The skipped text is replaced by a single blank, left in c and returned.
 */
long
skeqn(void)
{
	for(;;){
		if(C1 == rdelim)
			break;
		if(c == '\\'){
			c = C1;
			continue;
		}
		if(c != '"')
			continue;
		while(C1 != '"'){
			if(c == '\\')
				C1;
		}
	}
	if(msflag)
		eqnflag = 1;
	c = ' ';
	return c;
}
/*
 * Open p for reading and return its Biobuf.
 * If the open fails: under -m, or when p is already devnull, give up
 * via fatal(); otherwise report the failure and retry with devnull.
 * linect is reset for the newly opened input.
 */
Biobuf*
opn(char *p)
{
	Biobuf *b;

	for(;;){
		b = Bopen(p, OREAD);
		if(b != 0)
			break;
		if(msflag || p == devnull)
			fatal("Cannot open file %s - quitting\n", p);
		else{
			fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
			p = devnull;
		}
	}
	linect = 0;
	return b;
}
/*
 * Called when the current input is exhausted.
 * Closes it (unless it is file descriptor 0), then resumes the
 * enclosing .so file if there is one, else opens the next file
 * argument, else exits.  Returns the first rune of the new input.
 */
int
eof(void)
{
	if(Bfildes(infile) != 0)
		Bterm(infile);
	if(filesp > files){
		filesp--;
		infile = *filesp;
	}else if(*argv){
		infile = opn(*argv);
		argv++;
	}else
		exits(0);
	return C;
}
void
getfname(void)
{
char *p;
Rune r;
Dir *dir;
struct chain
{
struct chain* nextp;
char* datap;
} *q;
static struct chain *namechain= 0;
while(C == ' ')
;
for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)
p += runetochar(p, &r);
*p = '\0';
while(c != '\n')
C;
if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
|| !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
fname[0] = '\0';
return;
}
dir = dirstat(fname);
if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
free(dir);
fname[0] = '\0';
return;
}
free(dir);
/*
* see if this name has already been used
*/
for(q = namechain; q; q = q->nextp)
if( !strcmp(fname, q->datap)) {
fname[0] = '\0';
return;
}
q = (struct chain*)malloc(sizeof(struct chain));
q->nextp = namechain;
q->datap = copys(fname);
namechain = q;
}
/*
 * Print the command synopsis on file descriptor 2 and exit with
 * status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: deroff [-nw_pi] [-m (m s l)] [file ...] \n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
/*
 * Report a fatal error and exit.
 * s is a print format taking the single string argument p; the
 * message is prefixed with "deroff: ".  s itself is also used as the
 * exit status.
 */
void
fatal(char *s, char *p)
{
	fprint(2, "%s", "deroff: ");
	fprint(2, s, p);
	exits(s);
}
/*
 * Main loop: read the first rune of each line and dispatch.
 * Lines starting with '.' or '\'' are requests handled by comline();
 * all others are text handled by regline().  The loop never ends;
 * the program terminates from eof() when input runs out.
 */
void
work(void)
{
	while(1){
		eqnflag = 0;
		switch(C){
		case '.':
		case '\'':
			comline();
			break;
		default:
			regline(NO, TWO);
			break;
		}
	}
}
void
regline(int macline, int vconst)
{
line[0] = c;
lp = line;
for(;;) {
if(c == '\\') {
*lp = ' ';
backsl();
if(c == '%') /* no blank for hyphenation char */
lp--;
}
if(c == '\n')
break;
if(intable && c=='T') {
*++lp = C;
if(c=='{' || c=='}') {
lp[-1] = ' ';
*lp = C;
}
} else {
if(msflag == 1 && eqnflag == 1) {
eqnflag = 0;
*++lp = 'x';
}
*++lp = C;
}
}
*lp = '\0';
if(lp != line) {
if(wordflag)
putwords();
else
if(macline)
putmac(line,vconst);
else
Bprint(&bout, "%S\n", line);
}
}
/*
 * Print the word-like tokens of a macro line.
 * rp points to the NUL-terminated line; it is split at blanks and tabs,
 * which are copied to the output as they are passed.  A token is printed
 * (without any double quotes) when it is longer than vconst runes and
 * starts with two letters; a lone punctuation rune ending the line is
 * printed once at least one token has been printed; anything else is
 * dropped.  Under -m, if the last token ended in punctuation, that
 * rune is repeated on a line of its own.
 */
void
putmac(Rune *rp, int vconst)
{
	Rune *end;	/* one past the current token */
	Rune final;	/* last rune of the most recent token */
	int nprinted;

	nprinted = 0;
	final = 0;
	while(*rp != '\0'){
		for(; *rp == ' ' || *rp == '\t'; rp++)
			Bputrune(&bout, *rp);
		end = rp;
		while(*end != ' ' && *end != '\t' && *end != '\0')
			end++;
		if(*rp == '\"')
			rp++;
		if(end > rp+vconst && charclass(*rp) == LETTER
		&& charclass(rp[1]) == LETTER){
			for(; rp < end; rp++)
				if(*rp != '\"')
					Bputrune(&bout, *rp);
			final = end[-1];
			nprinted++;
			continue;
		}
		if(nprinted && charclass(*rp) == PUNCT && rp[1] == '\0'){
			Bputrune(&bout, *rp);
			rp++;
			continue;
		}
		final = end[-1];
		rp = end;
	}
	Bputc(&bout, '\n');
	if(msflag && charclass(final) == PUNCT)
		Bprint(&bout, " %C\n", final);
}
/*
 * Break the contents of line[] into words for the -w option and
 * print each on its own line.  A candidate runs up to the next
 * SPECIAL rune ('_' also counts as word material with -_); it is
 * printed only if it contains more than one letter, after trailing
 * apostrophes, ampersands and punctuation have been removed.
 */
void
putwords(void)
{
	Rune *start, *end;
	int cls, nletters;

	start = line;
	for(;;){
		/*
		 * skip leading specials, ampersands, apostrophes and punctuation
		 */
		for(;;){
			cls = charclass(*start);
			if(cls == EXTENDED || cls >= DIGIT)
				break;
			if(*start++ == '\0')
				return;
		}
		nletters = 0;
		for(end = start; ; end++){
			cls = charclass(*end);
			if(cls == SPECIAL && !(underscoreflag && *end == '_'))
				break;
			if(cls == LETTER || (underscoreflag && *end == '_'))
				nletters++;
		}
		/*
		 * MDM definition of word
		 */
		if(nletters <= 1){
			start = end;
			continue;
		}
		/*
		 * delete trailing ampersands, apostrophes and punctuation
		 */
		do
			end--;
		while(*end == '\'' || *end == '&' || charclass(*end) == PUNCT);
		while(start <= end)
			Bputrune(&bout, *start++);
		Bputc(&bout, '\n');
	}
}
/*
 * Process a request line; the leading '.' or '\'' has been read.
 *
 * The two runes after any leading blanks (c1, c2) name the request and
 * select one action from the chain below; order matters, since several
 * tests overlap.  Requests that are not recognised are treated as macro
 * calls whose arguments are passed through regline().
 *
 * Several -m cases skip ahead with SKIP_TO_COM and then jump back to
 * comx to interpret the request found there.
 *
 * .so pushes the new file on the files[] stack.  The original pushed
 * without checking the depth and could write past files[]; includes
 * nested deeper than the stack allows are now reported and ignored.
 */
void
comline(void)
{
	long c1, c2;

	while(C==' ' || c=='\t')
		;
comx:
	if((c1=c) == '\n')
		return;
	c2 = C;
	if(c1=='.' && c2!='.')
		inmacro = NO;
	if(msflag && c1 == '['){
		refer(c2);
		return;
	}
	if(c2 == '\n')
		return;

	if(c1 == '\\' && c2 == '\"')
		SKIP;
	else if(filesp==files && c1=='E' && c2=='Q')
		eqn();
	else if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
		if(msflag)
			stbl();
		else
			tbl();
	}
	else if(c1=='T' && c2=='E')
		intable = NO;
	else if(!inmacro &&
		((c1 == 'd' && c2 == 'e') ||
		 (c1 == 'i' && c2 == 'g') ||
		 (c1 == 'a' && c2 == 'm')))
		macro();
	else if(c1=='s' && c2=='o') {
		if(iflag)
			SKIP;
		else {
			getfname();
			if(fname[0]) {
				if(filesp >= &files[nelem(files)-1])
					fprint(2, "deroff: .so nesting too deep - ignoring %s\n", fname);
				else {
					/* opn() either returns an open file or exits */
					infile = opn(fname);
					*++filesp = infile;
				}
			}
		}
	}
	else if(c1=='n' && c2=='x') {
		if(iflag)
			SKIP;
		else {
			getfname();
			if(fname[0] == '\0')
				exits(0);
			if(Bfildes(infile) != 0)
				Bterm(infile);
			infile = *filesp = opn(fname);
		}
	}
	else if(c1 == 't' && c2 == 'm')
		SKIP;
	else if(c1=='h' && c2=='w')
		SKIP;
	else if(msflag && c1 == 'T' && c2 == 'L') {
		SKIP_TO_COM;
		goto comx;
	}
	else if(msflag && c1=='N' && c2 == 'R')
		SKIP;
	else if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
		if(mac==MM)
			SKIP;
		else {
			SKIP_TO_COM;
			goto comx;
		}
	}
	else if(msflag && c1=='F' && c2=='S') {
		SKIP_TO_COM;
		goto comx;
	}
	else if(msflag && (c1=='S' || c1=='N') && c2=='H') {
		SKIP_TO_COM;
		goto comx;
	}
	else if(c1 == 'U' && c2 == 'X') {
		if(wordflag)
			Bprint(&bout, "UNIX\n");
		else
			Bprint(&bout, "UNIX ");
	}
	else if(msflag && c1=='O' && c2=='K') {
		SKIP_TO_COM;
		goto comx;
	}
	else if(msflag && c1=='N' && c2=='D')
		SKIP;
	else if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
		SKIP;
	else if(msflag && mac==MM && c2=='L') {
		if(disp || c1=='R')
			sdis('L', 'E');
		else {
			SKIP;
			Bprint(&bout, " .");
		}
	}
	else if(!msflag && c1=='P' && c2=='S') {
		inpic();
	}
	else if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
		sdis(c1, 'E');
	}
	else if(msflag && (c1 == 'K' && c2 == 'F')) {
		sdis(c1,'E');
	}
	else if(msflag && c1=='n' && c2=='f')
		sdis('f','i');
	else if(msflag && c1=='c' && c2=='e')
		sce();
	else {
		/* anything else: a macro call; keep its arguments as text */
		if(c1=='.' && c2=='.') {
			if(msflag) {
				SKIP;
				return;
			}
			while(C == '.')
				;
		}
		inmacro++;
		if(c1 <= 'Z' && msflag)
			regline(YES,ONE);
		else {
			if(wordflag)
				C;
			regline(YES,TWO);
		}
		inmacro--;
	}
}
/*
 * Handle the start of a macro definition (.de, .ig, .am).
 * Without -m: skip the request line and set inmacro.
 * With -m: discard whole lines until the three-read test in the loop
 * condition fails (a line starting with "..", the third rune not
 * being '.'), then discard the remainder of that line.
 */
void
macro(void)
{
	if(!msflag){
		SKIP;
		inmacro = YES;
		return;
	}
	do{
		SKIP1;
	}while(C1 != '.' || C1 != '.' || C1 == '.');
	if(c != '\n')
		SKIP;
}
/*
 * Skip a display: discard input up to the request whose two-letter
 * name is a1 followed by a2.
 *
 * For a1 == 'P' the rest of the opening line is examined first: if its
 * first non-blank rune is '<' the line is skipped and nothing else is
 * done.  For a1 == 'L', requests whose second letter is 'L' nest, and
 * each must be matched by its own closing request.  For a1 == 'D', an
 * embedded .EQ is handed to eqn().  For a1 == 'f', certain requests
 * (see the tests below) also end the skip.
 *
 * When the closing request is found, " ." and a newline are written,
 * or only the newline if an equation was seen inside.
 */
void
sdis(char a1, char a2)
{
	int c1, c2;
	int wantdot;
	int depth;

	if(a1 == 'P'){
		while(C1 == ' ')
			;
		if(c == '<'){
			SKIP1;
			return;
		}
	}
	depth = 0;
	wantdot = 1;
	if(c != '\n')
		SKIP1;
	for(;;){
		/* find the next line that begins with '.' */
		while(C1 != '.'){
			if(c != '\n')
				SKIP1;
		}
		c1 = C1;
		if(c1 == '\n')
			continue;
		c2 = C1;
		if(c2 == '\n'){
			if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
				return;
			continue;
		}
		if(c1 == a1 && c2 == a2){
			SKIP1;
			if(depth != 0){
				depth--;
				continue;
			}
			if(wantdot)
				Bprint(&bout, " .");
			Bputc(&bout, '\n');
			return;
		}
		if(a1 == 'L' && c2 == 'L'){
			depth++;
			SKIP1;
			continue;
		}
		if(a1 == 'D' && c1 == 'E' && c2 == 'Q'){
			eqn();
			wantdot = 0;
			continue;
		}
		if(a1 == 'f' && ((mac == MS && c2 == 'P') ||
		   (mac == MM && c1 == 'H' && c2 == 'U'))){
			SKIP1;
			return;
		}
		SKIP1;
	}
}
void
tbl(void)
{
while(C != '.')
;
SKIP;
intable = YES;
}
/*
 * Start of a table under -m: discard input through the first '.',
 * skip ahead with SKIP_TO_COM, and unless the request found there
 * is TE keep discarding until a ".TE" at the start of a line.
 */
void
stbl(void)
{
	do
		C;
	while(c != '.');
	SKIP_TO_COM;
	if(c == 'T' && C == 'E')
		return;
	SKIP;
	pc = c;
	while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
		pc = c;
}
/*
 * Skip an equation: discard lines from after .EQ through .EN.
 *
 * A line beginning "delim" inside the equation sets the inline
 * delimiters ldelim/rdelim from the next two runes, or clears them for
 * "delim off" or a missing argument.  Any line beginning with 'd'
 * clears dflg.
 *
 * Under -m, if dflg is still set at .EN, the equation is replaced in
 * the output by "x " followed by the punctuation rune that ended the
 * last equation line, if any, and a newline.
 *
 * The punctuation test goes through charclass() because c may be any
 * rune; indexing chars[] directly with a rune >= MAX_ASCII read past
 * the end of the table.
 */
void
eqn(void)
{
	long c1, c2;
	int dflg;
	char last;

	last = 0;
	dflg = 1;
	SKIP;
	for(;;) {
		if(C1 == '.' || c == '\'') {
			while(C1==' ' || c=='\t')
				;
			if(c=='E' && C1=='N') {
				SKIP;
				if(msflag && dflg) {
					Bputc(&bout, 'x');
					Bputc(&bout, ' ');
					if(last) {
						Bputc(&bout, last);
						Bputc(&bout, '\n');
					}
				}
				return;
			}
		} else if(c == 'd') {
			if(C1=='e' && C1=='l')
				if(C1=='i' && C1=='m') {
					while(C1 == ' ')
						;
					/* short-circuiting guarantees c2 is set before it is read */
					if((c1=c)=='\n' || (c2=C1)=='\n' ||
					  (c1=='o' && c2=='f' && C1=='f')) {
						ldelim = NOCHAR;
						rdelim = NOCHAR;
					} else {
						ldelim = c1;
						rdelim = c2;
					}
				}
			dflg = 0;
		}
		/* consume the rest of the line, remembering trailing punctuation */
		if(c != '\n')
			while(C1 != '\n') {
				if(charclass(c) == PUNCT)
					last = c;
				else if(c != ' ')
					last = 0;
			}
	}
}
/*
 * skip over a complete backslash construction
 *
 * Called with the backslash already consumed; reads the escape name
 * with C1 and consumes whatever argument that escape takes.  Some
 * cases adjust the global line cursor lp to change what the caller
 * has stored for the escape.
 */
void
backsl(void)
{
int bdelim;
sw:
switch(C1)
{
/* \" : discard the rest of the line */
case '"':
SKIP1;
return;
/*
 * \s : either another escape follows and is processed recursively,
 * or a run of digits is consumed, the first non-digit is pushed back
 * and c is set to '0'.  lp is backed up one position either way.
 */
case 's':
if(C1 == '\\')
backsl();
else {
while(C1>='0' && c<='9')
;
Bungetrune(infile);
c = '0';
}
lp--;
return;
/* \f \n \* : one-rune argument, unless it is '(' (falls through) */
case 'f':
case 'n':
case '*':
if(C1 != '(')
return;
/*
 * \( : two more runes are consumed.  Under -m the pair "em" stores
 * '-' at *lp instead.
 */
case '(':
if(msflag) {
if(C == 'e') {
if(C1 == 'm') {
*lp = '-';
return;
}
} else
if(c != '\n')
C1;
return;
}
if(C1 != '\n')
C1;
return;
case '$':
C1; /* discard argument number */
return;
/*
 * Escapes with a delimited argument: the rune after the name is the
 * delimiter; input is consumed up to its next occurrence or a newline,
 * with nested escapes processed recursively.
 */
case 'b':
case 'x':
case 'v':
case 'h':
case 'w':
case 'o':
case 'l':
case 'L':
if((bdelim=C1) == '\n')
return;
while(C1!='\n' && c!=bdelim)
if(c == '\\')
backsl();
return;
/* \\ : when inmacro is set, interpret the next rune as an escape name; otherwise falls through to return */
case '\\':
if(inmacro)
goto sw;
default:
return;
}
}
/*
 * Return a freshly allocated copy of the NUL-terminated string s.
 * Exits via fatal() if memory cannot be allocated.
 */
char*
copys(char *s)
{
	char *dup;

	dup = malloc(strlen(s)+1);
	if(dup == 0)
		fatal("Cannot allocate memory", (char*)0);
	strcpy(dup, s);
	return dup;
}
/*
 * Handle .ce under -m: discard the lines being centred.
 * The count is the first decimal number on the request line,
 * defaulting to 1.  Each following line that does not start with '.'
 * is skipped and counted.  Request lines are skipped without being
 * counted, except that ".ce 0" and a request with 'P' in either of its
 * first two positions end the skipping early.
 */
void
sce(void)
{
	int n;

	n = 1;
	do
		C;
	while(c != '\n' && (c < '0' || c > '9'));
	if(c != '\n'){
		n = c-'0';
		while('0' <= C && c <= '9')
			n = n*10 + c-'0';
	}
	while(n != 0){
		if(C != '.'){
			SKIP;
			n--;
			continue;
		}
		if(C == 'c'){
			if(C != 'e'){
				SKIP;
				continue;
			}
			while(C == ' ')
				;
			if(c == '0'){
				SKIP;
				break;
			}
			SKIP;
			continue;
		}
		if(c == 'P' || C == 'P'){
			if(c != '\n')
				SKIP;
			break;
		}
		if(c != '\n')
			SKIP;
	}
}
void
refer(int c1)
{
int c2;
if(c1 != '\n')
SKIP;
c2 = 0;
for(;;) {
if(C != '.')
SKIP;
else {
if(C != ']')
SKIP;
else {
while(C != '\n')
c2 = c;
if(charclass(c2) == PUNCT)
Bprint(&bout, " %C",c2);
return;
}
}
}
}
/*
 * Handle a .PS ... .PE picture (no -m): discard everything except the
 * text inside double quotes.
 *
 * If the .PS line contains '<' the rest of that line is skipped and
 * nothing else is done.  Otherwise quoted strings are gathered into
 * line[], each followed by a blank, with backslash escapes stripped by
 * backsl() and \" dropped.  At each newline the gathered text is
 * written out, through putwords() with -w or followed by an empty
 * line otherwise.  A line starting with ".PE" ends the picture.
 *
 * Storing stops at lim, the slot kept for the terminator; the original
 * wrote into line[] without any bound.
 */
void
inpic(void)
{
	int c1;
	Rune *p1, *lim;

	while(C1 != '\n')
		if(c == '<'){
			SKIP1;
			return;
		}
	p1 = line;
	lim = &line[nelem(line)-1];
	c = '\n';	/* the next rune read starts a line */
	for(;;) {
		c1 = c;
		if(C1 == '.' && c1 == '\n') {
			if(C1 != 'P' || C1 != 'E') {
				if(c != '\n'){
					SKIP1;
					c = '\n';
				}
				continue;
			}
			SKIP1;
			return;
		} else if(c == '\"') {
			while(C1 != '\"') {
				if(c == '\\') {
					if(C1 == '\"')
						continue;
					/* let backsl() re-read the escape name */
					Bungetrune(infile);
					backsl();
				} else if(p1 < lim)
					*p1++ = c;
			}
			if(p1 < lim)
				*p1++ = ' ';
		} else if(c == '\n' && p1 != line) {
			*p1 = '\0';
			if(wordflag)
				putwords();
			else
				Bprint(&bout, "%S\n\n", line);
			p1 = line;
		}
	}
}
/*
 * Return the character class of c.
 * Values below MAX_ASCII are looked up in chars[]; en dash and em dash
 * are SPECIAL; every other rune is EXTENDED.
 */
int
charclass(int c)
{
	if(c >= MAX_ASCII){
		/* en dash, em dash */
		if(c == 0x2013 || c == 0x2014)
			return SPECIAL;
		return EXTENDED;
	}
	return chars[c];
}