plan9port/src/cmd/yacc.c
2003-12-04 17:45:49 +00:00

2948 lines
57 KiB
C

#include <u.h>
#include <libc.h>
#include <bio.h>
#include <ctype.h>
#define Bungetrune Bungetc /* ok for now. */
/*
* all these are 32 bit
*/
#define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
#define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
#define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
#define NWORDS(n) (((n)+32)/32)
#define PARSER "lib/yaccpar"
#define PARSERS "lib/yaccpars"
#define TEMPNAME "y.tmp.XXXXXX"
#define ACTNAME "y.acts.XXXXXX"
#define OFILE "tab.c"
#define FILEU "output"
#define FILED "tab.h"
#define FILEDEBUG "debug"
enum
{
/*
* the following are adjustable
* according to memory size
*/
ACTSIZE = 40000,
MEMSIZE = 40000,
NSTATES = 2000,
NTERMS = 511,
NPROD = 1600,
NNONTERM = 600,
TEMPSIZE = 2000,
CNAMSZ = 10000,
LSETSIZE = 2400,
WSETSIZE = 350,
NAMESIZE = 50,
NTYPES = 63,
ISIZE = 400,
PRIVATE = 0xE000, /* unicode private use */
/* relationships which must hold:
TBITSET ints must hold NTERMS+1 bits...
WSETSIZE >= NNONTERM
LSETSIZE >= NNONTERM
TEMPSIZE >= NTERMS + NNONTERM + 1
TEMPSIZE >= NSTATES
*/
NTBASE = 010000,
ERRCODE = 8190,
ACCEPTCODE = 8191,
NOASC = 0, /* no assoc. */
LASC = 1, /* left assoc. */
RASC = 2, /* right assoc. */
BASC = 3, /* binary assoc. */
/* flags for state generation */
DONE = 0,
MUSTDO = 1,
MUSTLOOKAHEAD = 2,
/* flags for a rule having an action, and being reduced */
ACTFLAG = 04,
REDFLAG = 010,
/* output parser flags */
YYFLAG1 = -1000,
/* parse tokens */
IDENTIFIER = PRIVATE,
MARK,
TERM,
LEFT,
RIGHT,
BINARY,
PREC,
LCURLY,
IDENTCOLON,
NUMBER,
START,
TYPEDEF,
TYPENAME,
UNION,
ENDFILE = 0,
EMPTY = 1,
WHOKNOWS = 0,
OK = 1,
NOMORE = -1000,
};
/* macros for getting associativity and precedence levels */
#define ASSOC(i) ((i)&03)
#define PLEVEL(i) (((i)>>4)&077)
#define TYPE(i) (((i)>>10)&077)
/* macros for setting associativity and precedence levels */
#define SETASC(i,j) i |= j
#define SETPLEV(i,j) i |= (j<<4)
#define SETTYPE(i,j) i |= (j<<10)
/* looping macros */
#define TLOOP(i) for(i=1; i<=ntokens; i++)
#define NTLOOP(i) for(i=0; i<=nnonter; i++)
#define PLOOP(s,i) for(i=s; i<nprod; i++)
#define SLOOP(i) for(i=0; i<nstate; i++)
#define WSBUMP(x) x++
#define WSLOOP(s,j) for(j=s; j<cwp; j++)
#define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
#define SETLOOP(i) for(i=0; i<tbitset; i++)
/* command to clobber tempfiles after use */
#define ZAPFILE(x) if(x) remove(x)
/* I/O descriptors */
Biobuf* faction; /* file for saving actions */
Biobuf* fdefine; /* file for #defines */
Biobuf* fdebug; /* y.debug for strings for debugging */
Biobuf* ftable; /* y.tab.c file */
Biobuf* ftemp; /* tempfile to pass 2 */
Biobuf* finput; /* input file */
Biobuf* foutput; /* y.output file */
/* communication variables between various I/O routines */
char* infile; /* input file name */
int numbval; /* value of an input number */
char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
/* structure declarations */
typedef
struct
{
int lset[TBITSET];
} Lkset;
typedef
struct
{
int* pitem;
Lkset* look;
} Item;
typedef
struct
{
char* name;
int value;
} Symb;
typedef
struct
{
int* pitem;
int flag;
Lkset ws;
} Wset;
/* storage of names */
char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
int cnamsz = CNAMSZ; /* size of cnames */
char* cnamp = cnames; /* place where next name is to be put in */
int ndefout = 4; /* number of defined symbols output */
char* tempname;
char* actname;
char ttempname[] = TEMPNAME;
char tactname[] = ACTNAME;
char* parser = PARSER;
char* yydebug;
/* storage of types */
int ntypes; /* number of types defined */
char* typeset[NTYPES]; /* pointers to type tags */
/* token information */
int ntokens = 0 ; /* number of tokens */
Symb tokset[NTERMS];
int toklev[NTERMS]; /* vector with the precedence of the terminals */
/* nonterminal information */
int nnonter = -1; /* the number of nonterminals */
Symb nontrst[NNONTERM];
int start; /* start symbol */
/* assigned token type values */
int extval = 0;
char* ytabc = OFILE; /* name of y.tab.c */
/* grammar rule information */
int mem0[MEMSIZE] ; /* production storage */
int* mem = mem0;
int nprod = 1; /* number of productions */
int* prdptr[NPROD]; /* pointers to descriptions of productions */
int levprd[NPROD]; /* precedence levels for the productions */
int rlines[NPROD]; /* line number for this rule */
/* state information */
int nstate = 0; /* number of states */
Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
int tystate[NSTATES]; /* contains type information about the states */
int defact[NSTATES]; /* the default actions of states */
int tstates[NTERMS]; /* states generated by terminal gotos */
int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
int lastred; /* the number of the last reduction of a state */
/* lookahead set information */
Lkset lkst[LSETSIZE];
int nolook; /* flag to turn off lookahead computations */
int tbitset; /* size of lookahead sets */
int nlset = 0; /* next lookahead set index */
int nolook = 0; /* flag to suppress lookahead computations */
Lkset clset; /* temporary storage for lookahead computations */
/* working set information */
Wset wsets[WSETSIZE];
Wset* cwp;
/* storage for action table */
int amem[ACTSIZE]; /* action table storage */
int* memp = amem; /* next free action table position */
int indgo[NSTATES]; /* index to the stored goto table */
/* temporary vector, indexable by states, terms, or ntokens */
int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
int lineno = 1; /* current input line number */
int fatfl = 1; /* if on, error is fatal */
int nerrors = 0; /* number of errors */
/* statistics collection variables */
int zzgoent;
int zzgobest;
int zzacent;
int zzexcp;
int zzclose;
int zzrrconf;
int zzsrconf;
int* ggreed = lkst[0].lset;
int* pgo = wsets[0].ws.lset;
int* yypgo = &nontrst[0].value;
int maxspr = 0; /* maximum spread of any entry */
int maxoff = 0; /* maximum offset into a array */
int* pmem = mem0;
int* maxa;
int nxdb = 0;
int adb = 0;
/* storage for information about the nonterminals */
int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
/* random stuff picked out from between functions */
int indebug = 0;
Wset* zzcwp = wsets;
int zzgoent = 0;
int zzgobest = 0;
int zzacent = 0;
int zzexcp = 0;
int zzclose = 0;
int zzsrconf = 0;
int* zzmemsz = mem0;
int zzrrconf = 0;
int pidebug = 0; /* debugging flag for putitem */
int gsdebug = 0;
int cldebug = 0; /* debugging flag for closure */
int pkdebug = 0;
int g2debug = 0;
struct
{
char* name;
long value;
} resrv[] =
{
"binary", BINARY,
"left", LEFT,
"nonassoc", BINARY,
"prec", PREC,
"right", RIGHT,
"start", START,
"term", TERM,
"token", TERM,
"type", TYPEDEF,
"union", UNION,
0,
};
/* define functions */
void main(int, char**);
void others(void);
char* chcopy(char*, char*);
char* writem(int*);
char* symnam(int);
void summary(void);
void error(char*, ...);
void aryfil(int*, int, int);
int setunion(int*, int*);
void prlook(Lkset*);
void cpres(void);
void cpfir(void);
int state(int);
void putitem(int*, Lkset*);
void cempty(void);
void stagen(void);
void closure(int);
Lkset* flset(Lkset*);
void cleantmp(void);
void intr(void);
void setup(int, char**);
void finact(void);
int defin(int, char*);
void defout(int);
char* cstash(char*);
long gettok(void);
int fdtype(int);
int chfind(int, char*);
void cpyunion(void);
void cpycode(void);
int skipcom(void);
void cpyact(int);
void openup(char*, int, int, int, char*);
void output(void);
int apack(int*, int);
void go2out(void);
void go2gen(int);
void precftn(int, int, int);
void wract(int);
void wrstate(int);
void warray(char*, int*, int);
void hideprod(void);
void callopt(void);
void gin(int);
void stin(int);
int nxti(void);
void osummary(void);
void aoutput(void);
void arout(char*, int*, int);
int gtnm(void);
void
main(int argc, char *argv[])
{
setup(argc, argv); /* initialize and read productions */
tbitset = NWORDS(ntokens);
cpres(); /* make table of which productions yield a given nonterminal */
cempty(); /* make a table of which nonterminals can match the empty string */
cpfir(); /* make a table of firsts of nonterminals */
stagen(); /* generate the states */
output(); /* write the states and the tables */
go2out();
hideprod();
summary();
callopt();
others();
exits(0);
}
/*
* put out other arrays, copy the parsers
*/
void
others(void)
{
int c, i, j;
char *s, *root;
root = getenv("PLAN9");
if(root == nil)
root = "/usr/local/plan9";
s = malloc(strlen(root)+1+strlen(parser)+1);
strcpy(s, root);
strcat(s, "/");
strcat(s, parser);
finput = Bopen(s, OREAD);
if(finput == 0)
error("cannot find parser %s", s);
free(s);
warray("yyr1", levprd, nprod);
aryfil(temp1, nprod, 0);
PLOOP(1, i)
temp1[i] = prdptr[i+1]-prdptr[i]-2;
warray("yyr2", temp1, nprod);
aryfil(temp1, nstate, -1000);
TLOOP(i)
for(j=tstates[i]; j!=0; j=mstates[j])
temp1[j] = i;
NTLOOP(i)
for(j=ntstates[i]; j!=0; j=mstates[j])
temp1[j] = -i;
warray("yychk", temp1, nstate);
warray("yydef", defact, nstate);
/* put out token translation tables */
/* table 1 has 0-256 */
aryfil(temp1, 256, 0);
c = 0;
TLOOP(i) {
j = tokset[i].value;
if(j >= 0 && j < 256) {
if(temp1[j]) {
print("yacc bug -- cant have 2 different Ts with same value\n");
print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
nerrors++;
}
temp1[j] = i;
if(j > c)
c = j;
}
}
warray("yytok1", temp1, c+1);
/* table 2 has PRIVATE-PRIVATE+256 */
aryfil(temp1, 256, 0);
c = 0;
TLOOP(i) {
j = tokset[i].value - PRIVATE;
if(j >= 0 && j < 256) {
if(temp1[j]) {
print("yacc bug -- cant have 2 different Ts with same value\n");
print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
nerrors++;
}
temp1[j] = i;
if(j > c)
c = j;
}
}
warray("yytok2", temp1, c+1);
/* table 3 has everything else */
Bprint(ftable, "long yytok3[] =\n{\n");
c = 0;
TLOOP(i) {
j = tokset[i].value;
if(j >= 0 && j < 256)
continue;
if(j >= PRIVATE && j < 256+PRIVATE)
continue;
Bprint(ftable, "%4d,%4d,", j, i);
c++;
if(c%5 == 0)
Bprint(ftable, "\n");
}
Bprint(ftable, "%4d\n};\n", 0);
/* copy parser text */
while((c=Bgetrune(finput)) != Beof) {
if(c == '$') {
if((c = Bgetrune(finput)) != 'A')
Bputrune(ftable, '$');
else { /* copy actions */
faction = Bopen(actname, OREAD);
if(faction == 0)
error("cannot reopen action tempfile");
while((c=Bgetrune(faction)) != Beof)
Bputrune(ftable, c);
Bterm(faction);
ZAPFILE(actname);
c = Bgetrune(finput);
}
}
Bputrune(ftable, c);
}
Bterm(ftable);
}
/*
* copies string q into p, returning next free char ptr
*/
char*
chcopy(char* p, char* q)
{
int c;
while(c = *q) {
if(c == '"')
*p++ = '\\';
*p++ = c;
q++;
}
*p = 0;
return p;
}
/*
* creates output string for item pointed to by pp
*/
char*
writem(int *pp)
{
int i,*p;
static char sarr[ISIZE];
char* q;
for(p=pp; *p>0; p++)
;
p = prdptr[-*p];
q = chcopy(sarr, nontrst[*p-NTBASE].name);
q = chcopy(q, ": ");
for(;;) {
*q = ' ';
p++;
if(p == pp)
*q = '.';
q++;
*q = '\0';
i = *p;
if(i <= 0)
break;
q = chcopy(q, symnam(i));
if(q > &sarr[ISIZE-30])
error("item too big");
}
/* an item calling for a reduction */
i = *pp;
if(i < 0 ) {
q = chcopy(q, " (");
sprint(q, "%d)", -i);
}
return sarr;
}
/*
* return a pointer to the name of symbol i
*/
char*
symnam(int i)
{
char* cp;
cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
if(*cp == ' ')
cp++;
return cp;
}
/*
* output the summary on y.output
*/
void
summary(void)
{
if(foutput != 0) {
Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
ntokens, NTERMS, nnonter, NNONTERM);
Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
nprod, NPROD, nstate, NSTATES);
Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
zzsrconf, zzrrconf);
Bprint(foutput, "%d/%d working sets used\n",
(int)(zzcwp-wsets), WSETSIZE);
Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
(int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
Bprint(foutput, "%d goto entries\n", zzgoent);
Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
}
if(zzsrconf != 0 || zzrrconf != 0) {
print("\nconflicts: ");
if(zzsrconf)
print("%d shift/reduce", zzsrconf);
if(zzsrconf && zzrrconf)
print(", ");
if(zzrrconf)
print("%d reduce/reduce", zzrrconf);
print("\n");
}
if(ftemp != 0) {
Bterm(ftemp);
ftemp = 0;
}
if(fdefine != 0) {
Bterm(fdefine);
fdefine = 0;
}
}
/*
* write out error comment -- NEEDS WORK
*/
void
error(char *s, ...)
{
nerrors++;
fprint(2, "\n fatal error:");
fprint(2, s, (&s)[1]);
fprint(2, ", %s:%d\n", infile, lineno);
if(!fatfl)
return;
summary();
cleantmp();
exits("error");
}
/*
* set elements 0 through n-1 to c
*/
void
aryfil(int *v, int n, int c)
{
int i;
for(i=0; i<n; i++)
v[i] = c;
}
/*
* set a to the union of a and b
* return 1 if b is not a subset of a, 0 otherwise
*/
int
setunion(int *a, int *b)
{
int i, x, sub;
sub = 0;
SETLOOP(i) {
x = *a;
*a |= *b;
if(*a != x)
sub = 1;
a++;
b++;
}
return sub;
}
void
prlook(Lkset* p)
{
int j, *pp;
pp = p->lset;
if(pp == 0)
Bprint(foutput, "\tNULL");
else {
Bprint(foutput, " { ");
TLOOP(j)
if(BIT(pp,j))
Bprint(foutput, "%s ", symnam(j));
Bprint(foutput, "}");
}
}
/*
* compute an array with the beginnings of productions yielding given nonterminals
* The array pres points to these lists
* the array pyield has the lists: the total size is only NPROD+1
*/
void
cpres(void)
{
int c, j, i, **pmem;
static int *pyield[NPROD];
pmem = pyield;
NTLOOP(i) {
c = i+NTBASE;
pres[i] = pmem;
fatfl = 0; /* make undefined symbols nonfatal */
PLOOP(0, j)
if(*prdptr[j] == c)
*pmem++ = prdptr[j]+1;
if(pres[i] == pmem)
error("nonterminal %s not defined!", nontrst[i].name);
}
pres[i] = pmem;
fatfl = 1;
if(nerrors) {
summary();
cleantmp();
exits("error");
}
if(pmem != &pyield[nprod])
error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
}
/*
* compute an array with the first of nonterminals
*/
void
cpfir(void)
{
int *p, **s, i, **t, ch, changes;
zzcwp = &wsets[nnonter];
NTLOOP(i) {
aryfil(wsets[i].ws.lset, tbitset, 0);
t = pres[i+1];
/* initially fill the sets */
for(s=pres[i]; s<t; ++s)
for(p = *s; (ch = *p) > 0; ++p) {
if(ch < NTBASE) {
SETBIT(wsets[i].ws.lset, ch);
break;
}
if(!pempty[ch-NTBASE])
break;
}
}
/* now, reflect transitivity */
changes = 1;
while(changes) {
changes = 0;
NTLOOP(i) {
t = pres[i+1];
for(s = pres[i]; s < t; ++s)
for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
if(!pempty[ch])
break;
}
}
}
NTLOOP(i)
pfirst[i] = flset(&wsets[i].ws);
if(!indebug)
return;
if(foutput != 0)
NTLOOP(i) {
Bprint(foutput, "\n%s: ", nontrst[i].name);
prlook(pfirst[i]);
Bprint(foutput, " %d\n", pempty[i]);
}
}
/*
* sorts last state,and sees if it equals earlier ones. returns state number
*/
int
state(int c)
{
Item *p1, *p2, *k, *l, *q1, *q2;
int size1, size2, i;
p1 = pstate[nstate];
p2 = pstate[nstate+1];
if(p1 == p2)
return 0; /* null state */
/* sort the items */
for(k = p2-1; k > p1; k--) /* make k the biggest */
for(l = k-1; l >= p1; --l)
if(l->pitem > k->pitem) {
int *s;
Lkset *ss;
s = k->pitem;
k->pitem = l->pitem;
l->pitem = s;
ss = k->look;
k->look = l->look;
l->look = ss;
}
size1 = p2 - p1; /* size of state */
for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
/* get ith state */
q1 = pstate[i];
q2 = pstate[i+1];
size2 = q2 - q1;
if(size1 != size2)
continue;
k = p1;
for(l = q1; l < q2; l++) {
if(l->pitem != k->pitem)
break;
k++;
}
if(l != q2)
continue;
/* found it */
pstate[nstate+1] = pstate[nstate]; /* delete last state */
/* fix up lookaheads */
if(nolook)
return i;
for(l = q1, k = p1; l < q2; ++l, ++k ) {
int s;
SETLOOP(s)
clset.lset[s] = l->look->lset[s];
if(setunion(clset.lset, k->look->lset)) {
tystate[i] = MUSTDO;
/* register the new set */
l->look = flset( &clset );
}
}
return i;
}
/* state is new */
if(nolook)
error("yacc state/nolook error");
pstate[nstate+2] = p2;
if(nstate+1 >= NSTATES)
error("too many states");
if(c >= NTBASE) {
mstates[nstate] = ntstates[c-NTBASE];
ntstates[c-NTBASE] = nstate;
} else {
mstates[nstate] = tstates[c];
tstates[c] = nstate;
}
tystate[nstate] = MUSTDO;
return nstate++;
}
void
putitem(int *ptr, Lkset *lptr)
{
Item *j;
if(pidebug && foutput != 0)
Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
j = pstate[nstate+1];
j->pitem = ptr;
if(!nolook)
j->look = flset(lptr);
pstate[nstate+1] = ++j;
if((int*)j > zzmemsz) {
zzmemsz = (int*)j;
if(zzmemsz >= &mem0[MEMSIZE])
error("out of state space");
}
}
/*
* mark nonterminals which derive the empty string
* also, look for nonterminals which don't derive any token strings
*/
void
cempty(void)
{
int i, *p;
/* first, use the array pempty to detect productions that can never be reduced */
/* set pempty to WHONOWS */
aryfil(pempty, nnonter+1, WHOKNOWS);
/* now, look at productions, marking nonterminals which derive something */
more:
PLOOP(0, i) {
if(pempty[*prdptr[i] - NTBASE])
continue;
for(p = prdptr[i]+1; *p >= 0; ++p)
if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
break;
/* production can be derived */
if(*p < 0) {
pempty[*prdptr[i]-NTBASE] = OK;
goto more;
}
}
/* now, look at the nonterminals, to see if they are all OK */
NTLOOP(i) {
/* the added production rises or falls as the start symbol ... */
if(i == 0)
continue;
if(pempty[i] != OK) {
fatfl = 0;
error("nonterminal %s never derives any token string", nontrst[i].name);
}
}
if(nerrors) {
summary();
cleantmp();
exits("error");
}
/* now, compute the pempty array, to see which nonterminals derive the empty string */
/* set pempty to WHOKNOWS */
aryfil( pempty, nnonter+1, WHOKNOWS);
/* loop as long as we keep finding empty nonterminals */
again:
PLOOP(1, i) {
/* not known to be empty */
if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
;
/* we have a nontrivially empty nonterminal */
if(*p < 0) {
pempty[*prdptr[i]-NTBASE] = EMPTY;
/* got one ... try for another */
goto again;
}
}
}
}
/*
* generate the states
*/
void
stagen(void)
{
int c, i, j, more;
Wset *p, *q;
/* initialize */
nstate = 0;
/* THIS IS FUNNY from the standpoint of portability
* it represents the magic moment when the mem0 array, which has
* been holding the productions, starts to hold item pointers, of a
* different type...
* someday, alloc should be used to allocate all this stuff... for now, we
* accept that if pointers don't fit in integers, there is a problem...
*/
pstate[0] = pstate[1] = (Item*)mem;
aryfil(clset.lset, tbitset, 0);
putitem(prdptr[0]+1, &clset);
tystate[0] = MUSTDO;
nstate = 1;
pstate[2] = pstate[1];
aryfil(amem, ACTSIZE, 0);
/* now, the main state generation loop */
for(more=1; more;) {
more = 0;
SLOOP(i) {
if(tystate[i] != MUSTDO)
continue;
tystate[i] = DONE;
aryfil(temp1, nnonter+1, 0);
/* take state i, close it, and do gotos */
closure(i);
/* generate goto's */
WSLOOP(wsets, p) {
if(p->flag)
continue;
p->flag = 1;
c = *(p->pitem);
if(c <= 1) {
if(pstate[i+1]-pstate[i] <= p-wsets)
tystate[i] = MUSTLOOKAHEAD;
continue;
}
/* do a goto on c */
WSLOOP(p, q)
/* this item contributes to the goto */
if(c == *(q->pitem)) {
putitem(q->pitem+1, &q->ws);
q->flag = 1;
}
if(c < NTBASE)
state(c); /* register new state */
else
temp1[c-NTBASE] = state(c);
}
if(gsdebug && foutput != 0) {
Bprint(foutput, "%d: ", i);
NTLOOP(j)
if(temp1[j])
Bprint(foutput, "%s %d, ",
nontrst[j].name, temp1[j]);
Bprint(foutput, "\n");
}
indgo[i] = apack(&temp1[1], nnonter-1) - 1;
/* do some more */
more = 1;
}
}
}
/*
* generate the closure of state i
*/
void
closure(int i)
{
Wset *u, *v;
Item *p, *q;
int c, ch, work, k, *pi, **s, **t;
zzclose++;
/* first, copy kernel of state i to wsets */
cwp = wsets;
ITMLOOP(i, p, q) {
cwp->pitem = p->pitem;
cwp->flag = 1; /* this item must get closed */
SETLOOP(k)
cwp->ws.lset[k] = p->look->lset[k];
WSBUMP(cwp);
}
/* now, go through the loop, closing each item */
work = 1;
while(work) {
work = 0;
WSLOOP(wsets, u) {
if(u->flag == 0)
continue;
/* dot is before c */
c = *(u->pitem);
if(c < NTBASE) {
u->flag = 0;
/* only interesting case is where . is before nonterminal */
continue;
}
/* compute the lookahead */
aryfil(clset.lset, tbitset, 0);
/* find items involving c */
WSLOOP(u, v)
if(v->flag == 1 && *(pi=v->pitem) == c) {
v->flag = 0;
if(nolook)
continue;
while((ch = *++pi) > 0) {
/* terminal symbol */
if(ch < NTBASE) {
SETBIT(clset.lset, ch);
break;
}
/* nonterminal symbol */
setunion(clset.lset, pfirst[ch-NTBASE]->lset);
if(!pempty[ch-NTBASE])
break;
}
if(ch <= 0)
setunion(clset.lset, v->ws.lset);
}
/*
* now loop over productions derived from c
* c is now nonterminal number
*/
c -= NTBASE;
t = pres[c+1];
for(s = pres[c]; s < t; ++s) {
/*
* put these items into the closure
* is the item there
*/
WSLOOP(wsets, v)
/* yes, it is there */
if(v->pitem == *s) {
if(nolook)
goto nexts;
if(setunion(v->ws.lset, clset.lset))
v->flag = work = 1;
goto nexts;
}
/* not there; make a new entry */
if(cwp-wsets+1 >= WSETSIZE)
error( "working set overflow");
cwp->pitem = *s;
cwp->flag = 1;
if(!nolook) {
work = 1;
SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
}
WSBUMP(cwp);
nexts:;
}
}
}
/* have computed closure; flags are reset; return */
if(cwp > zzcwp)
zzcwp = cwp;
if(cldebug && foutput != 0) {
Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
WSLOOP(wsets, u) {
if(u->flag)
Bprint(foutput, "flag set!\n");
u->flag = 0;
Bprint(foutput, "\t%s", writem(u->pitem));
prlook(&u->ws);
Bprint(foutput, "\n");
}
}
}
/*
* decide if the lookahead set pointed to by p is known
* return pointer to a perminent location for the set
*/
Lkset*
flset(Lkset *p)
{
Lkset *q;
int *u, *v, *w, j;
for(q = &lkst[nlset]; q-- > lkst;) {
u = p->lset;
v = q->lset;
w = &v[tbitset];
while(v < w)
if(*u++ != *v++)
goto more;
/* we have matched */
return q;
more:;
}
/* add a new one */
q = &lkst[nlset++];
if(nlset >= LSETSIZE)
error("too many lookahead sets");
SETLOOP(j)
q->lset[j] = p->lset[j];
return q;
}
void
cleantmp(void)
{
ZAPFILE(actname);
ZAPFILE(tempname);
}
void
intr(void)
{
cleantmp();
exits("interrupted");
}
void
setup(int argc, char *argv[])
{
long c, t;
int i, j, fd, lev, ty, ytab, *p;
int vflag, dflag, stem;
char actnm[8], *stemc, *s, dirbuf[128];
ytab = 0;
vflag = 0;
dflag = 0;
stem = 0;
stemc = "y";
foutput = 0;
fdefine = 0;
fdebug = 0;
ARGBEGIN{
case 'v':
case 'V':
vflag++;
break;
case 'D':
yydebug = ARGF();
break;
case 'd':
dflag++;
break;
case 'o':
ytab++;
ytabc = ARGF();
break;
case 's':
stem++;
stemc = ARGF();
break;
case 'S':
parser = PARSERS;
break;
default:
error("illegal option: %c", ARGC());
}ARGEND
openup(stemc, dflag, vflag, ytab, ytabc);
if((fd = mkstemp(ttempname)) >= 0){
tempname = ttempname;
ftemp = Bfdopen(fd, OWRITE);
}
if((fd = mkstemp(tactname)) >= 0){
actname = tactname;
faction = Bfdopen(fd, OWRITE);
}
if(ftemp == 0 || faction == 0)
error("cannot open temp file");
if(argc < 1)
error("no input file");
infile = argv[0];
if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
i = strlen(infile)+1+strlen(dirbuf)+1+10;
s = malloc(i);
if(s != nil){
snprint(s, i, "%s/%s", dirbuf, infile);
cleanname(s);
infile = s;
}
}
finput = Bopen(infile, OREAD);
if(finput == 0)
error("cannot open '%s'", argv[0]);
cnamp = cnames;
defin(0, "$end");
extval = PRIVATE; /* tokens start in unicode 'private use' */
defin(0, "error");
defin(1, "$accept");
defin(0, "$unk");
mem = mem0;
i = 0;
for(t = gettok(); t != MARK && t != ENDFILE;)
switch(t) {
case ';':
t = gettok();
break;
case START:
if(gettok() != IDENTIFIER)
error("bad %%start construction");
start = chfind(1, tokname);
t = gettok();
continue;
case TYPEDEF:
if(gettok() != TYPENAME)
error("bad syntax in %%type");
ty = numbval;
for(;;) {
t = gettok();
switch(t) {
case IDENTIFIER:
if((t=chfind(1, tokname)) < NTBASE) {
j = TYPE(toklev[t]);
if(j != 0 && j != ty)
error("type redeclaration of token %s",
tokset[t].name);
else
SETTYPE(toklev[t], ty);
} else {
j = nontrst[t-NTBASE].value;
if(j != 0 && j != ty)
error("type redeclaration of nonterminal %s",
nontrst[t-NTBASE].name );
else
nontrst[t-NTBASE].value = ty;
}
case ',':
continue;
case ';':
t = gettok();
default:
break;
}
break;
}
continue;
case UNION:
/* copy the union declaration to the output */
cpyunion();
t = gettok();
continue;
case LEFT:
case BINARY:
case RIGHT:
i++;
case TERM:
/* nonzero means new prec. and assoc. */
lev = t-TERM;
ty = 0;
/* get identifiers so defined */
t = gettok();
/* there is a type defined */
if(t == TYPENAME) {
ty = numbval;
t = gettok();
}
for(;;) {
switch(t) {
case ',':
t = gettok();
continue;
case ';':
break;
case IDENTIFIER:
j = chfind(0, tokname);
if(j >= NTBASE)
error("%s defined earlier as nonterminal", tokname);
if(lev) {
if(ASSOC(toklev[j]))
error("redeclaration of precedence of %s", tokname);
SETASC(toklev[j], lev);
SETPLEV(toklev[j], i);
}
if(ty) {
if(TYPE(toklev[j]))
error("redeclaration of type of %s", tokname);
SETTYPE(toklev[j],ty);
}
t = gettok();
if(t == NUMBER) {
tokset[j].value = numbval;
if(j < ndefout && j > 3)
error("please define type number of %s earlier",
tokset[j].name);
t = gettok();
}
continue;
}
break;
}
continue;
case LCURLY:
defout(0);
cpycode();
t = gettok();
continue;
default:
error("syntax error");
}
if(t == ENDFILE)
error("unexpected EOF before %%");
/* t is MARK */
Bprint(ftable, "extern int yyerrflag;\n");
Bprint(ftable, "#ifndef YYMAXDEPTH\n");
Bprint(ftable, "#define YYMAXDEPTH 150\n");
Bprint(ftable, "#endif\n" );
if(!ntypes) {
Bprint(ftable, "#ifndef YYSTYPE\n");
Bprint(ftable, "#define YYSTYPE int\n");
Bprint(ftable, "#endif\n");
}
Bprint(ftable, "YYSTYPE yylval;\n");
Bprint(ftable, "YYSTYPE yyval;\n");
prdptr[0] = mem;
/* added production */
*mem++ = NTBASE;
/* if start is 0, we will overwrite with the lhs of the first rule */
*mem++ = start;
*mem++ = 1;
*mem++ = 0;
prdptr[1] = mem;
while((t=gettok()) == LCURLY)
cpycode();
if(t != IDENTCOLON)
error("bad syntax on first rule");
if(!start)
prdptr[0][1] = chfind(1, tokname);
/* read rules */
while(t != MARK && t != ENDFILE) {
/* process a rule */
rlines[nprod] = lineno;
if(t == '|')
*mem++ = *prdptr[nprod-1];
else
if(t == IDENTCOLON) {
*mem = chfind(1, tokname);
if(*mem < NTBASE)
error("token illegal on LHS of grammar rule");
mem++;
} else
error("illegal rule: missing semicolon or | ?");
/* read rule body */
t = gettok();
more_rule:
while(t == IDENTIFIER) {
*mem = chfind(1, tokname);
if(*mem < NTBASE)
levprd[nprod] = toklev[*mem];
mem++;
t = gettok();
}
if(t == PREC) {
if(gettok() != IDENTIFIER)
error("illegal %%prec syntax");
j = chfind(2, tokname);
if(j >= NTBASE)
error("nonterminal %s illegal after %%prec",
nontrst[j-NTBASE].name);
levprd[nprod] = toklev[j];
t = gettok();
}
if(t == '=') {
levprd[nprod] |= ACTFLAG;
Bprint(faction, "\ncase %d:", nprod);
cpyact(mem-prdptr[nprod]-1);
Bprint(faction, " break;");
if((t=gettok()) == IDENTIFIER) {
/* action within rule... */
sprint(actnm, "$$%d", nprod);
/* make it a nonterminal */
j = chfind(1, actnm);
/*
* the current rule will become rule number nprod+1
* move the contents down, and make room for the null
*/
for(p = mem; p >= prdptr[nprod]; --p)
p[2] = *p;
mem += 2;
/* enter null production for action */
p = prdptr[nprod];
*p++ = j;
*p++ = -nprod;
/* update the production information */
levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
levprd[nprod] = ACTFLAG;
if(++nprod >= NPROD)
error("more than %d rules", NPROD);
prdptr[nprod] = p;
/* make the action appear in the original rule */
*mem++ = j;
/* get some more of the rule */
goto more_rule;
}
}
while(t == ';')
t = gettok();
*mem++ = -nprod;
/* check that default action is reasonable */
if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
/* no explicit action, LHS has value */
int tempty;
tempty = prdptr[nprod][1];
if(tempty < 0)
error("must return a value, since LHS has a type");
else
if(tempty >= NTBASE)
tempty = nontrst[tempty-NTBASE].value;
else
tempty = TYPE(toklev[tempty]);
if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
error("default action causes potential type clash");
}
nprod++;
if(nprod >= NPROD)
error("more than %d rules", NPROD);
prdptr[nprod] = mem;
levprd[nprod] = 0;
}
/* end of all rules */
defout(1);
finact();
if(t == MARK) {
Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
while((c=Bgetrune(finput)) != Beof)
Bputrune(ftable, c);
}
Bterm(finput);
}
/*
* finish action routine
*/
void
finact(void)
{
Bterm(faction);
Bprint(ftable, "#define YYEOFCODE %d\n", 1);
Bprint(ftable, "#define YYERRCODE %d\n", 2);
}
/*
* define s to be a terminal if t=0
* or a nonterminal if t=1
*/
int
defin(int nt, char *s)
{
int val;
Rune rune;
val = 0;
if(nt) {
nnonter++;
if(nnonter >= NNONTERM)
error("too many nonterminals, limit %d",NNONTERM);
nontrst[nnonter].name = cstash(s);
return NTBASE + nnonter;
}
/* must be a token */
ntokens++;
if(ntokens >= NTERMS)
error("too many terminals, limit %d", NTERMS);
tokset[ntokens].name = cstash(s);
/* establish value for token */
/* single character literal */
if(s[0] == ' ') {
val = chartorune(&rune, &s[1]);
if(s[val+1] == 0) {
val = rune;
goto out;
}
}
/* escape sequence */
if(s[0] == ' ' && s[1] == '\\') {
if(s[3] == 0) {
/* single character escape sequence */
switch(s[2]) {
case 'n': val = '\n'; break;
case 'r': val = '\r'; break;
case 'b': val = '\b'; break;
case 't': val = '\t'; break;
case 'f': val = '\f'; break;
case '\'': val = '\''; break;
case '"': val = '"'; break;
case '\\': val = '\\'; break;
default: error("invalid escape");
}
goto out;
}
/* \nnn sequence */
if(s[2] >= '0' && s[2] <= '7') {
if(s[3] < '0' ||
s[3] > '7' ||
s[4] < '0' ||
s[4] > '7' ||
s[5] != 0)
error("illegal \\nnn construction");
val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
if(val == 0)
error("'\\000' is illegal");
goto out;
}
error("unknown escape");
}
val = extval++;
out:
tokset[ntokens].value = val;
toklev[ntokens] = 0;
return ntokens;
}
/*
* write out the defines (at the end of the declaration section)
*/
void
defout(int last)
{
int i, c;
char sar[NAMESIZE+10];
for(i=ndefout; i<=ntokens; i++) {
/* non-literals */
c = tokset[i].name[0];
if(c != ' ' && c != '$') {
Bprint(ftable, "#define %s %d\n",
tokset[i].name, tokset[i].value);
if(fdefine)
Bprint(fdefine, "#define\t%s\t%d\n",
tokset[i].name, tokset[i].value);
}
}
ndefout = ntokens+1;
if(last && fdebug) {
Bprint(fdebug, "char* yytoknames[] =\n{\n");
TLOOP(i) {
if(tokset[i].name) {
chcopy(sar, tokset[i].name);
Bprint(fdebug, "\t\"%s\",\n", sar);
continue;
}
Bprint(fdebug, "\t0,\n");
}
Bprint(fdebug, "};\n");
}
}
char*
cstash(char *s)
{
char *temp;
temp = cnamp;
do {
if(cnamp >= &cnames[cnamsz])
error("too many characters in id's and literals");
else
*cnamp++ = *s;
} while(*s++);
return temp;
}
long
gettok(void)
{
long c;
Rune rune;
int i, base, match, reserve;
static int peekline;
begin:
reserve = 0;
lineno += peekline;
peekline = 0;
c = Bgetrune(finput);
while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
if(c == '\n')
lineno++;
c = Bgetrune(finput);
}
/* skip comment */
if(c == '/') {
lineno += skipcom();
goto begin;
}
switch(c) {
case Beof:
return ENDFILE;
case '{':
Bungetrune(finput);
return '=';
case '<':
/* get, and look up, a type name (union member name) */
i = 0;
while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
rune = c;
c = runetochar(&tokname[i], &rune);
if(i < NAMESIZE)
i += c;
}
if(c != '>')
error("unterminated < ... > clause");
tokname[i] = 0;
for(i=1; i<=ntypes; i++)
if(!strcmp(typeset[i], tokname)) {
numbval = i;
return TYPENAME;
}
ntypes++;
numbval = ntypes;
typeset[numbval] = cstash(tokname);
return TYPENAME;
case '"':
case '\'':
match = c;
tokname[0] = ' ';
i = 1;
for(;;) {
c = Bgetrune(finput);
if(c == '\n' || c <= 0)
error("illegal or missing ' or \"" );
if(c == '\\') {
tokname[i] = '\\';
if(i < NAMESIZE)
i++;
c = Bgetrune(finput);
} else
if(c == match)
break;
rune = c;
c = runetochar(&tokname[i], &rune);
if(i < NAMESIZE)
i += c;
}
break;
case '%':
case '\\':
switch(c = Bgetrune(finput)) {
case '0': return TERM;
case '<': return LEFT;
case '2': return BINARY;
case '>': return RIGHT;
case '%':
case '\\': return MARK;
case '=': return PREC;
case '{': return LCURLY;
default: reserve = 1;
}
default:
/* number */
if(isdigit(c)) {
numbval = c-'0';
base = (c=='0')? 8: 10;
for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
numbval = numbval*base + (c-'0');
Bungetrune(finput);
return NUMBER;
}
if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
i = 0;
while(islower(c) || isupper(c) || isdigit(c) ||
c == '-' || c=='_' || c=='.' || c=='$') {
if(reserve && isupper(c))
c += 'a'-'A';
rune = c;
c = runetochar(&tokname[i], &rune);
if(i < NAMESIZE)
i += c;
c = Bgetrune(finput);
}
} else
return c;
Bungetrune(finput);
}
tokname[i] = 0;
/* find a reserved word */
if(reserve) {
for(c=0; resrv[c].name; c++)
if(strcmp(tokname, resrv[c].name) == 0)
return resrv[c].value;
error("invalid escape, or illegal reserved word: %s", tokname);
}
/* look ahead to distinguish IDENTIFIER from IDENTCOLON */
c = Bgetrune(finput);
while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
if(c == '\n')
peekline++;
/* look for comments */
if(c == '/')
peekline += skipcom();
c = Bgetrune(finput);
}
if(c == ':')
return IDENTCOLON;
Bungetrune(finput);
return IDENTIFIER;
}
/*
* determine the type of a symbol
*/
int
fdtype(int t)
{
int v;
if(t >= NTBASE)
v = nontrst[t-NTBASE].value;
else
v = TYPE(toklev[t]);
if(v <= 0)
error("must specify type for %s", (t>=NTBASE)?
nontrst[t-NTBASE].name: tokset[t].name);
return v;
}
int
chfind(int t, char *s)
{
int i;
if(s[0] == ' ')
t = 0;
TLOOP(i)
if(!strcmp(s, tokset[i].name))
return i;
NTLOOP(i)
if(!strcmp(s, nontrst[i].name))
return NTBASE+i;
/* cannot find name */
if(t > 1)
error("%s should have been defined earlier", s);
return defin(t, s);
}
/*
* copy the union declaration to the output, and the define file if present
*/
void
cpyunion(void)
{
long c;
int level;
Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
Bprint(ftable, "typedef union ");
if(fdefine != 0)
Bprint(fdefine, "\ntypedef union ");
level = 0;
for(;;) {
if((c=Bgetrune(finput)) == Beof)
error("EOF encountered while processing %%union");
Bputrune(ftable, c);
if(fdefine != 0)
Bputrune(fdefine, c);
switch(c) {
case '\n':
lineno++;
break;
case '{':
level++;
break;
case '}':
level--;
/* we are finished copying */
if(level == 0) {
Bprint(ftable, " YYSTYPE;\n");
if(fdefine != 0)
Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
return;
}
}
}
}
/*
* copies code between \{ and \}
*/
void
cpycode(void)
{
long c;
c = Bgetrune(finput);
if(c == '\n') {
c = Bgetrune(finput);
lineno++;
}
Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
while(c != Beof) {
if(c == '\\') {
if((c=Bgetrune(finput)) == '}')
return;
Bputc(ftable, '\\');
}
if(c == '%') {
if((c=Bgetrune(finput)) == '}')
return;
Bputc(ftable, '%');
}
Bputrune(ftable, c);
if(c == '\n')
lineno++;
c = Bgetrune(finput);
}
error("eof before %%}");
}
/*
* skip over comments
* skipcom is called after reading a '/'
*/
int
skipcom(void)
{
long c;
int i;
/* i is the number of lines skipped */
i = 0;
if(Bgetrune(finput) != '*')
error("illegal comment");
c = Bgetrune(finput);
while(c != Beof) {
while(c == '*')
if((c=Bgetrune(finput)) == '/')
return i;
if(c == '\n')
i++;
c = Bgetrune(finput);
}
error("EOF inside comment");
return 0;
}
/*
* copy C action to the next ; or closing }
*/
void
cpyact(int offset)
{
long c;
int brac, match, j, s, fnd, tok;
Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
brac = 0;
loop:
c = Bgetrune(finput);
swt:
switch(c) {
case ';':
if(brac == 0) {
Bputrune(faction, c);
return;
}
goto lcopy;
case '{':
brac++;
goto lcopy;
case '$':
s = 1;
tok = -1;
c = Bgetrune(finput);
/* type description */
if(c == '<') {
Bungetrune(finput);
if(gettok() != TYPENAME)
error("bad syntax on $<ident> clause");
tok = numbval;
c = Bgetrune(finput);
}
if(c == '$') {
Bprint(faction, "yyval");
/* put out the proper tag... */
if(ntypes) {
if(tok < 0)
tok = fdtype(*prdptr[nprod]);
Bprint(faction, ".%s", typeset[tok]);
}
goto loop;
}
if(c == '-') {
s = -s;
c = Bgetrune(finput);
}
if(isdigit(c)) {
j = 0;
while(isdigit(c)) {
j = j*10 + (c-'0');
c = Bgetrune(finput);
}
Bungetrune(finput);
j = j*s - offset;
if(j > 0)
error("Illegal use of $%d", j+offset);
dollar:
Bprint(faction, "yypt[-%d].yyv", -j);
/* put out the proper tag */
if(ntypes) {
if(j+offset <= 0 && tok < 0)
error("must specify type of $%d", j+offset);
if(tok < 0)
tok = fdtype(prdptr[nprod][j+offset]);
Bprint(faction, ".%s", typeset[tok]);
}
goto loop;
}
if(isupper(c) || islower(c) || c == '_' || c == '.') {
int tok; /* tok used oustide for type info */
/* look for $name */
Bungetrune(finput);
if(gettok() != IDENTIFIER)
error("$ must be followed by an identifier");
tok = chfind(2, tokname);
if((c = Bgetrune(finput)) != '#') {
Bungetrune(finput);
fnd = -1;
} else
if(gettok() != NUMBER) {
error("# must be followed by number");
fnd = -1;
} else
fnd = numbval;
for(j=1; j<=offset; ++j)
if(tok == prdptr[nprod][j]) {
if(--fnd <= 0) {
j -= offset;
goto dollar;
}
}
error("$name or $name#number not found");
}
Bputc(faction, '$');
if(s < 0 )
Bputc(faction, '-');
goto swt;
case '}':
brac--;
if(brac)
goto lcopy;
Bputrune(faction, c);
return;
case '/':
/* look for comments */
Bputrune(faction, c);
c = Bgetrune(finput);
if(c != '*')
goto swt;
/* it really is a comment */
Bputrune(faction, c);
c = Bgetrune(finput);
while(c >= 0) {
while(c == '*') {
Bputrune(faction, c);
if((c=Bgetrune(finput)) == '/')
goto lcopy;
}
Bputrune(faction, c);
if(c == '\n')
lineno++;
c = Bgetrune(finput);
}
error("EOF inside comment");
case '\'':
/* character constant */
match = '\'';
goto string;
case '"':
/* character string */
match = '"';
string:
Bputrune(faction, c);
while(c = Bgetrune(finput)) {
if(c == '\\') {
Bputrune(faction, c);
c = Bgetrune(finput);
if(c == '\n')
lineno++;
} else
if(c == match)
goto lcopy;
if(c == '\n')
error("newline in string or char. const.");
Bputrune(faction, c);
}
error("EOF in string or character constant");
case Beof:
error("action does not terminate");
case '\n':
lineno++;
goto lcopy;
}
lcopy:
Bputrune(faction, c);
goto loop;
}
void
openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
{
char buf[256];
if(vflag) {
sprint(buf, "%s.%s", stem, FILEU);
foutput = Bopen(buf, OWRITE);
if(foutput == 0)
error("cannot open %s", buf);
}
if(yydebug) {
sprint(buf, "%s.%s", stem, FILEDEBUG);
if((fdebug = Bopen(buf, OWRITE)) == 0)
error("can't open %s", buf);
}
if(dflag) {
sprint(buf, "%s.%s", stem, FILED);
fdefine = Bopen(buf, OWRITE);
if(fdefine == 0)
error("can't create %s", buf);
}
if(ytab == 0)
sprint(buf, "%s.%s", stem, OFILE);
else
strcpy(buf, ytabc);
ftable = Bopen(buf, OWRITE);
if(ftable == 0)
error("cannot open table file %s", buf);
}
/*
* print the output for the states
*/
void
output(void)
{
int i, k, c;
Wset *u, *v;
Bprint(ftable, "short yyexca[] =\n{");
if(fdebug)
Bprint(fdebug, "char* yystates[] =\n{\n");
/* output the stuff for state i */
SLOOP(i) {
nolook = tystate[i]!=MUSTLOOKAHEAD;
closure(i);
/* output actions */
nolook = 1;
aryfil(temp1, ntokens+nnonter+1, 0);
WSLOOP(wsets, u) {
c = *(u->pitem);
if(c > 1 && c < NTBASE && temp1[c] == 0) {
WSLOOP(u, v)
if(c == *(v->pitem))
putitem(v->pitem+1, (Lkset*)0);
temp1[c] = state(c);
} else
if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
temp1[c+ntokens] = amem[indgo[i]+c];
}
if(i == 1)
temp1[1] = ACCEPTCODE;
/* now, we have the shifts; look at the reductions */
lastred = 0;
WSLOOP(wsets, u) {
c = *u->pitem;
/* reduction */
if(c <= 0) {
lastred = -c;
TLOOP(k)
if(BIT(u->ws.lset, k)) {
if(temp1[k] == 0)
temp1[k] = c;
else
if(temp1[k] < 0) { /* reduce/reduce conflict */
if(foutput)
Bprint(foutput,
"\n%d: reduce/reduce conflict"
" (red'ns %d and %d ) on %s",
i, -temp1[k], lastred,
symnam(k));
if(-temp1[k] > lastred)
temp1[k] = -lastred;
zzrrconf++;
} else
/* potential shift/reduce conflict */
precftn( lastred, k, i );
}
}
}
wract(i);
}
if(fdebug)
Bprint(fdebug, "};\n");
Bprint(ftable, "};\n");
Bprint(ftable, "#define YYNPROD %d\n", nprod);
Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
if(yydebug)
Bprint(ftable, "#define yydebug %s\n", yydebug);
}
/*
* pack state i from temp1 into amem
*/
int
apack(int *p, int n)
{
int *pp, *qq, *rr, off, *q, *r;
/* we don't need to worry about checking because
* we will only look at entries known to be there...
* eliminate leading and trailing 0's
*/
q = p+n;
for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
;
/* no actions */
if(pp > q)
return 0;
p = pp;
/* now, find a place for the elements from p to q, inclusive */
r = &amem[ACTSIZE-1];
for(rr = amem; rr <= r; rr++, off++) {
for(qq = rr, pp = p; pp <= q; pp++, qq++)
if(*pp != 0)
if(*pp != *qq && *qq != 0)
goto nextk;
/* we have found an acceptable k */
if(pkdebug && foutput != 0)
Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
for(qq = rr, pp = p; pp <= q; pp++, qq++)
if(*pp) {
if(qq > r)
error("action table overflow");
if(qq > memp)
memp = qq;
*qq = *pp;
}
if(pkdebug && foutput != 0)
for(pp = amem; pp <= memp; pp += 10) {
Bprint(foutput, "\t");
for(qq = pp; qq <= pp+9; qq++)
Bprint(foutput, "%d ", *qq);
Bprint(foutput, "\n");
}
return(off);
nextk:;
}
error("no space in action table");
return 0;
}
/*
* output the gotos for the nontermninals
*/
void
go2out(void)
{
int i, j, k, best, count, cbest, times;
/* mark begining of gotos */
Bprint(ftemp, "$\n");
for(i = 1; i <= nnonter; i++) {
go2gen(i);
/* find the best one to make default */
best = -1;
times = 0;
/* is j the most frequent */
for(j = 0; j <= nstate; j++) {
if(tystate[j] == 0)
continue;
if(tystate[j] == best)
continue;
/* is tystate[j] the most frequent */
count = 0;
cbest = tystate[j];
for(k = j; k <= nstate; k++)
if(tystate[k] == cbest)
count++;
if(count > times) {
best = cbest;
times = count;
}
}
/* best is now the default entry */
zzgobest += times-1;
for(j = 0; j <= nstate; j++)
if(tystate[j] != 0 && tystate[j] != best) {
Bprint(ftemp, "%d,%d,", j, tystate[j]);
zzgoent++;
}
/* now, the default */
if(best == -1)
best = 0;
zzgoent++;
Bprint(ftemp, "%d\n", best);
}
}
/*
* output the gotos for nonterminal c
*/
void
go2gen(int c)
{
int i, work, cc;
Item *p, *q;
/* first, find nonterminals with gotos on c */
aryfil(temp1, nnonter+1, 0);
temp1[c] = 1;
work = 1;
while(work) {
work = 0;
PLOOP(0, i)
/* cc is a nonterminal */
if((cc=prdptr[i][1]-NTBASE) >= 0)
/* cc has a goto on c */
if(temp1[cc] != 0) {
/* thus, the left side of production i does too */
cc = *prdptr[i]-NTBASE;
if(temp1[cc] == 0) {
work = 1;
temp1[cc] = 1;
}
}
}
/* now, we have temp1[c] = 1 if a goto on c in closure of cc */
if(g2debug && foutput != 0) {
Bprint(foutput, "%s: gotos on ", nontrst[c].name);
NTLOOP(i)
if(temp1[i])
Bprint(foutput, "%s ", nontrst[i].name);
Bprint(foutput, "\n");
}
/* now, go through and put gotos into tystate */
aryfil(tystate, nstate, 0);
SLOOP(i)
ITMLOOP(i, p, q)
if((cc = *p->pitem) >= NTBASE)
/* goto on c is possible */
if(temp1[cc-NTBASE]) {
tystate[i] = amem[indgo[i]+c];
break;
}
}
/*
* decide a shift/reduce conflict by precedence.
* r is a rule number, t a token number
* the conflict is in state s
* temp1[t] is changed to reflect the action
*/
void
precftn(int r, int t, int s)
{
int lp, lt, action;
lp = levprd[r];
lt = toklev[t];
if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
/* conflict */
if(foutput != 0)
Bprint(foutput,
"\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
zzsrconf++;
return;
}
if(PLEVEL(lt) == PLEVEL(lp))
action = ASSOC(lt);
else
if(PLEVEL(lt) > PLEVEL(lp))
action = RASC; /* shift */
else
action = LASC; /* reduce */
switch(action) {
case BASC: /* error action */
temp1[t] = ERRCODE;
break;
case LASC: /* reduce */
temp1[t] = -r;
break;
}
}
/*
* output state i
* temp1 has the actions, lastred the default
*/
void
wract(int i)
{
int p, p0, p1, ntimes, tred, count, j, flag;
/* find the best choice for lastred */
lastred = 0;
ntimes = 0;
TLOOP(j) {
if(temp1[j] >= 0)
continue;
if(temp1[j]+lastred == 0)
continue;
/* count the number of appearances of temp1[j] */
count = 0;
tred = -temp1[j];
levprd[tred] |= REDFLAG;
TLOOP(p)
if(temp1[p]+tred == 0)
count++;
if(count > ntimes) {
lastred = tred;
ntimes = count;
}
}
/*
* for error recovery, arrange that, if there is a shift on the
* error recovery token, `error', that the default be the error action
*/
if(temp1[2] > 0)
lastred = 0;
/* clear out entries in temp1 which equal lastred */
TLOOP(p)
if(temp1[p]+lastred == 0)
temp1[p] = 0;
wrstate(i);
defact[i] = lastred;
flag = 0;
TLOOP(p0)
if((p1=temp1[p0]) != 0) {
if(p1 < 0) {
p1 = -p1;
goto exc;
}
if(p1 == ACCEPTCODE) {
p1 = -1;
goto exc;
}
if(p1 == ERRCODE) {
p1 = 0;
exc:
if(flag++ == 0)
Bprint(ftable, "-1, %d,\n", i);
Bprint(ftable, "\t%d, %d,\n", p0, p1);
zzexcp++;
continue;
}
Bprint(ftemp, "%d,%d,", p0, p1);
zzacent++;
}
if(flag) {
defact[i] = -2;
Bprint(ftable, "\t-2, %d,\n", lastred);
}
Bprint(ftemp, "\n");
}
/*
* writes state i
*/
void
wrstate(int i)
{
int j0, j1;
Item *pp, *qq;
Wset *u;
if(fdebug) {
if(lastred) {
Bprint(fdebug, " 0, /*%d*/\n", i);
} else {
Bprint(fdebug, " \"");
ITMLOOP(i, pp, qq)
Bprint(fdebug, "%s\\n", writem(pp->pitem));
if(tystate[i] == MUSTLOOKAHEAD)
WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
if(*u->pitem < 0)
Bprint(fdebug, "%s\\n", writem(u->pitem));
Bprint(fdebug, "\", /*%d*/\n", i);
}
}
if(foutput == 0)
return;
Bprint(foutput, "\nstate %d\n", i);
ITMLOOP(i, pp, qq)
Bprint(foutput, "\t%s\n", writem(pp->pitem));
if(tystate[i] == MUSTLOOKAHEAD)
/* print out empty productions in closure */
WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
if(*u->pitem < 0)
Bprint(foutput, "\t%s\n", writem(u->pitem));
/* check for state equal to another */
TLOOP(j0)
if((j1=temp1[j0]) != 0) {
Bprint(foutput, "\n\t%s ", symnam(j0));
/* shift, error, or accept */
if(j1 > 0) {
if(j1 == ACCEPTCODE)
Bprint(foutput, "accept");
else
if(j1 == ERRCODE)
Bprint(foutput, "error");
else
Bprint(foutput, "shift %d", j1);
} else
Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
}
/* output the final production */
if(lastred)
Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
lastred, rlines[lastred]);
else
Bprint(foutput, "\n\t. error\n\n");
/* now, output nonterminal actions */
j1 = ntokens;
for(j0 = 1; j0 <= nnonter; j0++) {
j1++;
if(temp1[j1])
Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
}
}
void
warray(char *s, int *v, int n)
{
int i;
Bprint(ftable, "short %s[] =\n{", s);
for(i=0;;) {
if(i%10 == 0)
Bprint(ftable, "\n");
Bprint(ftable, "%4d", v[i]);
i++;
if(i >= n) {
Bprint(ftable, "\n};\n");
break;
}
Bprint(ftable, ",");
}
}
/*
* in order to free up the mem and amem arrays for the optimizer,
* and still be able to output yyr1, etc., after the sizes of
* the action array is known, we hide the nonterminals
* derived by productions in levprd.
*/
void
hideprod(void)
{
int i, j;
j = 0;
levprd[0] = 0;
PLOOP(1, i) {
if(!(levprd[i] & REDFLAG)) {
j++;
if(foutput != 0)
Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
}
levprd[i] = *prdptr[i] - NTBASE;
}
if(j)
print("%d rules never reduced\n", j);
}
void
callopt(void)
{
int i, *p, j, k, *q;
/* read the arrays from tempfile and set parameters */
finput = Bopen(tempname, OREAD);
if(finput == 0)
error("optimizer cannot open tempfile");
pgo[0] = 0;
temp1[0] = 0;
nstate = 0;
nnonter = 0;
for(;;) {
switch(gtnm()) {
case '\n':
nstate++;
pmem--;
temp1[nstate] = pmem - mem0;
case ',':
continue;
case '$':
break;
default:
error("bad tempfile");
}
break;
}
pmem--;
temp1[nstate] = yypgo[0] = pmem - mem0;
for(;;) {
switch(gtnm()) {
case '\n':
nnonter++;
yypgo[nnonter] = pmem-mem0;
case ',':
continue;
case -1:
break;
default:
error("bad tempfile");
}
break;
}
pmem--;
yypgo[nnonter--] = pmem - mem0;
for(i = 0; i < nstate; i++) {
k = 32000;
j = 0;
q = mem0 + temp1[i+1];
for(p = mem0 + temp1[i]; p < q ; p += 2) {
if(*p > j)
j = *p;
if(*p < k)
k = *p;
}
/* nontrivial situation */
if(k <= j) {
/* j is now the range */
/* j -= k; *//* call scj */
if(k > maxoff)
maxoff = k;
}
tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
if(j > maxspr)
maxspr = j;
}
/* initialize ggreed table */
for(i = 1; i <= nnonter; i++) {
ggreed[i] = 1;
j = 0;
/* minimum entry index is always 0 */
q = mem0 + yypgo[i+1] - 1;
for(p = mem0+yypgo[i]; p < q ; p += 2) {
ggreed[i] += 2;
if(*p > j)
j = *p;
}
ggreed[i] = ggreed[i] + 2*j;
if(j > maxoff)
maxoff = j;
}
/* now, prepare to put the shift actions into the amem array */
for(i = 0; i < ACTSIZE; i++)
amem[i] = 0;
maxa = amem;
for(i = 0; i < nstate; i++) {
if(tystate[i] == 0 && adb > 1)
Bprint(ftable, "State %d: null\n", i);
indgo[i] = YYFLAG1;
}
while((i = nxti()) != NOMORE)
if(i >= 0)
stin(i);
else
gin(-i);
/* print amem array */
if(adb > 2 )
for(p = amem; p <= maxa; p += 10) {
Bprint(ftable, "%4d ", (int)(p-amem));
for(i = 0; i < 10; ++i)
Bprint(ftable, "%4d ", p[i]);
Bprint(ftable, "\n");
}
/* write out the output appropriate to the language */
aoutput();
osummary();
ZAPFILE(tempname);
}
void
gin(int i)
{
int *p, *r, *s, *q1, *q2;
/* enter gotos on nonterminal i into array amem */
ggreed[i] = 0;
q2 = mem0+ yypgo[i+1] - 1;
q1 = mem0 + yypgo[i];
/* now, find amem place for it */
for(p = amem; p < &amem[ACTSIZE]; p++) {
if(*p)
continue;
for(r = q1; r < q2; r += 2) {
s = p + *r + 1;
if(*s)
goto nextgp;
if(s > maxa)
if((maxa = s) > &amem[ACTSIZE])
error("a array overflow");
}
/* we have found amem spot */
*p = *q2;
if(p > maxa)
if((maxa = p) > &amem[ACTSIZE])
error("a array overflow");
for(r = q1; r < q2; r += 2) {
s = p + *r + 1;
*s = r[1];
}
pgo[i] = p-amem;
if(adb > 1)
Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
return;
nextgp:;
}
error("cannot place goto %d\n", i);
}
void
stin(int i)
{
int *r, *s, n, flag, j, *q1, *q2;
tystate[i] = 0;
/* enter state i into the amem array */
q2 = mem0+temp1[i+1];
q1 = mem0+temp1[i];
/* find an acceptable place */
for(n = -maxoff; n < ACTSIZE; n++) {
flag = 0;
for(r = q1; r < q2; r += 2) {
if((s = *r + n + amem) < amem)
goto nextn;
if(*s == 0)
flag++;
else
if(*s != r[1])
goto nextn;
}
/* check that the position equals another only if the states are identical */
for(j=0; j<nstate; j++) {
if(indgo[j] == n) {
/* we have some disagreement */
if(flag)
goto nextn;
if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
/* states are equal */
indgo[i] = n;
if(adb > 1)
Bprint(ftable,
"State %d: entry at %d equals state %d\n",
i, n, j);
return;
}
/* we have some disagreement */
goto nextn;
}
}
for(r = q1; r < q2; r += 2) {
if((s = *r+n+amem) >= &amem[ACTSIZE])
error("out of space in optimizer a array");
if(s > maxa)
maxa = s;
if(*s != 0 && *s != r[1])
error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
*s = r[1];
}
indgo[i] = n;
if(adb > 1)
Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
return;
nextn:;
}
error("Error; failure to place state %d\n", i);
}
/*
* finds the next i
*/
int
nxti(void)
{
int i, max, maxi;
max = 0;
maxi = 0;
for(i = 1; i <= nnonter; i++)
if(ggreed[i] >= max) {
max = ggreed[i];
maxi = -i;
}
for(i = 0; i < nstate; ++i)
if(tystate[i] >= max) {
max = tystate[i];
maxi = i;
}
if(nxdb)
Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
if(max == 0)
return NOMORE;
return maxi;
}
/*
* write summary
*/
void
osummary(void)
{
int i, *p;
if(foutput == 0)
return;
i = 0;
for(p = maxa; p >= amem; p--)
if(*p == 0)
i++;
Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
(int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
}
/*
* this version is for C
* write out the optimized parser
*/
void
aoutput(void)
{
Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
arout("yyact", amem, (maxa-amem)+1);
arout("yypact", indgo, nstate);
arout("yypgo", pgo, nnonter+1);
}
void
arout(char *s, int *v, int n)
{
int i;
Bprint(ftable, "short %s[] =\n{", s);
for(i = 0; i < n;) {
if(i%10 == 0)
Bprint(ftable, "\n");
Bprint(ftable, "%4d", v[i]);
i++;
if(i == n)
Bprint(ftable, "\n};\n");
else
Bprint(ftable, ",");
}
}
/*
* read and convert an integer from the standard input
* return the terminating character
* blanks, tabs, and newlines are ignored
*/
int
gtnm(void)
{
int sign, val, c;
sign = 0;
val = 0;
while((c=Bgetrune(finput)) != Beof) {
if(isdigit(c)) {
val = val*10 + c-'0';
continue;
}
if(c == '-') {
sign = 1;
continue;
}
break;
}
if(sign)
val = -val;
*pmem++ = val;
if(pmem >= &mem0[MEMSIZE])
error("out of space");
return c;
}