mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-15 11:20:03 +00:00
1382 lines
22 KiB
C
1382 lines
22 KiB
C
|
#include <u.h>
|
||
|
#include <libc.h>
|
||
|
#include <bio.h>
|
||
|
#include <ctype.h>
|
||
|
#include "code.h"
|
||
|
|
||
|
/*
 * Character-class helpers.  Every argument is masked to 0..255 first,
 * so a possibly signed char never reaches <ctype.h> or indexes
 * voweltab[] as a negative value.
 */
#define	ISUPPER(c)	isupper((c)&0xff)
#define	ISLOWER(c)	islower((c)&0xff)
#define	ISALPHA(c)	isalpha((c)&0xff)
#define	ISDIGIT(c)	isdigit((c)&0xff)
#define	ISVOWEL(c)	voweltab[(c)&0xff]
#define	Tolower(c)	(ISUPPER(c)? (c)-'A'+'a': (c))	/* evaluates c more than once */
#define	pair(a,b)	(((a)<<8) | (b))	/* two characters packed into one switch label */
#define	DLEV		2	/* amount trysuff adds to lev on every call */
#define	DSIZ		40	/* deriv[] is only written while lev < DSIZ */

typedef	long	Bits;	/* set of word-class flag bits */
#define	Set(h, f)	((long)(h) & (f))	/* nonzero when h has any bit of f */
|
||
|
|
||
|
/*
 * Suffix routines.  Their addresses are stored in Suftab.p1/p2 and they
 * are all called as (*p)(ep, d, a, lev, flag) from trysuff.
 */
Bits	nop(char *ep, char *d, char *a, int lev, int flag);
Bits	strip(char *ep, char *d, char *a, int lev, int flag);
Bits	ize(char *ep, char *d, char *a, int lev, int flag);
Bits	i_to_y(char *ep, char *d, char *a, int lev, int flag);
Bits	ily(char *ep, char *d, char *a, int lev, int flag);
Bits	subst(char *ep, char *d, char *a, int lev, int flag);
Bits	CCe(char *ep, char *d, char *a, int lev, int flag);
Bits	tion(char *ep, char *d, char *a, int lev, int flag);
Bits	an(char *ep, char *d, char *a, int lev, int flag);
Bits	s(char *ep, char *d, char *a, int lev, int flag);
Bits	es(char *ep, char *d, char *a, int lev, int flag);
Bits	bility(char *ep, char *d, char *a, int lev, int flag);
Bits	y_to_e(char *ep, char *d, char *a, int lev, int flag);
Bits	VCe(char *ep, char *d, char *a, int lev, int flag);

/* word lookup and derivation tracing */
Bits	trypref(char *ep, char *a, int lev, int flag);
Bits	tryword(char *bp, char *ep, int lev, int flag);
Bits	trysuff(char *ep, int lev, int flag);
Bits	dict(char *bp, char *ep);
void	typeprint(Bits h);
void	pcomma(char *s);

/* helpers and start-up */
void	ise(void);
int	ordinal(void);
char*	skipv(char *s);
int	inun(char *bp, Bits h);
char*	ztos(char *as);
void	readdict(char *file);
|
||
|
|
||
|
/*
 * One strippable prefix.  A table of these ends with an entry whose
 * s is 0.
 */
typedef struct Ptab Ptab;
struct Ptab
{
	char*	s;	/* prefix text, compared with the start of the word by lookuppref */
	int	flag;	/* trypref tests this against IN to decide whether inun must approve */
};
|
||
|
|
||
|
typedef struct Suftab Suftab;
|
||
|
struct Suftab
|
||
|
{
|
||
|
char *suf;
|
||
|
Bits (*p1)(char*, char*, char*, int, int);
|
||
|
int n1;
|
||
|
char *d1;
|
||
|
char *a1;
|
||
|
int flag;
|
||
|
int affixable;
|
||
|
Bits (*p2)(char*, char*, char*, int, int);
|
||
|
int n2;
|
||
|
char *d2;
|
||
|
char *a2;
|
||
|
};
|
||
|
|
||
|
Suftab staba[] = {
|
||
|
{"aibohp",subst,1,"-e+ia","",NOUN, NOUN},
|
||
|
0
|
||
|
};
|
||
|
|
||
|
Suftab stabc[] =
|
||
|
{
|
||
|
{"cai",strip,1,"","+c",N_AFFIX, ADJ|NOUN},
|
||
|
{"citsi",strip,2,"","+ic",N_AFFIX, ADJ | N_AFFIX | NOUN},
|
||
|
{"citi",ize,1,"-e+ic","",N_AFFIX, ADJ },
|
||
|
{"cihparg",i_to_y,1,"-y+ic","",NOUN, ADJ|NOUN },
|
||
|
{"cipocs",ize,1,"-e+ic","",NOUN, ADJ },
|
||
|
{"cirtem",i_to_y,1,"-y+ic","",NOUN, ADJ },
|
||
|
{"cigol",i_to_y,1,"-y+ic","",NOUN, ADJ },
|
||
|
{"cimono",i_to_y,1,"-y+ic","",NOUN, ADJ },
|
||
|
{"cibohp",subst,1,"-e+ic","",NOUN, ADJ },
|
||
|
0
|
||
|
};
|
||
|
Suftab stabd[] =
|
||
|
{
|
||
|
{"de",strip,1,"","+d",ED,ADJ |COMP,i_to_y,2,"-y+ied","+ed"},
|
||
|
{"dooh",ily,4,"-y+ihood","+hood",NOUN | ADV, NOUN},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabe[] =
|
||
|
{
|
||
|
/*
|
||
|
* V_affix for comment ->commence->commentment??
|
||
|
*/
|
||
|
{"ecna",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
|
||
|
{"ecne",subst,1,"-t+ce","",ADJ,N_AFFIX|_Y|NOUN|VERB|ACTOR|V_AFFIX},
|
||
|
{"elbaif",i_to_y,4,"-y+iable","",V_IRREG,ADJ},
|
||
|
{"elba",CCe,4,"-e+able","+able",V_AFFIX,ADJ},
|
||
|
{"evi",subst,0,"-ion+ive","",N_AFFIX | V_AFFIX,NOUN | N_AFFIX| ADJ},
|
||
|
{"ezi",CCe,3,"-e+ize","+ize",N_AFFIX|ADJ ,V_AFFIX | VERB |ION | COMP},
|
||
|
{"ekil",strip,4,"","+like",N_AFFIX ,ADJ},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabg[] =
|
||
|
{
|
||
|
{"gniee",strip,3,"","+ing",V_IRREG ,ADJ|NOUN},
|
||
|
{"gnikam",strip,6,"","+making",NOUN,NOUN},
|
||
|
{"gnipeek",strip,7,"","+keeping",NOUN,NOUN},
|
||
|
{"gni",CCe,3,"-e+ing","+ing",V_IRREG ,ADJ|ED|NOUN},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabl[] =
|
||
|
{
|
||
|
{"ladio",strip,2,"","+al",NOUN |ADJ,ADJ},
|
||
|
{"laci",strip,2,"","+al",NOUN |ADJ,ADJ |NOUN|N_AFFIX},
|
||
|
{"latnem",strip,2,"","+al",N_AFFIX,ADJ},
|
||
|
{"lanoi",strip,2,"","+al",N_AFFIX,ADJ|NOUN},
|
||
|
{"luf",ily,3,"-y+iful","+ful",N_AFFIX,ADJ | NOUN},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabm[] =
|
||
|
{
|
||
|
/* congregational + ism */
|
||
|
{"msi",CCe,3,"-e+ism","ism",N_AFFIX|ADJ,NOUN},
|
||
|
{"margo",subst,-1,"-ph+m","",NOUN,NOUN},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabn[] =
|
||
|
{
|
||
|
{"noitacifi",i_to_y,6,"-y+ication","",ION,NOUN | N_AFFIX},
|
||
|
{"noitazi",ize,4,"-e+ation","",ION,NOUN| N_AFFIX},
|
||
|
{"noit",tion,3,"-e+ion","+ion",ION,NOUN| N_AFFIX | V_AFFIX |VERB|ACTOR},
|
||
|
{"naino",an,3,"","+ian",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
|
||
|
{"namow",strip,5,"","+woman",MAN,PROP_COLLECT|N_AFFIX},
|
||
|
{"nam",strip,3,"","+man",MAN,PROP_COLLECT | N_AFFIX | VERB},
|
||
|
{"na",an,1,"","+n",NOUN|PROP_COLLECT,NOUN | N_AFFIX},
|
||
|
{"nemow",strip,5,"","+women",MAN,PROP_COLLECT},
|
||
|
{"nem",strip,3,"","+man",MAN,PROP_COLLECT},
|
||
|
{"nosrep",strip,6,"","+person",MAN,PROP_COLLECT},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabp[] =
|
||
|
{
|
||
|
{"pihs",strip,4,"","+ship",NOUN|PROP_COLLECT,NOUN| N_AFFIX},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabr[] =
|
||
|
{
|
||
|
{"rehparg",subst,1,"-y+er","",ACTOR,NOUN,strip,2,"","+er"},
|
||
|
{"reyhparg",nop,0,"","",0,NOUN},
|
||
|
{"reyl",nop,0,"","",0,NOUN},
|
||
|
{"rekam",strip,5,"","+maker",NOUN,NOUN},
|
||
|
{"repeek",strip,6,"","+keeper",NOUN,NOUN},
|
||
|
{"re",strip,1,"","+r",ACTOR,NOUN | N_AFFIX|VERB|ADJ, i_to_y,2,"-y+ier","+er"},
|
||
|
{"rota",tion,2,"-e+or","",ION,NOUN| N_AFFIX|_Y},
|
||
|
{"rotc",tion,2,"","+or",ION,NOUN| N_AFFIX},
|
||
|
{"rotp",tion,2,"","+or",ION,NOUN| N_AFFIX},
|
||
|
0
|
||
|
};
|
||
|
Suftab stabs[] =
|
||
|
{
|
||
|
{"ssen",ily,4,"-y+iness","+ness",ADJ|ADV,NOUN| N_AFFIX},
|
||
|
{"ssel",ily,4,"-y+iless","+less",NOUN | PROP_COLLECT,ADJ },
|
||
|
{"se",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH , es,2,"-y+ies","+es"},
|
||
|
{"s'",s,2,"","+'s",PROP_COLLECT | NOUN,DONT_TOUCH },
|
||
|
{"s",s,1,"","+s",NOUN | V_IRREG,DONT_TOUCH },
|
||
|
0
|
||
|
};
|
||
|
Suftab stabt[] =
|
||
|
{
|
||
|
{"tnem",strip,4,"","+ment",V_AFFIX,NOUN | N_AFFIX | ADJ|VERB},
|
||
|
{"tse",strip,2,"","+st",EST,DONT_TOUCH, i_to_y,3,"-y+iest","+est" },
|
||
|
{"tsigol",i_to_y,2,"-y+ist","",N_AFFIX,NOUN | N_AFFIX},
|
||
|
{"tsi",CCe,3,"-e+ist","+ist",N_AFFIX|ADJ,NOUN | N_AFFIX|COMP},
|
||
|
0
|
||
|
};
|
||
|
Suftab staby[] =
|
||
|
{
|
||
|
{"ycna",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
|
||
|
{"ycne",subst,1,"-t+cy","",ADJ | N_AFFIX,NOUN | N_AFFIX},
|
||
|
{"ytilib",bility,5,"-le+ility","",ADJ | V_AFFIX,NOUN | N_AFFIX},
|
||
|
{"ytisuo",nop,0,"","",NOUN},
|
||
|
{"ytilb",nop,0,"","",0,NOUN},
|
||
|
{"yti",CCe,3,"-e+ity","+ity",ADJ ,NOUN | N_AFFIX },
|
||
|
{"ylb",y_to_e,1,"-e+y","",ADJ,ADV},
|
||
|
{"ylc",nop,0,"","",0},
|
||
|
{"ylelb",nop,0,"","",0},
|
||
|
{"ylelp",nop,0,"","",0},
|
||
|
{"yl",ily,2,"-y+ily","+ly",ADJ,ADV|COMP},
|
||
|
{"yrtem",subst,0,"-er+ry","",NOUN,NOUN | N_AFFIX},
|
||
|
{"y",CCe,1,"-e+y","+y",_Y,ADJ|COMP},
|
||
|
0
|
||
|
};
|
||
|
/* empty table, used by suftab for every final letter that has no rules */
Suftab stabz[] =
{
	{0}
};
|
||
|
Suftab* suftab[] =
|
||
|
{
|
||
|
staba,
|
||
|
stabz,
|
||
|
stabc,
|
||
|
stabd,
|
||
|
stabe,
|
||
|
stabz,
|
||
|
stabg,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
stabl,
|
||
|
stabm,
|
||
|
stabn,
|
||
|
stabz,
|
||
|
stabp,
|
||
|
stabz,
|
||
|
stabr,
|
||
|
stabs,
|
||
|
stabt,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
stabz,
|
||
|
staby,
|
||
|
stabz,
|
||
|
};
|
||
|
|
||
|
Ptab ptaba[] =
|
||
|
{
|
||
|
"anti", 0,
|
||
|
"auto", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabb[] =
|
||
|
{
|
||
|
"bio", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabc[] =
|
||
|
{
|
||
|
"counter", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabd[] =
|
||
|
{
|
||
|
"dis", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabe[] =
|
||
|
{
|
||
|
"electro", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabf[] =
|
||
|
{
|
||
|
"femto", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabg[] =
|
||
|
{
|
||
|
"geo", 0,
|
||
|
"giga", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabh[] =
|
||
|
{
|
||
|
"hyper", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabi[] =
|
||
|
{
|
||
|
"immuno", 0,
|
||
|
"im", IN,
|
||
|
"intra", 0,
|
||
|
"inter", 0,
|
||
|
"in", IN,
|
||
|
"ir", IN,
|
||
|
"iso", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabj[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabk[] =
|
||
|
{
|
||
|
"kilo", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabl[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabm[] =
|
||
|
{
|
||
|
"magneto", 0,
|
||
|
"mega", 0,
|
||
|
"meta", 0,
|
||
|
"micro", 0,
|
||
|
"mid", 0,
|
||
|
"milli", 0,
|
||
|
"mini", 0,
|
||
|
"mis", 0,
|
||
|
"mono", 0,
|
||
|
"multi", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabn[] =
|
||
|
{
|
||
|
"nano", 0,
|
||
|
"neuro", 0,
|
||
|
"non", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabo[] =
|
||
|
{
|
||
|
"out", 0,
|
||
|
"over", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabp[] =
|
||
|
{
|
||
|
"para", 0,
|
||
|
"photo", 0,
|
||
|
"pico", 0,
|
||
|
"poly", 0,
|
||
|
"pre", 0,
|
||
|
"pseudo", 0,
|
||
|
"psycho", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabq[] =
|
||
|
{
|
||
|
"quasi", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabr[] =
|
||
|
{
|
||
|
"radio", 0,
|
||
|
"re", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabs[] =
|
||
|
{
|
||
|
"semi", 0,
|
||
|
"stereo", 0,
|
||
|
"sub", 0,
|
||
|
"super", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabt[] =
|
||
|
{
|
||
|
"tele", 0,
|
||
|
"tera", 0,
|
||
|
"thermo", 0,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabu[] =
|
||
|
{
|
||
|
"ultra", 0,
|
||
|
"under", 0, /*must precede un*/
|
||
|
"un", IN,
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabv[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabw[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabx[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptaby[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
Ptab ptabz[] =
|
||
|
{
|
||
|
0
|
||
|
};
|
||
|
|
||
|
Ptab* preftab[] =
|
||
|
{
|
||
|
ptaba,
|
||
|
ptabb,
|
||
|
ptabc,
|
||
|
ptabd,
|
||
|
ptabe,
|
||
|
ptabf,
|
||
|
ptabg,
|
||
|
ptabh,
|
||
|
ptabi,
|
||
|
ptabj,
|
||
|
ptabk,
|
||
|
ptabl,
|
||
|
ptabm,
|
||
|
ptabn,
|
||
|
ptabo,
|
||
|
ptabp,
|
||
|
ptabq,
|
||
|
ptabr,
|
||
|
ptabs,
|
||
|
ptabt,
|
||
|
ptabu,
|
||
|
ptabv,
|
||
|
ptabw,
|
||
|
ptabx,
|
||
|
ptaby,
|
||
|
ptabz,
|
||
|
};
|
||
|
|
||
|
/*
 * One step of a derivation.  trypref and tryword fill in deriv[lev];
 * tryword concatenates the messages into affix[] when -v is set.
 */
typedef struct {
	char	*mesg;		/* text such as "+ing" or "+un" */
	enum {
		NONE,		/* no affix at this level */
		SUFF,		/* a suffix message */
		PREF		/* trypref adds PREF once per prefix stripped */
	} type;
} Deriv;
|
||
|
|
||
|
int aflag;
|
||
|
int cflag;
|
||
|
int fflag;
|
||
|
int vflag;
|
||
|
int xflag;
|
||
|
int nflag;
|
||
|
char word[500];
|
||
|
char* original;
|
||
|
Deriv emptyderiv;
|
||
|
Deriv deriv[DSIZ+3];
|
||
|
char affix[DSIZ*10]; /* 10 is longest affix message */
|
||
|
int prefcount;
|
||
|
int suffcount;
|
||
|
char* acmeid;
|
||
|
char space[300000]; /* must be as large as "words"+"space" in pcode run */
|
||
|
Bits encode[2048]; /* must be as long as "codes" in pcode run */
|
||
|
int nencode;
|
||
|
char voweltab[256];
|
||
|
char* spacep[128*128+1]; /* pointer to words starting with 'xx' */
|
||
|
Biobuf bin;
|
||
|
Biobuf bout;
|
||
|
|
||
|
char* codefile = "#9/lib/amspell";
|
||
|
char* brfile = "#9/lib/brspell";
|
||
|
char* Usage = "usage";
|
||
|
|
||
|
void
|
||
|
main(int argc, char *argv[])
|
||
|
{
|
||
|
char *ep, *cp;
|
||
|
char *dp;
|
||
|
int j, i, c;
|
||
|
int low;
|
||
|
Bits h;
|
||
|
|
||
|
Binit(&bin, 0, OREAD);
|
||
|
Binit(&bout, 1, OWRITE);
|
||
|
for(i=0; c = "aeiouyAEIOUY"[i]; i++)
|
||
|
voweltab[c] = 1;
|
||
|
while(argc > 1) {
|
||
|
if(argv[1][0] != '-')
|
||
|
break;
|
||
|
for(i=1; c = argv[1][i]; i++)
|
||
|
switch(c) {
|
||
|
default:
|
||
|
fprint(2, "usage: spell [-bcCvx] [-f file]\n");
|
||
|
exits(Usage);
|
||
|
|
||
|
case 'a':
|
||
|
aflag++;
|
||
|
continue;
|
||
|
|
||
|
case 'b':
|
||
|
ise();
|
||
|
if(!fflag)
|
||
|
codefile = brfile;
|
||
|
continue;
|
||
|
|
||
|
case 'C': /* for "correct" */
|
||
|
vflag++;
|
||
|
case 'c': /* for ocr */
|
||
|
cflag++;
|
||
|
continue;
|
||
|
|
||
|
case 'v':
|
||
|
vflag++;
|
||
|
continue;
|
||
|
|
||
|
case 'x':
|
||
|
xflag++;
|
||
|
continue;
|
||
|
|
||
|
case 'f':
|
||
|
if(argc <= 2) {
|
||
|
fprint(2, "spell: -f requires another argument\n");
|
||
|
exits(Usage);
|
||
|
}
|
||
|
argv++;
|
||
|
argc--;
|
||
|
codefile = argv[1];
|
||
|
fflag++;
|
||
|
goto brk;
|
||
|
}
|
||
|
brk:
|
||
|
argv++;
|
||
|
argc--;
|
||
|
}
|
||
|
readdict(codefile);
|
||
|
if(argc > 1) {
|
||
|
fprint(2, "usage: spell [-bcCvx] [-f file]\n");
|
||
|
exits(Usage);
|
||
|
}
|
||
|
if(aflag)
|
||
|
cflag = vflag = 0;
|
||
|
|
||
|
for(;;) {
|
||
|
affix[0] = 0;
|
||
|
original = Brdline(&bin, '\n');
|
||
|
if(original == 0)
|
||
|
exits(0);
|
||
|
original[Blinelen(&bin)-1] = 0;
|
||
|
low = 0;
|
||
|
|
||
|
if(aflag) {
|
||
|
acmeid = original;
|
||
|
while(*original != ':')
|
||
|
if(*original++ == 0)
|
||
|
exits(0);
|
||
|
while(*++original != ':')
|
||
|
if(*original == 0)
|
||
|
exits(0);
|
||
|
*original++ = 0;
|
||
|
}
|
||
|
for(ep=word,dp=original; j = *dp; ep++,dp++) {
|
||
|
if(ISLOWER(j))
|
||
|
low++;
|
||
|
if(ep >= word+sizeof(word)-1)
|
||
|
break;
|
||
|
*ep = j;
|
||
|
}
|
||
|
*ep = 0;
|
||
|
|
||
|
if(ISDIGIT(word[0]) && ordinal())
|
||
|
continue;
|
||
|
|
||
|
h = 0;
|
||
|
if(!low && !(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH)))
|
||
|
for(cp=original+1,dp=word+1; dp<ep; dp++,cp++)
|
||
|
*dp = Tolower(*cp);
|
||
|
if(!h)
|
||
|
for(;;) { /* at most twice */
|
||
|
if(h = trypref(ep,".",0,ALL|STOP|DONT_TOUCH))
|
||
|
break;
|
||
|
if(h = trysuff(ep,0,ALL|STOP|DONT_TOUCH))
|
||
|
break;
|
||
|
if(!ISUPPER(word[0]))
|
||
|
break;
|
||
|
cp = original;
|
||
|
dp = word;
|
||
|
while(*dp = *cp++) {
|
||
|
if(!low)
|
||
|
*dp = Tolower(*dp);
|
||
|
dp++;
|
||
|
}
|
||
|
word[0] = Tolower(word[0]);
|
||
|
}
|
||
|
|
||
|
if(cflag) {
|
||
|
if(!h || Set(h,STOP))
|
||
|
print("-");
|
||
|
else if(!vflag)
|
||
|
print("+");
|
||
|
else
|
||
|
print("%c",'0' + (suffcount>0) +
|
||
|
(prefcount>4? 8: 2*prefcount));
|
||
|
} else if(!h || Set(h,STOP)) {
|
||
|
if(aflag)
|
||
|
Bprint(&bout, "%s:%s\n", acmeid, original);
|
||
|
else
|
||
|
Bprint(&bout, "%s\n", original);
|
||
|
} else if(affix[0] != 0 && affix[0] != '.')
|
||
|
print("%s\t%s\n", affix, original);
|
||
|
}
|
||
|
exits(0);
|
||
|
}
|
||
|
|
||
|
/* strip exactly one suffix and do
|
||
|
* indicated routine(s), which may recursively
|
||
|
* strip suffixes
|
||
|
*/
|
||
|
Bits
|
||
|
trysuff(char* ep, int lev, int flag)
|
||
|
{
|
||
|
Suftab *t;
|
||
|
char *cp, *sp;
|
||
|
Bits h = 0;
|
||
|
int initchar = ep[-1];
|
||
|
|
||
|
flag &= ~MONO;
|
||
|
lev += DLEV;
|
||
|
if(lev < DSIZ) {
|
||
|
deriv[lev] = emptyderiv;
|
||
|
deriv[lev-1] = emptyderiv;
|
||
|
}
|
||
|
if(!ISLOWER(initchar))
|
||
|
return h;
|
||
|
for(t=suftab[initchar-'a']; sp=t->suf; t++) {
|
||
|
cp = ep;
|
||
|
while(*sp)
|
||
|
if(*--cp != *sp++)
|
||
|
goto next;
|
||
|
for(sp=ep-t->n1; --sp >= word && !ISVOWEL(*sp);)
|
||
|
;
|
||
|
if(sp < word)
|
||
|
continue;
|
||
|
if(!(t->affixable & flag))
|
||
|
return 0;
|
||
|
h = (*t->p1)(ep-t->n1, t->d1, t->a1, lev+1, t->flag|STOP);
|
||
|
if(!h && t->p2!=0) {
|
||
|
if(lev < DSIZ) {
|
||
|
deriv[lev] = emptyderiv;
|
||
|
deriv[lev+1] = emptyderiv;
|
||
|
}
|
||
|
h = (*t->p2)(ep-t->n2, t->d2, t->a2, lev, t->flag|STOP);
|
||
|
}
|
||
|
break;
|
||
|
next:;
|
||
|
}
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
nop(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
USED(ep);
|
||
|
USED(d);
|
||
|
USED(a);
|
||
|
USED(lev);
|
||
|
USED(flag);
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
cstrip(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
int temp = ep[0];
|
||
|
|
||
|
if(ISVOWEL(temp) && ISVOWEL(ep[-1])) {
|
||
|
switch(pair(ep[-1],ep[0])) {
|
||
|
case pair('a', 'a'):
|
||
|
case pair('a', 'e'):
|
||
|
case pair('a', 'i'):
|
||
|
case pair('e', 'a'):
|
||
|
case pair('e', 'e'):
|
||
|
case pair('e', 'i'):
|
||
|
case pair('i', 'i'):
|
||
|
case pair('o', 'a'):
|
||
|
return 0;
|
||
|
}
|
||
|
} else
|
||
|
if(temp==ep[-1]&&temp==ep[-2])
|
||
|
return 0;
|
||
|
return strip(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
strip(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
Bits h = trypref(ep, a, lev, flag);
|
||
|
|
||
|
USED(d);
|
||
|
if(Set(h,MONO) && ISVOWEL(*ep) && ISVOWEL(ep[-2]))
|
||
|
h = 0;
|
||
|
if(h)
|
||
|
return h;
|
||
|
if(ISVOWEL(*ep) && !ISVOWEL(ep[-1]) && ep[-1]==ep[-2]) {
|
||
|
h = trypref(ep-1,a,lev,flag|MONO);
|
||
|
if(h)
|
||
|
return h;
|
||
|
}
|
||
|
return trysuff(ep,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
s(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
if(lev > DLEV+1)
|
||
|
return 0;
|
||
|
if(*ep=='s') {
|
||
|
switch(ep[-1]) {
|
||
|
case 'y':
|
||
|
if(ISVOWEL(ep[-2])||ISUPPER(*word))
|
||
|
break; /*says Kennedys*/
|
||
|
case 'x':
|
||
|
case 'z':
|
||
|
case 's':
|
||
|
return 0;
|
||
|
case 'h':
|
||
|
switch(ep[-2]) {
|
||
|
case 'c':
|
||
|
case 's':
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return strip(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
an(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
USED(d);
|
||
|
if(!ISUPPER(*word)) /*must be proper name*/
|
||
|
return 0;
|
||
|
return trypref(ep,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
ize(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
int temp = ep[-1];
|
||
|
Bits h;
|
||
|
|
||
|
USED(a);
|
||
|
ep[-1] = 'e';
|
||
|
h = strip(ep,"",d,lev,flag);
|
||
|
ep[-1] = temp;
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
y_to_e(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
Bits h;
|
||
|
int temp;
|
||
|
|
||
|
USED(a);
|
||
|
switch(ep[-1]) {
|
||
|
case 'a':
|
||
|
case 'e':
|
||
|
case 'i':
|
||
|
return 0;
|
||
|
}
|
||
|
temp = *ep;
|
||
|
*ep++ = 'e';
|
||
|
h = strip(ep,"",d,lev,flag);
|
||
|
ep[-1] = temp;
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
ily(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
int temp = ep[0];
|
||
|
char *cp = ep;
|
||
|
|
||
|
if(temp==ep[-1]&&temp==ep[-2]) /* sillly */
|
||
|
return 0;
|
||
|
if(*--cp=='y' && !ISVOWEL(*--cp)) /* happyly */
|
||
|
while(cp>word)
|
||
|
if(ISVOWEL(*--cp)) /* shyness */
|
||
|
return 0;
|
||
|
if(ep[-1]=='i')
|
||
|
return i_to_y(ep,d,a,lev,flag);
|
||
|
return cstrip(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
bility(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
*ep++ = 'l';
|
||
|
return y_to_e(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
i_to_y(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
Bits h;
|
||
|
int temp;
|
||
|
|
||
|
if(ISUPPER(*word))
|
||
|
return 0;
|
||
|
if((temp=ep[-1])=='i' && !ISVOWEL(ep[-2])) {
|
||
|
ep[-1] = 'y';
|
||
|
a = d;
|
||
|
}
|
||
|
h = cstrip(ep,"",a,lev,flag);
|
||
|
ep[-1] = temp;
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
es(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
if(lev>DLEV)
|
||
|
return 0;
|
||
|
switch(ep[-1]) {
|
||
|
default:
|
||
|
return 0;
|
||
|
case 'i':
|
||
|
return i_to_y(ep,d,a,lev,flag);
|
||
|
case 'h':
|
||
|
switch(ep[-2]) {
|
||
|
default:
|
||
|
return 0;
|
||
|
case 'c':
|
||
|
case 's':
|
||
|
break;
|
||
|
}
|
||
|
case 's':
|
||
|
case 'z':
|
||
|
case 'x':
|
||
|
return strip(ep,d,a,lev,flag);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
subst(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
char *u,*t;
|
||
|
Bits h;
|
||
|
|
||
|
USED(a);
|
||
|
if(skipv(skipv(ep-1)) < word)
|
||
|
return 0;
|
||
|
for(t=d; *t!='+'; t++)
|
||
|
continue;
|
||
|
for(u=ep; *--t!='-';)
|
||
|
*--u = *t;
|
||
|
h = strip(ep,"",d,lev,flag);
|
||
|
while(*++t != '+')
|
||
|
continue;
|
||
|
while(*++t)
|
||
|
*u++ = *t;
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
tion(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
switch(ep[-2]) {
|
||
|
default:
|
||
|
return trypref(ep,a,lev,flag);
|
||
|
case 'a':
|
||
|
case 'e':
|
||
|
case 'i':
|
||
|
case 'o':
|
||
|
case 'u':
|
||
|
return y_to_e(ep,d,a,lev,flag);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* possible consonant-consonant-e ending
|
||
|
*/
|
||
|
Bits
|
||
|
CCe(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
Bits h;
|
||
|
|
||
|
switch(ep[-1]) {
|
||
|
case 'l':
|
||
|
if(ISVOWEL(ep[-2]))
|
||
|
break;
|
||
|
switch(ep[-2]) {
|
||
|
case 'l':
|
||
|
case 'r':
|
||
|
case 'w':
|
||
|
break;
|
||
|
default:
|
||
|
return y_to_e(ep,d,a,lev,flag);
|
||
|
}
|
||
|
break;
|
||
|
case 'c':
|
||
|
case 'g':
|
||
|
if(*ep == 'a') /* prevent -able for -eable */
|
||
|
return 0;
|
||
|
case 's':
|
||
|
case 'v':
|
||
|
case 'z':
|
||
|
if(ep[-2]==ep[-1])
|
||
|
break;
|
||
|
if(ISVOWEL(ep[-2]))
|
||
|
break;
|
||
|
case 'u':
|
||
|
if(h = y_to_e(ep,d,a,lev,flag))
|
||
|
return h;
|
||
|
if(!(ep[-2]=='n' && ep[-1]=='g'))
|
||
|
return 0;
|
||
|
}
|
||
|
return VCe(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* possible consonant-vowel-consonant-e ending
|
||
|
*/
|
||
|
Bits
|
||
|
VCe(char* ep, char* d, char* a, int lev, int flag)
|
||
|
{
|
||
|
int c;
|
||
|
Bits h;
|
||
|
|
||
|
c = ep[-1];
|
||
|
if(c=='e')
|
||
|
return 0;
|
||
|
if(!ISVOWEL(c) && ISVOWEL(ep[-2])) {
|
||
|
c = *ep;
|
||
|
*ep++ = 'e';
|
||
|
h = trypref(ep,d,lev,flag);
|
||
|
if(!h)
|
||
|
h = trysuff(ep,lev,flag);
|
||
|
if(h)
|
||
|
return h;
|
||
|
ep--;
|
||
|
*ep = c;
|
||
|
}
|
||
|
return cstrip(ep,d,a,lev,flag);
|
||
|
}
|
||
|
|
||
|
Ptab*
|
||
|
lookuppref(uchar** wp, char* ep)
|
||
|
{
|
||
|
Ptab *sp;
|
||
|
uchar *bp,*cp;
|
||
|
unsigned int initchar = Tolower(**wp);
|
||
|
|
||
|
if(!ISALPHA(initchar))
|
||
|
return 0;
|
||
|
for(sp=preftab[initchar-'a'];sp->s;sp++) {
|
||
|
bp = *wp;
|
||
|
for(cp= (uchar*)sp->s;*cp; )
|
||
|
if(*bp++!=*cp++)
|
||
|
goto next;
|
||
|
for(cp=bp;cp<(uchar*)ep;cp++)
|
||
|
if(ISVOWEL(*cp)) {
|
||
|
*wp = bp;
|
||
|
return sp;
|
||
|
}
|
||
|
next:;
|
||
|
}
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* while word is not in dictionary try stripping
|
||
|
* prefixes. Fail if no more prefixes.
|
||
|
*/
|
||
|
Bits
|
||
|
trypref(char* ep, char* a, int lev, int flag)
|
||
|
{
|
||
|
Ptab *tp;
|
||
|
char *bp, *cp;
|
||
|
char *pp;
|
||
|
Bits h;
|
||
|
char space[20];
|
||
|
|
||
|
if(lev<DSIZ) {
|
||
|
deriv[lev].mesg = a;
|
||
|
deriv[lev].type = *a=='.'? NONE: SUFF;
|
||
|
}
|
||
|
if(h = tryword(word,ep,lev,flag)) {
|
||
|
if(Set(h, flag&~MONO) && (flag&MONO) <= Set(h, MONO))
|
||
|
return h;
|
||
|
h = 0;
|
||
|
}
|
||
|
bp = word;
|
||
|
pp = space;
|
||
|
if(lev<DSIZ) {
|
||
|
deriv[lev+1].mesg = pp;
|
||
|
deriv[lev+1].type = 0;
|
||
|
}
|
||
|
while(tp=lookuppref((uchar**)&bp,ep)) {
|
||
|
*pp++ = '+';
|
||
|
cp = tp->s;
|
||
|
while(pp<space+sizeof(space) && (*pp = *cp++))
|
||
|
pp++;
|
||
|
deriv[lev+1].type += PREF;
|
||
|
h = tryword(bp,ep,lev+1,flag);
|
||
|
if(Set(h,NOPREF) ||
|
||
|
((tp->flag&IN) && inun(bp-2,h)==0)) {
|
||
|
h = 0;
|
||
|
break;
|
||
|
}
|
||
|
if(Set(h,flag&~MONO) && (flag&MONO) <= Set(h, MONO))
|
||
|
break;
|
||
|
h = 0;
|
||
|
}
|
||
|
if(lev < DSIZ) {
|
||
|
deriv[lev+1] = emptyderiv;
|
||
|
deriv[lev+2] = emptyderiv;
|
||
|
}
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
Bits
|
||
|
tryword(char* bp, char* ep, int lev, int flag)
|
||
|
{
|
||
|
int j;
|
||
|
Bits h = 0;
|
||
|
char duple[3];
|
||
|
|
||
|
if(ep-bp <= 1)
|
||
|
return h;
|
||
|
if(flag&MONO) {
|
||
|
if(lev<DSIZ) {
|
||
|
deriv[++lev].mesg = duple;
|
||
|
deriv[lev].type = SUFF;
|
||
|
}
|
||
|
duple[0] = '+';
|
||
|
duple[1] = *ep;
|
||
|
duple[2] = 0;
|
||
|
}
|
||
|
h = dict(bp, ep);
|
||
|
if(vflag==0 || h==0)
|
||
|
return h;
|
||
|
/*
|
||
|
* when derivations are wanted, collect them
|
||
|
* for printing
|
||
|
*/
|
||
|
j = lev;
|
||
|
prefcount = suffcount = 0;
|
||
|
do {
|
||
|
if(j<DSIZ && deriv[j].type) {
|
||
|
strcat(affix, deriv[j].mesg);
|
||
|
if(deriv[j].type == SUFF)
|
||
|
suffcount++;
|
||
|
else if(deriv[j].type != NONE)
|
||
|
prefcount = deriv[j].type/PREF;
|
||
|
}
|
||
|
} while(--j > 0);
|
||
|
return h;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
inun(char* bp, Bits h)
|
||
|
{
|
||
|
if(*bp == 'u')
|
||
|
return Set(h, IN) == 0;
|
||
|
/* *bp == 'i' */
|
||
|
if(Set(h, IN) == 0)
|
||
|
return 0;
|
||
|
switch(bp[2]) {
|
||
|
case 'r':
|
||
|
return bp[1] == 'r';
|
||
|
case 'm':
|
||
|
case 'p':
|
||
|
return bp[1] == 'm';
|
||
|
}
|
||
|
return bp[1] == 'n';
|
||
|
}
|
||
|
|
||
|
char*
|
||
|
skipv(char *s)
|
||
|
{
|
||
|
if(s >= word && ISVOWEL(*s))
|
||
|
s--;
|
||
|
while(s >= word && !ISVOWEL(*s))
|
||
|
s--;
|
||
|
return s;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* crummy way to Britishise
|
||
|
*/
|
||
|
void
|
||
|
ise(void)
|
||
|
{
|
||
|
Suftab *p;
|
||
|
int i;
|
||
|
|
||
|
for(i=0; i<26; i++)
|
||
|
for(p = suftab[i]; p->suf; p++) {
|
||
|
p->suf = ztos(p->suf);
|
||
|
p->d1 = ztos(p->d1);
|
||
|
p->a1 = ztos(p->a1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
 * Return `as' with every 'z' replaced by 's'.
 *
 * A string without a 'z' is returned as is.  Otherwise the result is a
 * freshly allocated copy which the caller keeps; `as' itself is never
 * written.  If the allocation fails, `as' is returned unchanged (that
 * string keeps its 'z' spelling); the copy used to be written through
 * an unchecked pointer.
 */
char*
ztos(char *as)
{
	char *s, *ds;

	if(strchr(as, 'z') == 0)
		return as;
	ds = malloc(strlen(as)+1);
	if(ds == 0)
		return as;
	for(s=ds; *as; as++)
		*s++ = *as=='z'? 's': *as;
	*s = 0;
	return ds;
}
|
||
|
|
||
|
Bits
|
||
|
dict(char* bp, char* ep)
|
||
|
{
|
||
|
char *cp, *cp1, *w, *wp, *we;
|
||
|
int n, f;
|
||
|
|
||
|
w = bp;
|
||
|
we = ep;
|
||
|
n = ep-bp;
|
||
|
if(n <= 1)
|
||
|
return NOUN;
|
||
|
|
||
|
f = w[0] & 0x7f;
|
||
|
f *= 128;
|
||
|
f += w[1] & 0x7f;
|
||
|
bp = spacep[f];
|
||
|
ep = spacep[f+1];
|
||
|
|
||
|
loop:
|
||
|
if(bp >= ep) {
|
||
|
if(xflag)
|
||
|
fprint(2, "=%.*s\n", utfnlen(w, n), w);
|
||
|
return 0;
|
||
|
}
|
||
|
/*
|
||
|
* find the beginning of some word in the middle
|
||
|
*/
|
||
|
cp = bp + (ep-bp)/2;
|
||
|
|
||
|
while(cp > bp && !(*cp & 0x80))
|
||
|
cp--;
|
||
|
while(cp > bp && (cp[-1] & 0x80))
|
||
|
cp--;
|
||
|
|
||
|
wp = w + 2; /* skip two letters */
|
||
|
cp1 = cp + 2; /* skip affix code */
|
||
|
for(;;) {
|
||
|
if(wp >= we) {
|
||
|
if(*cp1 & 0x80)
|
||
|
goto found;
|
||
|
else
|
||
|
f = 1;
|
||
|
break;
|
||
|
}
|
||
|
if(*cp1 & 0x80) {
|
||
|
f = -1;
|
||
|
break;
|
||
|
}
|
||
|
f = *cp1++ - *wp++;
|
||
|
if(f != 0)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if(f < 0) {
|
||
|
while(!(*cp1 & 0x80))
|
||
|
cp1++;
|
||
|
bp = cp1;
|
||
|
goto loop;
|
||
|
}
|
||
|
ep = cp;
|
||
|
goto loop;
|
||
|
|
||
|
found:
|
||
|
f = ((cp[0] & 0x7) << 8) |
|
||
|
(cp[1] & 0xff);
|
||
|
if(xflag) {
|
||
|
fprint(2, "=%.*s ", utfnlen(w, n), w);
|
||
|
typeprint(encode[f]);
|
||
|
}
|
||
|
return encode[f];
|
||
|
}
|
||
|
|
||
|
void
|
||
|
typeprint(Bits h)
|
||
|
{
|
||
|
|
||
|
pcomma("");
|
||
|
if(h & NOUN)
|
||
|
pcomma("n");
|
||
|
if(h & PROP_COLLECT)
|
||
|
pcomma("pc");
|
||
|
if(h & VERB) {
|
||
|
if((h & VERB) == VERB)
|
||
|
pcomma("v");
|
||
|
else
|
||
|
if((h & VERB) == V_IRREG)
|
||
|
pcomma("vi");
|
||
|
else
|
||
|
if(h & ED)
|
||
|
pcomma("ed");
|
||
|
}
|
||
|
if(h & ADJ)
|
||
|
pcomma("a");
|
||
|
if(h & COMP) {
|
||
|
if((h & COMP) == ACTOR)
|
||
|
pcomma("er");
|
||
|
else
|
||
|
pcomma("comp");
|
||
|
}
|
||
|
if(h & DONT_TOUCH)
|
||
|
pcomma("d");
|
||
|
if(h & N_AFFIX)
|
||
|
pcomma("na");
|
||
|
if(h & ADV)
|
||
|
pcomma("adv");
|
||
|
if(h & ION)
|
||
|
pcomma("ion");
|
||
|
if(h & V_AFFIX)
|
||
|
pcomma("va");
|
||
|
if(h & MAN)
|
||
|
pcomma("man");
|
||
|
if(h & NOPREF)
|
||
|
pcomma("nopref");
|
||
|
if(h & MONO)
|
||
|
pcomma("ms");
|
||
|
if(h & IN)
|
||
|
pcomma("in");
|
||
|
if(h & _Y)
|
||
|
pcomma("y");
|
||
|
if(h & STOP)
|
||
|
pcomma("s");
|
||
|
fprint(2, "\n");
|
||
|
}
|
||
|
|
||
|
/*
 * Print s on fd 2 as the next item of a comma-separated list.
 * An empty string prints nothing and starts a new list, so that the
 * next item goes out without a leading comma.
 */
void
pcomma(char *s)
{
	static int started;

	if(*s == 0) {
		started = 0;
		return;
	}
	if(started)
		fprint(2, ",%s", s);
	else
		fprint(2, "%s", s);
	started = 1;
}
|
||
|
|
||
|
/*
|
||
|
* is the word on of the following
|
||
|
* 12th teen
|
||
|
* 21st end in 1
|
||
|
* 23rd end in 3
|
||
|
* 77th default
|
||
|
* called knowing word[0] is a digit
|
||
|
*/
|
||
|
int
|
||
|
ordinal(void)
|
||
|
{
|
||
|
char *cp = word;
|
||
|
static char sp[4];
|
||
|
|
||
|
while(ISDIGIT(*cp))
|
||
|
cp++;
|
||
|
strncpy(sp,cp,3);
|
||
|
if(ISUPPER(cp[0]) && ISUPPER(cp[1])) {
|
||
|
sp[0] = Tolower(cp[0]);
|
||
|
sp[1] = Tolower(cp[1]);
|
||
|
}
|
||
|
return 0 == strncmp(sp,
|
||
|
cp[-2]=='1'? "th": /* out of bounds if 1 digit */
|
||
|
*--cp=='1'? "st": /* harmless */
|
||
|
*cp=='2'? "nd":
|
||
|
*cp=='3'? "rd":
|
||
|
"th", 3);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* read in the dictionary.
|
||
|
* format is
|
||
|
* {
|
||
|
* short nencode;
|
||
|
* long encode[nencode];
|
||
|
* char space[*];
|
||
|
* };
|
||
|
*
|
||
|
* the encodings are a table all different
|
||
|
* affixes.
|
||
|
* the dictionary proper has 2 bytes
|
||
|
* that demark and then the rest of the
|
||
|
* word. the 2 bytes have the following
|
||
|
* 0x80 0x00 flag
|
||
|
* 0x78 0x00 count of prefix bytes
|
||
|
* common with prev word
|
||
|
* 0x07 0xff affix code
|
||
|
*
|
||
|
* all ints are big endians in the file.
|
||
|
*/
|
||
|
void
|
||
|
readdict(char *file)
|
||
|
{
|
||
|
char *s, *is, *lasts, *ls;
|
||
|
int c, i, sp, p;
|
||
|
int f;
|
||
|
long l;
|
||
|
|
||
|
lasts = 0;
|
||
|
f = open(file, 0);
|
||
|
if(f == -1) {
|
||
|
fprint(2, "cannot open %s\n", file);
|
||
|
exits("open");
|
||
|
}
|
||
|
if(read(f, space, 2) != 2)
|
||
|
goto bad;
|
||
|
nencode = ((space[0]&0xff)<<8) | (space[1]&0xff);
|
||
|
if(read(f, space, 4*nencode) != 4*nencode)
|
||
|
goto bad;
|
||
|
s = space;
|
||
|
for(i=0; i<nencode; i++) {
|
||
|
l = (long)(s[0] & 0xff) << 24;
|
||
|
l |= (s[1] & 0xff) << 16;
|
||
|
l |= (s[2] & 0xff) << 8;
|
||
|
l |= s[3] & 0xff;
|
||
|
encode[i] = (Bits)l;
|
||
|
s += 4;
|
||
|
}
|
||
|
l = read(f, space, sizeof(space));
|
||
|
if(l == sizeof(space))
|
||
|
goto noroom;
|
||
|
is = space + (sizeof(space) - l);
|
||
|
memmove(is, space, l);
|
||
|
|
||
|
s = space;
|
||
|
c = *is++ & 0xff;
|
||
|
sp = -1;
|
||
|
i = 0;
|
||
|
|
||
|
loop:
|
||
|
if(s > is)
|
||
|
goto noroom;
|
||
|
if(c < 0) {
|
||
|
close(f);
|
||
|
while(sp < 128*128)
|
||
|
spacep[++sp] = s;
|
||
|
*s = 0x80; /* fence */
|
||
|
return;
|
||
|
}
|
||
|
p = (c>>3) & 0xf;
|
||
|
*s++ = c;
|
||
|
*s++ = *is++ & 0xff;
|
||
|
if(p <= 0)
|
||
|
i = (*is++ & 0xff)*128;
|
||
|
if(p <= 1) {
|
||
|
if(!(*is & 0x80))
|
||
|
i = i/128*128 + (*is++ & 0xff);
|
||
|
if(i <= sp) {
|
||
|
fprint(2, "the dict isnt sorted or \n");
|
||
|
fprint(2, "memmove didn't work\n");
|
||
|
goto bad;
|
||
|
}
|
||
|
while(sp < i)
|
||
|
spacep[++sp] = s-2;
|
||
|
}
|
||
|
ls = lasts;
|
||
|
lasts = s;
|
||
|
for(p-=2; p>0; p--)
|
||
|
*s++ = *ls++;
|
||
|
for(;;) {
|
||
|
if(is >= space+sizeof(space)) {
|
||
|
c = -1;
|
||
|
break;
|
||
|
}
|
||
|
c = *is++ & 0xff;
|
||
|
if(c & 0x80)
|
||
|
break;
|
||
|
*s++ = c;
|
||
|
}
|
||
|
*s = 0;
|
||
|
goto loop;
|
||
|
|
||
|
bad:
|
||
|
fprint(2, "trouble reading %s\n", file);
|
||
|
exits("read");
|
||
|
noroom:
|
||
|
fprint(2, "not enough space for dictionary\n");
|
||
|
exits("space");
|
||
|
}
|