2004-04-19 18:13:05 +00:00
|
|
|
|
#include <u.h>
|
|
|
|
|
#include <libc.h>
|
|
|
|
|
#include <bio.h>
|
|
|
|
|
/* Macros for Rune support of ctype.h-like functions */
|
|
|
|
|
|
2004-04-19 23:03:46 +00:00
|
|
|
|
#undef isupper
|
|
|
|
|
#undef islower
|
|
|
|
|
#undef isalpha
|
|
|
|
|
#undef isdigit
|
|
|
|
|
#undef isalnum
|
|
|
|
|
#undef isspace
|
|
|
|
|
#undef tolower
|
2005-05-07 22:42:06 +00:00
|
|
|
|
#define isupper(r) ('A' <= (r) && (r) <= 'Z')
|
|
|
|
|
#define islower(r) ('a' <= (r) && (r) <= 'z')
|
2004-04-19 18:13:05 +00:00
|
|
|
|
#define isalpha(r) (isupper(r) || islower(r))
|
|
|
|
|
#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
|
|
|
|
|
|
2005-05-07 22:42:06 +00:00
|
|
|
|
#define isdigit(r) ('0' <= (r) && (r) <= '9')
|
2004-04-19 18:13:05 +00:00
|
|
|
|
|
|
|
|
|
#define isalnum(r) (isalpha(r) || isdigit(r))
|
|
|
|
|
|
2005-05-07 22:42:06 +00:00
|
|
|
|
#define isspace(r) ((r) == ' ' || (r) == '\t' \
|
2004-04-19 18:13:05 +00:00
|
|
|
|
|| (0x0A <= (r) && (r) <= 0x0D))
|
|
|
|
|
|
|
|
|
|
#define tolower(r) ((r)-'A'+'a')
|
|
|
|
|
|
|
|
|
|
#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
|
|
|
|
|
|
|
|
|
|
#define WORDSIZ 4000
|
|
|
|
|
char *filename = "#9/lib/words";
|
|
|
|
|
Biobuf *dfile;
|
|
|
|
|
Biobuf bout;
|
|
|
|
|
Biobuf bin;
|
|
|
|
|
|
|
|
|
|
int fold;
|
|
|
|
|
int direc;
|
|
|
|
|
int exact;
|
|
|
|
|
int iflag;
|
|
|
|
|
int rev = 1; /*-1 for reverse-ordered file, not implemented*/
|
|
|
|
|
int (*compare)(Rune*, Rune*);
|
|
|
|
|
Rune tab = '\t';
|
|
|
|
|
Rune entry[WORDSIZ];
|
|
|
|
|
Rune word[WORDSIZ];
|
|
|
|
|
Rune key[50], orig[50];
|
|
|
|
|
Rune latin_fold_tab[] =
|
|
|
|
|
{
|
|
|
|
|
/* Table to fold latin 1 characters to ASCII equivalents
|
|
|
|
|
based at Rune value 0xc0
|
|
|
|
|
|
|
|
|
|
À Á Â Ã Ä Å Æ Ç
|
|
|
|
|
È É Ê Ë Ì Í Î Ï
|
|
|
|
|
Ð Ñ Ò Ó Ô Õ Ö ×
|
|
|
|
|
Ø Ù Ú Û Ü Ý Þ ß
|
|
|
|
|
à á â ã ä å æ ç
|
|
|
|
|
è é ê ë ì í î ï
|
|
|
|
|
ð ñ ò ó ô õ ö ÷
|
|
|
|
|
ø ù ú û ü ý þ ÿ
|
|
|
|
|
*/
|
|
|
|
|
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
|
|
|
|
|
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
|
|
|
|
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
|
|
|
|
|
'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
|
|
|
|
|
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
|
|
|
|
|
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
|
|
|
|
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
|
|
|
|
|
'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
int locate(void);
|
|
|
|
|
int acomp(Rune*, Rune*);
|
|
|
|
|
int getword(Biobuf*, Rune *rp, int n);
|
|
|
|
|
void torune(char*, Rune*);
|
|
|
|
|
void rcanon(Rune*, Rune*);
|
|
|
|
|
int ncomp(Rune*, Rune*);
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
|
{
|
|
|
|
|
int n;
|
|
|
|
|
|
|
|
|
|
filename = unsharp(filename);
|
|
|
|
|
|
|
|
|
|
Binit(&bin, 0, OREAD);
|
|
|
|
|
Binit(&bout, 1, OWRITE);
|
|
|
|
|
compare = acomp;
|
|
|
|
|
ARGBEGIN{
|
|
|
|
|
case 'd':
|
|
|
|
|
direc++;
|
|
|
|
|
break;
|
|
|
|
|
case 'f':
|
|
|
|
|
fold++;
|
|
|
|
|
break;
|
|
|
|
|
case 'i':
|
|
|
|
|
iflag++;
|
|
|
|
|
break;
|
|
|
|
|
case 'n':
|
|
|
|
|
compare = ncomp;
|
|
|
|
|
break;
|
|
|
|
|
case 't':
|
|
|
|
|
chartorune(&tab,ARGF());
|
|
|
|
|
break;
|
|
|
|
|
case 'x':
|
|
|
|
|
exact++;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
fprint(2, "%s: bad option %c\n", argv0, ARGC());
|
|
|
|
|
fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
|
|
|
|
|
exits("usage");
|
|
|
|
|
} ARGEND
|
|
|
|
|
if(!iflag){
|
|
|
|
|
if(argc >= 1) {
|
|
|
|
|
torune(argv[0], orig);
|
|
|
|
|
argv++;
|
|
|
|
|
argc--;
|
|
|
|
|
} else
|
|
|
|
|
iflag++;
|
|
|
|
|
}
|
|
|
|
|
if(argc < 1) {
|
|
|
|
|
direc++;
|
|
|
|
|
fold++;
|
|
|
|
|
} else
|
|
|
|
|
filename = argv[0];
|
|
|
|
|
if (!iflag)
|
|
|
|
|
rcanon(orig, key);
|
|
|
|
|
dfile = Bopen(filename, OREAD);
|
|
|
|
|
if(dfile == 0) {
|
|
|
|
|
fprint(2, "look: can't open %s\n", filename);
|
|
|
|
|
exits("no dictionary");
|
|
|
|
|
}
|
|
|
|
|
if(!iflag)
|
|
|
|
|
if(!locate())
|
|
|
|
|
exits("not found");
|
|
|
|
|
do {
|
|
|
|
|
if(iflag) {
|
|
|
|
|
Bflush(&bout);
|
|
|
|
|
if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
|
|
|
|
|
exits(0);
|
|
|
|
|
rcanon(orig, key);
|
|
|
|
|
if(!locate())
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (!exact || !acomp(word, key))
|
|
|
|
|
Bprint(&bout, "%S\n", entry);
|
|
|
|
|
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
|
|
|
|
|
rcanon(entry, word);
|
|
|
|
|
n = compare(key, word);
|
|
|
|
|
switch(n) {
|
|
|
|
|
case -1:
|
|
|
|
|
if(exact)
|
|
|
|
|
break;
|
|
|
|
|
case 0:
|
|
|
|
|
if (!exact || !acomp(word, orig))
|
|
|
|
|
Bprint(&bout, "%S\n", entry);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} while(iflag);
|
|
|
|
|
exits(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
locate(void)
|
|
|
|
|
{
|
2004-06-13 20:08:00 +00:00
|
|
|
|
vlong top, bot, mid;
|
|
|
|
|
int c;
|
2004-04-19 18:13:05 +00:00
|
|
|
|
int n;
|
|
|
|
|
|
|
|
|
|
bot = 0;
|
|
|
|
|
top = Bseek(dfile, 0L, 2);
|
|
|
|
|
for(;;) {
|
|
|
|
|
mid = (top+bot) / 2;
|
|
|
|
|
Bseek(dfile, mid, 0);
|
|
|
|
|
do
|
|
|
|
|
c = Bgetrune(dfile);
|
|
|
|
|
while(c>=0 && c!='\n');
|
|
|
|
|
mid = Boffset(dfile);
|
|
|
|
|
if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
|
|
|
|
|
break;
|
|
|
|
|
rcanon(entry, word);
|
|
|
|
|
n = compare(key, word);
|
|
|
|
|
switch(n) {
|
|
|
|
|
case -2:
|
|
|
|
|
case -1:
|
|
|
|
|
case 0:
|
|
|
|
|
if(top <= mid)
|
|
|
|
|
break;
|
|
|
|
|
top = mid;
|
|
|
|
|
continue;
|
|
|
|
|
case 1:
|
|
|
|
|
case 2:
|
|
|
|
|
bot = mid;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
Bseek(dfile, bot, 0);
|
|
|
|
|
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
|
|
|
|
|
rcanon(entry, word);
|
|
|
|
|
n = compare(key, word);
|
|
|
|
|
switch(n) {
|
|
|
|
|
case -2:
|
|
|
|
|
return 0;
|
|
|
|
|
case -1:
|
|
|
|
|
if(exact)
|
|
|
|
|
return 0;
|
|
|
|
|
case 0:
|
|
|
|
|
return 1;
|
|
|
|
|
case 1:
|
|
|
|
|
case 2:
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* acomp(s, t) returns:
|
|
|
|
|
* -2 if s strictly precedes t
|
|
|
|
|
* -1 if s is a prefix of t
|
|
|
|
|
* 0 if s is the same as t
|
|
|
|
|
* 1 if t is a prefix of s
|
|
|
|
|
* 2 if t strictly precedes s
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
acomp(Rune *s, Rune *t)
|
|
|
|
|
{
|
|
|
|
|
int cs, ct;
|
|
|
|
|
|
|
|
|
|
for(;;) {
|
|
|
|
|
cs = *s;
|
|
|
|
|
ct = *t;
|
|
|
|
|
if(cs != ct)
|
|
|
|
|
break;
|
|
|
|
|
if(cs == 0)
|
|
|
|
|
return 0;
|
|
|
|
|
s++;
|
|
|
|
|
t++;
|
|
|
|
|
}
|
|
|
|
|
if(cs == 0)
|
|
|
|
|
return -1;
|
|
|
|
|
if(ct == 0)
|
|
|
|
|
return 1;
|
|
|
|
|
if(cs < ct)
|
|
|
|
|
return -2;
|
|
|
|
|
return 2;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
torune(char *old, Rune *new)
|
|
|
|
|
{
|
|
|
|
|
do old += chartorune(new, old);
|
|
|
|
|
while(*new++);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
rcanon(Rune *old, Rune *new)
|
|
|
|
|
{
|
|
|
|
|
Rune r;
|
|
|
|
|
|
|
|
|
|
while((r = *old++) && r != tab) {
|
|
|
|
|
if (islatin1(r) && latin_fold_tab[r-0xc0])
|
|
|
|
|
r = latin_fold_tab[r-0xc0];
|
|
|
|
|
if(direc)
|
2005-05-07 22:42:06 +00:00
|
|
|
|
if(!(isalnum(r) || r == ' ' || r == '\t'))
|
2004-04-19 18:13:05 +00:00
|
|
|
|
continue;
|
|
|
|
|
if(fold)
|
|
|
|
|
if(isupper(r))
|
|
|
|
|
r = tolower(r);
|
|
|
|
|
*new++ = r;
|
|
|
|
|
}
|
|
|
|
|
*new = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
ncomp(Rune *s, Rune *t)
|
|
|
|
|
{
|
|
|
|
|
Rune *is, *it, *js, *jt;
|
|
|
|
|
int a, b;
|
|
|
|
|
int ssgn, tsgn;
|
|
|
|
|
|
|
|
|
|
while(isspace(*s))
|
|
|
|
|
s++;
|
|
|
|
|
while(isspace(*t))
|
|
|
|
|
t++;
|
|
|
|
|
ssgn = tsgn = -2*rev;
|
|
|
|
|
if(*s == '-') {
|
|
|
|
|
s++;
|
|
|
|
|
ssgn = -ssgn;
|
|
|
|
|
}
|
|
|
|
|
if(*t == '-') {
|
|
|
|
|
t++;
|
|
|
|
|
tsgn = -tsgn;
|
|
|
|
|
}
|
|
|
|
|
for(is = s; isdigit(*is); is++)
|
|
|
|
|
;
|
|
|
|
|
for(it = t; isdigit(*it); it++)
|
|
|
|
|
;
|
|
|
|
|
js = is;
|
|
|
|
|
jt = it;
|
|
|
|
|
a = 0;
|
|
|
|
|
if(ssgn == tsgn)
|
|
|
|
|
while(it>t && is>s)
|
|
|
|
|
if(b = *--it - *--is)
|
|
|
|
|
a = b;
|
|
|
|
|
while(is > s)
|
|
|
|
|
if(*--is != '0')
|
|
|
|
|
return -ssgn;
|
|
|
|
|
while(it > t)
|
|
|
|
|
if(*--it != '0')
|
|
|
|
|
return tsgn;
|
|
|
|
|
if(a)
|
|
|
|
|
return sgn(a)*ssgn;
|
|
|
|
|
if(*(s=js) == '.')
|
|
|
|
|
s++;
|
|
|
|
|
if(*(t=jt) == '.')
|
|
|
|
|
t++;
|
|
|
|
|
if(ssgn == tsgn)
|
|
|
|
|
while(isdigit(*s) && isdigit(*t))
|
|
|
|
|
if(a = *t++ - *s++)
|
|
|
|
|
return sgn(a)*ssgn;
|
|
|
|
|
while(isdigit(*s))
|
|
|
|
|
if(*s++ != '0')
|
|
|
|
|
return -ssgn;
|
|
|
|
|
while(isdigit(*t))
|
|
|
|
|
if(*t++ != '0')
|
|
|
|
|
return tsgn;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
getword(Biobuf *f, Rune *rp, int n)
|
|
|
|
|
{
|
|
|
|
|
long c;
|
|
|
|
|
|
|
|
|
|
while(n-- > 0) {
|
|
|
|
|
c = Bgetrune(f);
|
|
|
|
|
if(c < 0)
|
|
|
|
|
return 0;
|
|
|
|
|
if(c == '\n') {
|
2005-05-07 22:42:06 +00:00
|
|
|
|
*rp = '\0';
|
2004-04-19 18:13:05 +00:00
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
*rp++ = c;
|
|
|
|
|
}
|
|
|
|
|
fprint(2, "Look: word too long. Bailing out.\n");
|
|
|
|
|
return 0;
|
|
|
|
|
}
|