plan9port/src/cmd/look.c
2005-05-07 22:42:06 +00:00

349 lines
6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <u.h>
#include <libc.h>
#include <bio.h>
/* Macros for Rune support of ctype.h-like functions */
#undef isupper
#undef islower
#undef isalpha
#undef isdigit
#undef isalnum
#undef isspace
#undef tolower
#define isupper(r) ('A' <= (r) && (r) <= 'Z')
#define islower(r) ('a' <= (r) && (r) <= 'z')
#define isalpha(r) (isupper(r) || islower(r))
#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
#define isdigit(r) ('0' <= (r) && (r) <= '9')
#define isalnum(r) (isalpha(r) || isdigit(r))
#define isspace(r) ((r) == ' ' || (r) == '\t' \
|| (0x0A <= (r) && (r) <= 0x0D))
#define tolower(r) ((r)-'A'+'a')
#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
#define WORDSIZ 4000
char *filename = "#9/lib/words";
Biobuf *dfile;
Biobuf bout;
Biobuf bin;
int fold;
int direc;
int exact;
int iflag;
int rev = 1; /*-1 for reverse-ordered file, not implemented*/
int (*compare)(Rune*, Rune*);
Rune tab = '\t';
Rune entry[WORDSIZ];
Rune word[WORDSIZ];
Rune key[50], orig[50];
Rune latin_fold_tab[] =
{
/* Table to fold latin 1 characters to ASCII equivalents
based at Rune value 0xc0
À Á Â Ã Ä Å Æ Ç
È É Ê Ë Ì Í Î Ï
Ð Ñ Ò Ó Ô Õ Ö ×
Ø Ù Ú Û Ü Ý Þ ß
à á â ã ä å æ ç
è é ê ë ì í î ï
ð ñ ò ó ô õ ö ÷
ø ù ú û ü ý þ ÿ
*/
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
};
int locate(void);
int acomp(Rune*, Rune*);
int getword(Biobuf*, Rune *rp, int n);
void torune(char*, Rune*);
void rcanon(Rune*, Rune*);
int ncomp(Rune*, Rune*);
void
main(int argc, char *argv[])
{
int n;
filename = unsharp(filename);
Binit(&bin, 0, OREAD);
Binit(&bout, 1, OWRITE);
compare = acomp;
ARGBEGIN{
case 'd':
direc++;
break;
case 'f':
fold++;
break;
case 'i':
iflag++;
break;
case 'n':
compare = ncomp;
break;
case 't':
chartorune(&tab,ARGF());
break;
case 'x':
exact++;
break;
default:
fprint(2, "%s: bad option %c\n", argv0, ARGC());
fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
exits("usage");
} ARGEND
if(!iflag){
if(argc >= 1) {
torune(argv[0], orig);
argv++;
argc--;
} else
iflag++;
}
if(argc < 1) {
direc++;
fold++;
} else
filename = argv[0];
if (!iflag)
rcanon(orig, key);
dfile = Bopen(filename, OREAD);
if(dfile == 0) {
fprint(2, "look: can't open %s\n", filename);
exits("no dictionary");
}
if(!iflag)
if(!locate())
exits("not found");
do {
if(iflag) {
Bflush(&bout);
if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
exits(0);
rcanon(orig, key);
if(!locate())
continue;
}
if (!exact || !acomp(word, key))
Bprint(&bout, "%S\n", entry);
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -1:
if(exact)
break;
case 0:
if (!exact || !acomp(word, orig))
Bprint(&bout, "%S\n", entry);
continue;
}
break;
}
} while(iflag);
exits(0);
}
int
locate(void)
{
vlong top, bot, mid;
int c;
int n;
bot = 0;
top = Bseek(dfile, 0L, 2);
for(;;) {
mid = (top+bot) / 2;
Bseek(dfile, mid, 0);
do
c = Bgetrune(dfile);
while(c>=0 && c!='\n');
mid = Boffset(dfile);
if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
break;
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -2:
case -1:
case 0:
if(top <= mid)
break;
top = mid;
continue;
case 1:
case 2:
bot = mid;
continue;
}
break;
}
Bseek(dfile, bot, 0);
while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
rcanon(entry, word);
n = compare(key, word);
switch(n) {
case -2:
return 0;
case -1:
if(exact)
return 0;
case 0:
return 1;
case 1:
case 2:
continue;
}
}
return 0;
}
/*
* acomp(s, t) returns:
* -2 if s strictly precedes t
* -1 if s is a prefix of t
* 0 if s is the same as t
* 1 if t is a prefix of s
* 2 if t strictly precedes s
*/
int
acomp(Rune *s, Rune *t)
{
int cs, ct;
for(;;) {
cs = *s;
ct = *t;
if(cs != ct)
break;
if(cs == 0)
return 0;
s++;
t++;
}
if(cs == 0)
return -1;
if(ct == 0)
return 1;
if(cs < ct)
return -2;
return 2;
}
void
torune(char *old, Rune *new)
{
do old += chartorune(new, old);
while(*new++);
}
void
rcanon(Rune *old, Rune *new)
{
Rune r;
while((r = *old++) && r != tab) {
if (islatin1(r) && latin_fold_tab[r-0xc0])
r = latin_fold_tab[r-0xc0];
if(direc)
if(!(isalnum(r) || r == ' ' || r == '\t'))
continue;
if(fold)
if(isupper(r))
r = tolower(r);
*new++ = r;
}
*new = 0;
}
int
ncomp(Rune *s, Rune *t)
{
Rune *is, *it, *js, *jt;
int a, b;
int ssgn, tsgn;
while(isspace(*s))
s++;
while(isspace(*t))
t++;
ssgn = tsgn = -2*rev;
if(*s == '-') {
s++;
ssgn = -ssgn;
}
if(*t == '-') {
t++;
tsgn = -tsgn;
}
for(is = s; isdigit(*is); is++)
;
for(it = t; isdigit(*it); it++)
;
js = is;
jt = it;
a = 0;
if(ssgn == tsgn)
while(it>t && is>s)
if(b = *--it - *--is)
a = b;
while(is > s)
if(*--is != '0')
return -ssgn;
while(it > t)
if(*--it != '0')
return tsgn;
if(a)
return sgn(a)*ssgn;
if(*(s=js) == '.')
s++;
if(*(t=jt) == '.')
t++;
if(ssgn == tsgn)
while(isdigit(*s) && isdigit(*t))
if(a = *t++ - *s++)
return sgn(a)*ssgn;
while(isdigit(*s))
if(*s++ != '0')
return -ssgn;
while(isdigit(*t))
if(*t++ != '0')
return tsgn;
return 0;
}
int
getword(Biobuf *f, Rune *rp, int n)
{
long c;
while(n-- > 0) {
c = Bgetrune(f);
if(c < 0)
return 0;
if(c == '\n') {
*rp = '\0';
return 1;
}
*rp++ = c;
}
fprint(2, "Look: word too long. Bailing out.\n");
return 0;
}