add dict

2025-01-24 11:41:58 +00:00 · 2003-11-25 03:37:45 +00:00 · 2003-11-25 03:37:45 +00:00 · 0870887793
commit 0870887793
parent 091f74d0a0
27 changed files with 8343 additions and 0 deletions
--- a/src/cmd/dict/ahd.c
+++ b/src/cmd/dict/ahd.c
@ -0,0 +1,139 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * American Heritage Dictionary (encrypted)
+ */
+
+static Rune intab[256] = {	/* dictionary byte -> Rune; slots left 0 here are set to the identity on first use in ahdprintentry */
+	[0x82] 0xe9,	/* é */
+	[0x85] 0xe0,	/* à */
+	[0x89] 0xeb,	/* ë */
+	[0x8a] 0xe8,	/* è */
+	[0xa4] 0xf1,	/* ñ */
+	[0xf8] 0xb0,	/* ° */
+	[0xf9] 0xb7,	/* · ; NOTE(review): source bytes look like IBM code page 437 - confirm */
+};
+
+static char	tag[64];	/* name of the %@...@% tag being collected by ahdprintentry */
+
+enum{	/* ahdprintentry scanner states: plain text, seen '%', inside %@..., seen closing '@' */
+	Run, Openper, Openat, Closeat
+};
+
+/*
+ * Emit one decoded character: newline through outnl, plain ASCII
+ * through outchar, anything else as a rune.
+ */
+static void
+ahdemit(int c)
+{
+	if(c == '\n')
+		outnl(0);
+	else if(c < Runeself)
+		outchar(c);
+	else
+		outrune(c);
+}
+
+/*
+ * Decode and print one American Heritage entry.
+ * Each byte is xor'ed with (file offset >> 1) and mapped through intab.
+ * Markup of the form %@tag@% is recognized with a small state machine:
+ *	cmd 'h': printing stops when the tag "EH" is seen;
+ *	cmd 'r': tags are echoed as %@tag@%;
+ *	any other cmd: tags are dropped.
+ * breaklen is forced to 80 for the duration of the call.
+ */
+void
+ahdprintentry(Entry e, int cmd)
+{
+	static int inited;
+	long addr;
+	char *p, *t;
+	int savedbreak, state, c, i;
+
+	if(!inited){
+		/* bytes without an explicit mapping decode to themselves */
+		for(i=0; i<256; i++)
+			if(intab[i] == 0)
+				intab[i] = i;
+		inited = 1;
+	}
+	savedbreak = breaklen;
+	breaklen = 80;
+	t = tag;
+	state = Run;
+	addr = e.doff;
+	for(p=e.start; p<e.end; p++, addr++){
+		c = intab[(*p ^ (addr>>1))&0xff];
+		if(state == Run){
+			if(c == '%'){
+				t = tag;
+				state = Openper;
+			}else
+				ahdemit(c);
+		}else if(state == Openper){
+			if(c == '@')
+				state = Openat;
+			else{
+				/* the '%' was ordinary text after all */
+				outchar('%');
+				state = Run;
+				ahdemit(c);
+			}
+		}else if(state == Openat){
+			if(c == '@')
+				state = Closeat;
+			else if(t < &tag[sizeof tag-1])
+				*t++ = c;
+		}else if(state == Closeat){
+			if(c != '%'){
+				/* the '@' was part of the tag name; keep collecting */
+				if(t < &tag[sizeof tag-1])
+					*t++ = '@';
+				if(t < &tag[sizeof tag-1])
+					*t++ = c;
+				state = Openat;
+				continue;
+			}
+			*t = 0;
+			if(cmd == 'h'){
+				if(strcmp("EH", tag) == 0)
+					break;
+			}else if(cmd == 'r')
+				outprint("%%@%s@%%", tag);
+			state = Run;
+		}
+	}
+	outnl(0);
+	breaklen = savedbreak;
+}
+
+/*
+ * Find the start of the next entry at or after fromoff.
+ * The (decoded) stream is searched for "%@NL@%" and then for the
+ * following "%@2@%"; the offset of the "%@2@%" is returned.
+ * If only "%@NL@%" is found before end of file, its offset is
+ * returned; if neither is found (or the seek fails), -1.
+ */
+long
+ahdnextoff(long fromoff)
+{
+	static char *patterns[] = { "%@NL@%", "%@2@%", 0 };
+	int c, k = 0, state = 0;
+	char *pat = patterns[0];
+	long defoff = -1;
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	while((c = Bgetc(bdict)) >= 0){
+		c ^= (fromoff++>>1)&0xff;
+		if(c != pat[state]){
+			/*
+			 * Restart the match.  The mismatching byte may itself
+			 * begin a new occurrence (e.g. "%%@NL@%"), so test it
+			 * against pat[0] instead of discarding it.  Both patterns
+			 * contain '%' only at their ends, so no longer fallback
+			 * is ever needed.
+			 */
+			state = (c == pat[0]);
+			continue;
+		}
+		if(pat[++state])
+			continue;
+		/* whole pattern matched; move on to the next one, if any */
+		if((pat = patterns[++k]) != 0){
+			state = 0;
+			defoff = fromoff-6;	/* start of the 6-byte "%@NL@%" */
+			continue;
+		}
+		return fromoff-5;	/* start of the 5-byte "%@2@%" */
+	}
+	return defoff;
+}
+
+void
+ahdprintkey(void)
+{
+	Bprint(bout, "No pronunciations.\n");	/* no pronunciation key is available for this dictionary */
+}
--- a/src/cmd/dict/canonind.awk
+++ b/src/cmd/dict/canonind.awk
@ -0,0 +1,29 @@
+# turn output of mkindex into form needed by dict
+#
+# Input (rawindex): tab-separated lines; field 1 is the dictionary
+# offset, fields 2..NF are headwords.  A headword containing "(x)" is
+# emitted twice: once without the parenthesized part and once with it
+# (parentheses removed).
+# Output: unique "word<TAB>offset" lines in the order dict expects.
+# Uses a scratch file named "junk" in the current directory.
+BEGIN {
+	if(ARGC != 2) {
+		print "Usage: awk -F'	' -f canonind.awk rawindex > index"
+		exit 1
+	}
+	file = ARGV[1]
+	ARGV[1] = ""
+	while ((getline < file) > 0) {
+		for(i = 2; i <= NF; i++) {
+			w = $i
+			if(length(w) == 0)
+				continue
+			b = index(w, "(")
+			e = index(w, ")")
+			if(b && e && b < e) {
+				w1 = substr(w, 1, b-1)
+				w2 = substr(w, b+1, e-b-1)
+				w3 =  substr(w, e+1)
+				printf "%s%s\t%d\n", w1, w3, $1 > "junk"
+				printf "%s%s%s\t%d\n", w1, w2, w3, $1 > "junk"
+			} else
+				printf "%s\t%d\n", w, $1 > "junk"
+		}
+	}
+	# flush and close the scratch file so sort sees everything written to it
+	close("junk")
+	system("sort -u -t'	' +0f -1 +0 -1 +1n -2 < junk")
+	system("rm junk")
+	exit 0
+}
--- a/src/cmd/dict/comfix.awk
+++ b/src/cmd/dict/comfix.awk
@ -0,0 +1,56 @@
+# when raw index has a lot of entries like
+# 1578324	problematico, a, ci, che
+# apply this algorithm:
+#  treat things after comma as suffixes
+#  for each suffix:
+#      if single letter, replace last letter
+#      else search backwards for beginning of suffix
+#      and if it leads to an old suffix of approximately
+#      the same length, replace that suffix
+# This will still leave some commas to fix by hand
+# Usage: awk -F'	' -f comfix.awk rawindex > newrawindex
+
+NF == 2	{
+		i = index($2, ",")	# 0 when the headword field has no comma
+		if(i == 0 || length($2) == 0)
+			print $0
+		else {
+			n = split($2, a, /,[ ]*/)	# a[1] is the base word, a[2..n] the suffixes
+			w = a[1]
+			printf "%s\t%s\n", $1, w
+			for(i = 2; i <= n; i++) {
+				suf = a[i]
+				m = matchsuflen(w, suf)	# how many trailing letters of w the suffix replaces; 0 = no plausible match
+				if(m) {
+					nw = substr(w, 1, length(w)-m) suf
+					printf "%s\t%s\n", $1, nw
+				} else
+					printf "%s\t%s\n", $1, w ", " suf	# leave the comma form to be fixed by hand
+			}
+		}
+	}
+NF != 2 {
+	print $0	# pass through anything that is not offset<TAB>headwords
+	}
+
+# Return the number of trailing characters of w that suf should replace,
+# or 0 if suf does not look like a replacement for the end of w.
+# A one-letter suffix always replaces exactly the last letter.  Otherwise
+# find the last occurrence in w of the suffix's first letter and accept it
+# only if the tail it starts differs in length from suf by at most 3.
+function matchsuflen(w, suf,		wlen,suflen,c,pat,k,d)
+{
+	suflen = length(suf)
+	if(suflen == 1)
+		return 1
+	wlen = length(w)
+	c = substr(suf, 1, 1)
+	# k counts back from the end of w until the character c is found
+	k = 1
+	while(k <= wlen && substr(w, wlen-k+1, 1) != c)
+		k++
+	if(k > wlen)
+		return 0
+	d = k-suflen
+	if(d < 0)
+		d = -d
+	return (d > 3) ? 0 : k
+}
--- a/src/cmd/dict/dict.c
+++ b/src/cmd/dict/dict.c
@ -0,0 +1,681 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <regexp.h>
+#include <ctype.h>
+#include "dict.h"
+
+/*
+ * Assumed index file structure: lines of form
+ * 	[^\t]+\t[0-9]+
+ * First field is key, second is byte offset into dictionary.
+ * Should be sorted with args -u -t'	' +0f -1 +0 -1 +1n -2
+ */
+typedef struct Addr Addr;
+
+struct Addr {	/* growable list of dictionary offsets; allocated with room for maxn entries in doff (see main and search) */
+	int	n;		/* number of offsets */
+	int	cur;		/* current position within doff array */
+	int	maxn;		/* actual current size of doff array */
+	ulong	doff[1];	/* doff[maxn], with 0..n-1 significant */
+};
+
+Biobuf	binbuf;
+Biobuf	boutbuf;
+Biobuf	*bin = &binbuf;		/* user cmd input */
+Biobuf	*bout = &boutbuf;	/* output */
+Biobuf	*bdict;			/* dictionary */
+Biobuf	*bindex;		/* index file */
+long	indextop;		/* index offset at end of file */
+int	lastcmd;		/* last executed command */
+Addr	*dot;			/* "current" address */
+Dict	*dict;			/* current dictionary */
+int	linelen;		/* reset to 0 by main for each command line; execcmd adds the width of the entry number it prints */
+int	breaklen = 60;		/* NOTE(review): presumably the output line-break width used by the out* routines - confirm in utils.c */
+int	outinhibit;		/* not referenced in this file; NOTE(review): presumably consulted by the out* routines - confirm */
+int	debug;			/* incremented once per -D flag; enables diagnostics in execcmd and locate */
+
+void	execcmd(int);
+int	getpref(char*, Rune*);
+Entry	getentry(int);
+int	getfield(Rune*);
+long	locate(Rune*);
+int	parseaddr(char*, char**);
+int	parsecmd(char*);
+int	search(char*, int);
+long	seeknextline(Biobuf*, long);
+void	setdotnext(void);
+void	setdotprev(void);
+void	sortaddr(Addr*);
+void	usage(void);
+
+enum {
+	Plen=300,	/* max length of a search pattern */
+	Fieldlen=200,	/* max length of an index field */
+	Aslots=10,	/* initial number of slots in an address */
+};
+
+/*
+ * dict [-d dict] [-k] [-c cmd] [word]
+ *
+ * Changes directory to $PLAN9 (default /usr/local/plan9), picks the
+ * first dictionary whose data and index files are both accessible
+ * (or the one named with -d), then either runs a single command
+ * (-c cmd, or "/word/P" when a word is given) or reads commands
+ * from standard input until end of file.
+ */
+void
+main(int argc, char **argv)
+{
+	int i, cmd, kflag;
+	char *line, *p, *root;
+
+	Binit(&binbuf, 0, OREAD);
+	Binit(&boutbuf, 1, OWRITE);
+	kflag = 0;
+	line = 0;
+	dict = 0;
+	/*
+	 * Keep the directory in its own variable: p is reused below for
+	 * option arguments and the search word, and the "can't open"
+	 * messages must still name the directory.
+	 */
+	root = getenv("PLAN9");
+	if(root == nil)
+		root = "/usr/local/plan9";
+	if(chdir(root) < 0)
+		sysfatal("chdir %s: %r", root);
+
+	for(i=0; dicts[i].name; i++){
+		if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
+			dict = &dicts[i];
+			break;
+		}
+	}
+	ARGBEGIN {
+		case 'd':
+			p = ARGF();
+			dict = 0;
+			if(p) {
+				for(i=0; dicts[i].name; i++)
+					if(strcmp(p, dicts[i].name)==0) {
+						dict = &dicts[i];
+						break;
+					}
+			}
+			if(!dict)
+				usage();
+			break;
+		case 'c':
+			line = ARGF();
+			if(!line)
+				usage();
+			break;
+		case 'k':
+			kflag++;
+			break;
+		case 'D':
+			debug++;
+			break;
+		default:
+			usage();
+	ARGEND }
+	if(dict == 0){
+		err("no dictionaries present on this system");
+		exits("nodict");
+	}
+
+	if(kflag) {
+		(*dict->printkey)();
+		exits(0);
+	}
+	if(argc > 1)
+		usage();
+	else if(argc == 1) {
+		if(line)
+			usage();
+		p = argv[0];
+		line = malloc(strlen(p)+5);	/* "/" + word + "/P\n" + NUL */
+		if(line == nil) {
+			err("out of memory");
+			exits("nomem");
+		}
+		sprint(line, "/%s/P\n", p);
+	}
+	bdict = Bopen(dict->path, OREAD);
+	if(!bdict) {
+		err("can't open dictionary %s/%s", root, dict->path);
+		exits("nodict");
+	}
+	bindex = Bopen(dict->indexpath, OREAD);
+	if(!bindex) {
+		err("can't open index %s/%s", root, dict->indexpath);
+		exits("noindex");
+	}
+	indextop = Bseek(bindex, 0L, 2);
+
+	/* Addr already contains one doff slot, hence Aslots-1 extra */
+	dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
+	if(dot == nil) {
+		err("out of memory");
+		exits("nomem");
+	}
+	dot->n = 0;
+	dot->cur = 0;
+	dot->maxn = Aslots;
+	lastcmd = 0;
+
+	if(line) {
+		cmd = parsecmd(line);
+		if(cmd)
+			execcmd(cmd);
+	} else {
+		for(;;) {
+			Bprint(bout, "*");
+			Bflush(bout);
+			line = Brdline(bin, '\n');
+			linelen = 0;
+			if(!line)
+				break;
+			cmd = parsecmd(line);
+			if(cmd) {
+				execcmd(cmd);
+				lastcmd = cmd;
+			}
+		}
+	}
+	exits(0);
+}
+
+/*
+ * Print the usage line and the list of known dictionaries on bout,
+ * bracketing those whose data or index file is not accessible,
+ * then exit with status "usage".
+ */
+void
+usage(void)
+{
+	Dict *d;
+	char *lbr, *rbr;
+	int missing;
+
+	Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
+	Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
+	for(d = dicts; d->name; d++){
+		missing = access(d->path, 0)<0 || access(d->indexpath, 0)<0;
+		lbr = missing ? "[" : "";
+		rbr = missing ? "]" : "";
+		Bprint(bout, "   %s%s\t%s%s\n", lbr, d->name, d->desc, rbr);
+	}
+	exits("usage");
+}
+
+/*
+ * Parse one command line: an optional address followed by a command
+ * letter (a, h, p, r in either case) or a bare newline.
+ * Returns the command character as typed (case preserved), or 0 if the
+ * address or command is bad.  A bare newline repeats lastcmd, except
+ * that it means 'H' when there is no last command and 'p' after 'H'.
+ */
+int
+parsecmd(char *line)
+{
+	char *rest;
+	int typed, c;
+
+	if(parseaddr(line, &rest) < 0)
+		return 0;
+	line = rest;
+	typed = *line;
+	c = typed;
+	if(isupper(c))
+		c = tolower(c);
+	switch(c){
+	case 'a':
+	case 'h':
+	case 'p':
+	case 'r':
+	case '\n':
+		break;
+	default:
+		err("unknown command %c", c);
+		return 0;
+	}
+	if(c != '\n'){
+		if(line[1] != '\n' && line[1] != 0)
+			err("extra stuff after command %c ignored", c);
+		return typed;
+	}
+	/* bare newline: derive the command from the previous one */
+	if(lastcmd == 0)
+		return 'H';
+	if(lastcmd == 'H')
+		return 'p';
+	return lastcmd;
+}
+
+/*
+ * Execute command cmd on dot.
+ * A lower-case command acts on the current entry only and then
+ * advances dot->cur; an upper-case command acts on every entry
+ * starting from the first, prefixing each with its number.
+ * dot->cur wraps to 0 when it runs off the end.
+ *	a: print the dictionary offset
+ *	h, p, r: fetch the entry and hand it to the dictionary's printentry
+ */
+void
+execcmd(int cmd)
+{
+	Entry e;
+	int idx, all;
+
+	all = 0;
+	idx = dot->cur;
+	if(isupper(cmd)) {
+		all = 1;
+		cmd = tolower(cmd);
+		idx = 0;
+	}
+
+	if(debug && all && cmd == 'a')
+		Bprint(bout, "%d entries, cur=%d\n", dot->n, idx+1);
+	while(idx < dot->n){
+		if(all) {
+			Bprint(bout, "%d\t", idx+1);
+			linelen += 4 + (idx >= 10);
+		}
+		if(cmd == 'a')
+			Bprint(bout, "#%lud\n", dot->doff[idx]);
+		else if(cmd == 'h' || cmd == 'p' || cmd == 'r') {
+			e = getentry(idx);
+			(*dict->printentry)(e, cmd);
+		}
+		idx++;
+		if(!all)
+			break;
+		if(cmd == 'p' || cmd == 'r') {
+			/* blank line between full entries */
+			Bputc(bout, '\n');
+			linelen = 0;
+		}
+	}
+	if(idx >= dot->n)
+		idx = 0;
+	dot->cur = idx;
+}
+
+/*
+ * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
+ * Answer goes in dot.
+ * Return -1 if address starts, but get error.
+ * Return 0 if no address.
+ * Return 1 if an address was parsed.
+ * On a 0 or 1 return, *eptr points at the first unconsumed character.
+ * A regular expression may be ended by its delimiter, by a newline,
+ * or by the end of the string (as with -c '/word').
+ */
+int
+parseaddr(char *line, char **eptr)
+{
+	int delim, plen;
+	ulong v;
+	char *e;
+	char pat[Plen];
+
+	if(*line == '/' || *line == '!') {
+		/* anchored regular expression match; '!' means no folding */
+		if(*line == '/') {
+			delim = '/';
+			e = strpbrk(line+1, "/\n");
+		} else {
+			delim = '!';
+			e = strpbrk(line+1, "!\n");
+		}
+		/* no delimiter and no newline: the pattern runs to the end of the string */
+		if(e == nil)
+			e = line + strlen(line);
+		plen = e-line-1;
+		if(plen >= Plen-3) {
+			err("pattern too big");
+			return -1;
+		}
+		pat[0] = '^';
+		memcpy(pat+1, line+1, plen);
+		pat[plen+1] = '$';
+		pat[plen+2] = 0;
+		/* step over the closing delimiter, but not over a newline or the NUL */
+		if(*e == delim)
+			line = e+1;
+		else
+			line = e;
+		if(!search(pat, delim == '/')) {
+			err("pattern not found");
+			return -1;
+		}
+	} else if(*line == '#') {
+		/* absolute byte offset into dictionary */
+		line++;
+		if(!isdigit((uchar)*line))
+			return -1;
+		v = strtoul(line, &e, 10);
+		line = e;
+		dot->doff[0] = v;
+		dot->n = 1;
+		dot->cur = 0;
+	} else if(isdigit((uchar)*line)) {
+		/* 1-based position within the current list of matches */
+		v = strtoul(line, &e, 10);
+		line = e;
+		if(v < 1 || v > dot->n)
+			err(".%lud not in range [1,%d], ignored",
+				v, dot->n);
+		else
+			dot->cur = v-1;
+	} else if(*line == '.') {
+		line++;
+	} else {
+		*eptr = line;
+		return 0;
+	}
+	while(*line == '+' || *line == '-') {
+		if(*line == '+')
+			setdotnext();
+		else
+			setdotprev();
+		line++;
+	}
+	*eptr = line;
+	return 1;
+}
+
+/*
+ * Index file is sorted by folded field1.
+ * Method: find pre, a folded prefix of r.e. pat,
+ * and then low = offset to beginning of
+ * line in index file where first match of prefix occurs.
+ * Then go through index until prefix no longer matches,
+ * adding each line that matches real pattern to dot.
+ * Finally, sort dot offsets (uniquing).
+ * We know pat len < Plen, and that it is surrounded by ^..$
+ *
+ * If the whole pattern is literal, no regular expression is compiled
+ * and entries are compared with acomp instead.
+ * Returns the number of offsets found (0 if none); dot is left
+ * untouched when the prefix cannot be located at all.
+ */
+int
+search(char *pat, int dofold)
+{
+	int needre, prelen, match, n;
+	Reprog *re;
+	long ioff, v;
+	Rune pre[Plen];
+	Rune lit[Plen];
+	Rune entry[Fieldlen];
+	char fpat[Plen];
+
+	re = nil;
+	prelen = getpref(pat+1, pre);
+	if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
+		/* pattern is entirely literal */
+		runescpy(lit, pre);
+		if(dofold)
+			fold(lit);
+		needre = 0;
+	} else {
+		needre = 1;
+		if(dofold) {
+			foldre(fpat, pat);
+			re = regcomp(fpat);
+		} else
+			re = regcomp(pat);
+	}
+	fold(pre);
+	ioff = locate(pre);
+	if(ioff < 0) {
+		free(re);
+		return 0;
+	}
+	dot->n = 0;
+	Bseek(bindex, ioff, 0);
+	for(;;) {
+		if(!getfield(entry))
+			break;
+		if(dofold)
+			fold(entry);
+		if(needre)
+			match = rregexec(re, entry, 0, 0);
+		else
+			match = (acomp(lit, entry) == 0);
+		if(match) {
+			/* second field of the index line is the dictionary offset */
+			if(!getfield(entry))
+				break;
+			v = runetol(entry);
+			if(dot->n >= dot->maxn) {
+				n = 2*dot->maxn;
+				dot = realloc(dot,
+					sizeof(Addr)+(n-1)*sizeof(ulong));
+				if(!dot) {
+					err("out of memory");
+					exits("nomem");
+				}
+				dot->maxn = n;
+			}
+			dot->doff[dot->n++] = v;
+		} else {
+			if(!dofold)
+				fold(entry);
+			if(*pre) {
+				/* stop once the index has moved past the prefix */
+				n = acomp(pre, entry);
+				if(n < -1 || (!needre && n < 0))
+					break;
+			}
+			/* get to next index entry */
+			if(!getfield(entry))
+				break;
+		}
+	}
+	/* the compiled expression is malloc'ed by regcomp; release it */
+	free(re);
+	sortaddr(dot);
+	dot->cur = 0;
+	return dot->n;
+}
+
+/*
+ * Return offset in index file of first line whose folded
+ * first field has pre as a prefix.  -1 if none found.
+ * An empty pre matches everything, so 0 is returned.
+ * Works in two phases: a binary search over byte offsets (snapped
+ * forward to line starts by seeknextline) narrows [bot, top], then a
+ * linear scan from bot finds the exact line.
+ * NOTE(review): acomp is defined elsewhere; from its use here a result
+ * of -2 appears to mean pre sorts before entry without being a prefix,
+ * and -1 that pre is a proper prefix of entry - confirm.
+ */
+long
+locate(Rune *pre)
+{
+	long top, bot, mid;
+	Rune entry[Fieldlen];
+
+	if(*pre == 0)
+		return 0;
+	bot = 0;
+	top = indextop;
+	if(debug>1)
+		fprint(2, "locate looking for prefix %S\n", pre);
+	for(;;) {
+		/*
+		 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
+		 * and bot < top, and bot,top point at beginning of lines
+		 */
+		mid = (top+bot) / 2;
+		mid = seeknextline(bindex, mid);	/* snap to the start of the following line */
+		if(debug > 1)
+			fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
+				bot, (top+bot) / 2, mid, top);
+		if(mid == top || !getfield(entry))	/* no line start strictly between bot and top, or read failed */
+			break;
+		if(debug > 1)
+			fprint(2, "key=%S\n", entry);
+		/*
+		 * here mid is strictly between bot and top
+		 */
+		fold(entry);
+		if(acomp(pre, entry) <= 0)
+			top = mid;
+		else
+			bot = mid;
+	}
+	/*
+	 * bot < top, but they don't necessarily point at successive lines
+	 * Use linear search from bot to find first line that pre is a
+	 * prefix of
+	 */
+	while((bot = seeknextline(bindex, bot)) <= top) {
+		if(!getfield(entry))
+			return -1;
+		if(debug > 1)
+			fprint(2, "key=%S\n", entry);
+		fold(entry);
+		switch(acomp(pre, entry)) {
+		case -2:	/* already past where pre would be: no match */
+			return -1;
+		case -1:
+		case 0:
+			return bot;
+		case 1:
+		case 2:
+			continue;
+		}
+	}
+	return -1;
+
+}
+
+/*
+ * Get prefix of non re-metacharacters, runified, into pre,
+ * and return length (in bytes of pat consumed, including any
+ * backslashes).  A backslash-escaped character is literal: it is
+ * stored in pre without its backslash.  pre is always NUL-terminated.
+ */
+int
+getpref(char *pat, Rune *pre)
+{
+	int n, r;
+	char *p;
+
+	p = pat;
+	while(*p) {
+		n = chartorune(pre, p);
+		r = *pre;
+		switch(r) {
+		/* . * + ? [ ] ( ) | ^ $ end the literal prefix */
+		case 0x2e: case 0x2a: case 0x2b: case 0x3f:
+		case 0x5b: case 0x5d: case 0x28: case ')':
+		case 0x7c: case 0x5e: case 0x24:
+			*pre = 0;
+			return p-pat;
+		case L'\\':
+			p += n;
+			if(*p == 0)
+				goto done;	/* dangling backslash at end of pattern */
+			/* overwrite the backslash with the character it escapes */
+			p += chartorune(pre, p);
+			pre++;
+			break;
+		default:
+			p += n;
+			pre++;
+		}
+	}
+done:
+	/* reached the end of pat without meeting a metacharacter */
+	*pre = 0;
+	return p-pat;
+}
+
+/*
+ * Seek b to off, read through the next newline (or to end of file /
+ * read error), and return the resulting offset, i.e. the start of the
+ * line following off.
+ */
+long
+seeknextline(Biobuf *b, long off)
+{
+	long r;
+
+	Bseek(b, off, 0);
+	for(;;) {
+		r = Bgetrune(b);
+		if(r < 0 || r == '\n')
+			break;
+	}
+	return Boffset(b);
+}
+
+/*
+ * Read the next tab- or newline-terminated field from the index file
+ * into rp (room for Fieldlen runes) and NUL-terminate it.
+ * Returns 1 on success; 0 if the read fails before a terminator is
+ * seen, or if the field does not fit (after reporting "word too long").
+ */
+int
+getfield(Rune *rp)
+{
+	long r;
+	Rune *end;
+
+	for(end = rp+Fieldlen; rp < end; rp++) {
+		r = Bgetrune(bindex);
+		if(r < 0)
+			return 0;
+		if(r == '\t' || r == '\n') {
+			*rp = L'\0';
+			return 1;
+		}
+		*rp = r;
+	}
+	err("word too long");
+	return 0;
+}
+
+/*
+ * A compare longs function suitable for qsort.
+ * Compares directly rather than by subtraction, which could overflow
+ * for operands of opposite sign.
+ */
+static int
+longcmp(const void *av, const void *bv)
+{
+	long a, b;
+
+	a = *(const long*)av;
+	b = *(const long*)bv;
+
+	if(a < b)
+		return -1;
+	if(a > b)
+		return 1;
+	return 0;
+}
+
+/*
+ * Sort the offsets in a into ascending order and drop duplicates,
+ * updating a->n to the number of distinct offsets.
+ */
+void
+sortaddr(Addr *a)
+{
+	int in, out;
+
+	if(a->n <= 1)
+		return;
+
+	qsort(a->doff, a->n, sizeof(long), longcmp);
+
+	/* compact in place: keep an offset only if it differs from the last one kept */
+	out = 1;
+	for(in = 1; in < a->n; in++)
+		if(a->doff[in] != a->doff[out-1])
+			a->doff[out++] = a->doff[in];
+	a->n = out;
+}
+
+/*
+ * Read entry i of dot into a static buffer and return it.
+ * The entry runs from dot->doff[i] up to the offset reported by the
+ * dictionary's nextoff; the bytes are NUL-terminated at ans.end.
+ * If the end cannot be found, an error is reported and an Entry with
+ * start == end == 0 is returned.
+ * The returned pointers are only valid until the next call.
+ */
+Entry
+getentry(int i)
+{
+	long b, e, n;
+	static Entry ans;
+	static char *buf;	/* owned here; kept apart from ans.start so a failed call cannot lose it */
+	static int buflen = 0;
+
+	b = dot->doff[i];
+	e = (*dict->nextoff)(b+1);
+	ans.doff = b;
+	if(e < 0) {
+		err("couldn't seek to entry");
+		ans.start = 0;
+		ans.end = 0;
+		return ans;
+	}
+	n = e-b;
+	if(n < 0)
+		n = 0;
+	if(n+1 > buflen) {
+		buf = realloc(buf, n+1);
+		if(!buf) {
+			err("out of memory");
+			exits("nomem");
+		}
+		buflen = n+1;
+	}
+	Bseek(bdict, b, 0);
+	n = Bread(bdict, buf, n);
+	if(n < 0)
+		n = 0;	/* read error: return an empty entry rather than a pointer before buf */
+	ans.start = buf;
+	ans.end = buf + n;
+	*ans.end = 0;
+	return ans;
+}
+
+/*
+ * Replace dot by the single entry that follows the current one in the
+ * dictionary.  Does nothing if dot has no current entry (same guard
+ * as setdotprev); reports an error if no following entry is found.
+ */
+void
+setdotnext(void)
+{
+	long b;
+
+	/* with an empty dot, doff[dot->cur] has never been set */
+	if(dot->cur < 0 || dot->cur >= dot->n)
+		return;
+	b = (*dict->nextoff)(dot->doff[dot->cur]+1);
+	if(b < 0) {
+		err("couldn't find a next entry");
+		return;
+	}
+	dot->doff[0] = b;
+	dot->n = 1;
+	dot->cur = 0;
+}
+
+/*
+ * Replace dot by the single entry that precedes the current one.
+ * There is no "previous offset" function, so back up tryback bytes,
+ * walk forward with nextoff remembering the last entry start seen
+ * before the current offset, and double tryback if none was found.
+ * Does nothing if dot has no current entry.
+ */
+void
+setdotprev(void)
+{
+	int tryback;
+	long here, last, p;
+
+	if(dot->cur < 0 || dot->cur >= dot->n)
+		return;
+	here = dot->doff[dot->cur];
+	last = 0;
+	for(tryback = 2000; ; tryback = 2*tryback) {
+		p = here - tryback;
+		if(p < 0)
+			p = 0;
+		/* remember each entry start found before here */
+		while((p = (*dict->nextoff)(p+1)) >= 0 && p < here)
+			last = p;
+		if(p < 0)
+			return; /* shouldn't happen */
+		if(last != 0)
+			break;
+		if(here - tryback < 0) {
+			/* already searched from the start of the dictionary */
+			err("can't find a previous entry");
+			return;
+		}
+	}
+	dot->doff[0] = last;
+	dot->n = 1;
+	dot->cur = 0;
+}
--- a/src/cmd/dict/dict.h
+++ b/src/cmd/dict/dict.h
@ -0,0 +1,160 @@
+/* Runes for special purposes (0xe800-0xfdff is Private Use Area) */
+enum {	NONE=0xe800,	/* Emit nothing */
+	TAGS,		/* Start of tag */
+	TAGE,		/* End of tag */
+	SPCS,		/* Start of special character name */
+	PAR,		/* Newline, indent */
+	LIGS,		/* Start of ligature codes */
+	LACU=LIGS,	/* Acute (´) ligatures */
+	LGRV,		/* Grave (ˋ) ligatures */
+	LUML,		/* Umlaut (¨) ligatures */
+	LCED,		/* Cedilla (¸) ligatures */
+	LTIL,		/* Tilde (˜) ligatures */
+	LBRV,		/* Breve (˘) ligatures */
+	LRNG,		/* Ring (˚) ligatures */
+	LDOT,		/* Dot (˙) ligatures */
+	LDTB,		/* Dot below (.) ligatures */
+	LFRN,		/* Frown (⌢) ligatures */
+	LFRB,		/* Frown below (̯) ligatures */
+	LOGO,		/* Ogonek (˛) ligatures */
+	LMAC,		/* Macron (¯) ligatures */
+	LHCK,		/* Hacek (ˇ) ligatures */
+	LASP,		/* Asper (ʽ) ligatures */
+	LLEN,		/* Lenis (ʼ) ligatures */
+	LBRB,		/* Breve below (̮) ligatures */
+	LIGE,		/* End of ligature codes */
+	MULTI,		/* Start of multi-rune codes */
+	MAAS=MULTI,	/* ʽα */
+	MALN,		/* ʼα */
+	MAND,		/* and */
+	MAOQ,		/* a/q */
+	MBRA,		/* <| */
+	MDD,		/* .. */
+	MDDD,		/* ... */
+	MEAS,		/* ʽε */
+	MELN,		/* ʼε */
+	MEMM,		/* —— */
+	MHAS,		/* ʽη */
+	MHLN,		/* ʼη */
+	MIAS,		/* ʽι */
+	MILN,		/* ʼι */
+	MLCT,		/* ct */
+	MLFF,		/* ff */
+	MLFFI,		/* ffi */
+	MLFFL,		/* ffl */
+	MLFL,		/* fl */
+	MLFI,		/* fi */
+	MLLS,		/* ll with swing */
+	MLST,		/* st */
+	MOAS,		/* ʽο */
+	MOLN,		/* ʼο */
+	MOR,		/* or */
+	MRAS,		/* ʽρ */
+	MRLN,		/* ʼρ */
+	MTT,		/* ~~ */
+	MUAS,		/* ʽυ */
+	MULN,		/* ʼυ */
+	MWAS,		/* ʽω */
+	MWLN,		/* ʼω */
+	MOE,		/* oe */
+	MES,		/* em space */
+	MULTIE,		/* End of multi-rune codes */
+};
+#define Nligs (LIGE-LIGS)	/* number of ligature codes (LACU .. LBRB) */
+#define Nmulti (MULTIE-MULTI)	/* number of multi-rune codes (MAAS .. MES) */
+
+typedef struct Entry Entry;
+typedef struct Assoc Assoc;
+typedef struct Nassoc Nassoc;
+typedef struct Dict Dict;
+
+struct Entry {	/* raw bytes of one dictionary entry, as handed to a printentry function */
+	char	*start;		/* entry starts at start */
+	char	*end;		/* and finishes just before end */
+	long	doff;		/* dictionary offset (for debugging) */
+};
+
+struct Assoc {	/* string key -> value pair; NOTE(review): presumably searched with lookassoc - confirm */
+	char	*key;
+	long	val;
+};
+
+struct Nassoc {	/* numeric key -> value pair; NOTE(review): presumably searched with looknassoc - confirm */
+	long	key;
+	long	val;
+};
+
+struct Dict {	/* one supported dictionary: where its files are and how to read it */
+	char	*name;			/* dictionary name */
+	char	*desc;			/* description */
+	char	*path;			/* path to dictionary data */
+	char	*indexpath;		/* path to index data */
+	long	(*nextoff)(long);	/* function to find next entry offset from arg */
+	void	(*printentry)(Entry, int); /* function to print entry */
+	void	(*printkey)(void);	/* function to print pronunciation key */
+};
+
+int	acomp(Rune*, Rune*);
+Rune	*changett(Rune *, Rune *, int);
+void	err(char*, ...);
+void	fold(Rune *);
+void	foldre(char*, char*);
+Rune	liglookup(Rune, Rune);
+long	lookassoc(Assoc*, int, char*);
+long	looknassoc(Nassoc*, int, long);
+void	outprint(char*, ...);
+void	outrune(long);
+void	outrunes(Rune *);
+void	outchar(int);
+void	outchars(char *);
+void	outnl(int);
+void	outpiece(char *, char *);
+void	runescpy(Rune*, Rune*);
+long	runetol(Rune*);
+
+long	oednextoff(long);
+void	oedprintentry(Entry, int);
+void	oedprintkey(void);
+long	ahdnextoff(long);
+void	ahdprintentry(Entry, int);
+void	ahdprintkey(void);
+long	pcollnextoff(long);
+void	pcollprintentry(Entry, int);
+void	pcollprintkey(void);
+long	pcollgnextoff(long);
+void	pcollgprintentry(Entry, int);
+void	pcollgprintkey(void);
+long	movienextoff(long);
+void	movieprintentry(Entry, int);
+void	movieprintkey(void);
+long	pgwnextoff(long);
+void	pgwprintentry(Entry,int);
+void	pgwprintkey(void);
+long	slangnextoff(long);
+void	slangprintentry(Entry, int);
+void	slangprintkey(void);
+long	robertnextoff(long);
+void	robertindexentry(Entry, int);
+void	robertprintkey(void);
+long	robertnextflex(long);
+void	robertflexentry(Entry, int);
+long	simplenextoff(long);
+void	simpleprintentry(Entry, int);
+void	simpleprintkey(void);
+long	thesnextoff(long);
+void	thesprintentry(Entry, int);
+void	thesprintkey(void);
+long	worldnextoff(long);
+void	worldprintentry(Entry, int);
+void	worldprintkey(void);
+
+extern Biobuf	*bdict;
+extern Biobuf	*bout;
+extern int	linelen;
+extern int	breaklen;
+extern int	outinhibit;
+extern int	debug;
+extern Rune	multitab[][5];
+extern Dict	dicts[];
+
+#define asize(a) (sizeof (a)/sizeof(a[0]))
--- a/src/cmd/dict/egfix
+++ b/src/cmd/dict/egfix
@ -0,0 +1,15 @@
+#!/bin/rc
+
+# Clean up a raw index ($1) made of offset<TAB>headwords lines.
+# Pass 1: strip trailing blanks, drop lines without a tab, and for a
+#	line containing ", " print it cut at the first comma, then again
+#	with everything from the tab through the last ", " replaced by a tab.
+# Pass 2: for a line containing "(", print it once with the
+#	parenthesized part removed and once with only the parentheses removed.
+# Pass 3: remove blanks after the tab, strip trailing blanks and
+#	squeeze runs of spaces.
+sed '
+	s/[ 	]+$//
+	/	/!d
+	/, /{; h; s/,.*//; p; g; s/	.*, /	/; }
+' $1 |
+sed '
+	/\(/{; h; s/\([^)]+\)//; p; g; s/[()]//g; }
+' |
+sed '
+	s/	 +/	/
+	s/[ 	]+$//
+	s/  +/ /g
+'
--- a/src/cmd/dict/egfix2
+++ b/src/cmd/dict/egfix2
@ -0,0 +1,8 @@
+#!/bin/rc
+
+# Turn offset<TAB>word[, word...] lines ($1) into one word<TAB>offset
+# line per word, lower-cased, sorted and uniqued in the order dict expects.
+awk '
+BEGIN	{ FS = "	|, " }
+	{ for(i=2; i<=NF; i++)print $i "	" $1 }
+' $1 |
+tr A-Z a-z |
+sort -u -t'	' +0f -1 +0 -1 +1n -2
--- a/src/cmd/dict/gb2312.c
+++ b/src/cmd/dict/gb2312.c
--- a/src/cmd/dict/gefix
+++ b/src/cmd/dict/gefix
@ -0,0 +1,23 @@
+#!/bin/rc
+
+# Clean up a raw index ($1) of offset<TAB>headwords lines and emit
+# word<TAB>offset lines, lower-cased, sorted and uniqued.
+# (Inside the rc-quoted sed scripts, '' stands for one literal quote.)
+# The first sed strips trailing blanks, drops lines without a tab,
+# removes \N'349' sequences, quotes and leading/trailing hyphens, and
+# expands optional "(...)" parts and a trailing "(r, s)" into separate
+# lines; the second sed repeats those two expansions.  Lines
+# containing ß are also emitted with ss in its place.
+sed '
+	s/[ 	]+$//
+	/	/!d
+	s/\\N''349''//g
+	s/''//g
+	s/	-/	/
+	s/-$//
+	/\([^,) ]+(\)|$)/{; h; s///; p; g; s/\(//; s/\)//; }
+	/\(r, s\)$/{; s///; p; s/$/r/; p; s/r$/s/; }
+' $1 |
+sed '
+	/\([^,) ]+(\)|$)/{; h; s///; p; g; s/\(//; s/\)//; }
+	/\(r, s\)$/{; s///; p; s/$/r/; p; s/r$/s/; }
+' |
+sed '/ß/{; p; s/ß/ss/g; }' |
+awk '
+BEGIN	{ FS = "	|, " }
+	{ for(i=2; i<=NF; i++)print $i "	" $1 }
+' |
+tr A-Z a-z |
+sort -u -t'	' +0f -1 +0 -1 +1n -2
--- a/src/cmd/dict/getneeds
+++ b/src/cmd/dict/getneeds
@ -0,0 +1,8 @@
+#!/bin/rc
+# For each kind x in (spec tag aux status): take the lines of $1 that
+# contain " x ", sort them on fields 5 and 4, keep the first line for
+# each distinct fifth field, and write the result sorted numerically on
+# field 3 to the file need$x.
+# Uses scratch files junk1..junk3 in the current directory and removes junk* afterwards.
+for (x in spec tag aux status) {
+	grep ' '^$x^' ' $1 > junk1
+	sort +4 -5 +3n -4 junk1 > junk2
+	awk '{if ($5 != prev) print $0; prev = $5}' junk2 > junk3
+	sort -n +2 -3 junk3 > need$x
+	rm junk*
+}
--- a/src/cmd/dict/jis208.c
+++ b/src/cmd/dict/jis208.c
--- a/src/cmd/dict/kuten.h
+++ b/src/cmd/dict/kuten.h
@ -0,0 +1,114 @@
+/*
+	following astonishing goo courtesy of kogure.
+*/
+/*
+ * MicroSoft Kanji Encoding (SJIS) Transformation
+ */
+
+/*
+ * void
+ * J2S(unsigned char *_h, unsigned char *_l)
+ *	JIS X 208 to MS kanji transformation.
+ *
+ * Calling/Exit State:
+ *	_h and _l should be in their valid range.
+ *	No return value.
+ *
+ * Despite the prototype-style description above, this is a macro:
+ *	_h and _l are assigned to in place, so they must be lvalues
+ *	(not pointers), and each is evaluated more than once.
+ *	The expansion is a brace-enclosed block, not an expression.
+ */
+#define J2S(_h, _l) { \
+	/* lower: 21-7e >> 40-9d,9e-fb >> 40-7e,(skip 7f),80-fc */ \
+	if (((_l) += (((_h)-- % 2) ? 0x1f : 0x7d)) > 0x7e) (_l)++; \
+	/* upper: 21-7e >> 81-af >> 81-9f,(skip a0-df),e0-ef */ \
+	if (((_h) = ((_h) / 2 + 0x71)) > 0x9f) (_h) += 0x40; \
+}
+
+/*
+ * void
+ * S2J(unsigned char *_h, unsigned char *_l)
+ *	MS kanji to JIS X 208 transformation.
+ *
+ * Calling/Exit State:
+ *	_h and _l should be in valid range.
+ *	No return value.
+ *
+ * Despite the prototype-style description above, this is a macro:
+ *	_h and _l are assigned to in place, so they must be lvalues
+ *	(not pointers), and each is evaluated more than once.
+ *	The expansion is a brace-enclosed block, not an expression.
+ */
+#define S2J(_h, _l) { \
+	/* lower: 40-7e,80-fc >> 21-5f,61-dd >> 21-7e,7f-dc */ \
+	if (((_l) -= 0x1f) > 0x60) (_l)--; \
+	/* upper: 81-9f,e0-ef >> 00-1e,5f-6e >> 00-2e >> 21-7d */ \
+	if (((_h) -= 0x81) > 0x5e) (_h) -= 0x40; (_h) *= 2, (_h) += 0x21; \
+	/* upper: ,21-7d >> ,22-7e ; lower: ,7f-dc >> ,21-7e */ \
+	if ((_l) > 0x7e) (_h)++, (_l) -= 0x5e; \
+}
+
+/*
+ * int
+ * ISJKANA(const unsigned char *_b)
+ *	Tests given byte is in the range of JIS X 0201 katakana.
+ *
+ * Calling/Exit State:
+ *	Returns 1 if it is, or 0 otherwise.
+ */
+#define	ISJKANA(_b)	(0xa0 <= (_b) && (_b) < 0xe0)
+
+/*
+ * int
+ * CANS2JH(const unsigned char *_h)
+ *	Tests given byte is in the range of valid first byte of MS
+ *	kanji code; either acts as a subroutine of CANS2J() macro
+ *	or can be used to parse MS kanji encoded strings.
+ *
+ * Calling/Exit State:
+ *	Returns 1 if it is, or 0 otherwise.
+ */
+#define CANS2JH(_h)	((0x81 <= (_h) && (_h) < 0xf0) && !ISJKANA(_h))
+
+/*
+ * int
+ * CANS2JL(const unsigned char *_l)
+ *	Tests given byte is in the range of valid second byte of MS
+ *	kanji code; acts as a subroutine of CANS2J() macro.
+ *
+ * Calling/Exit State:
+ *	Returns 1 if it is, or 0 otherwise.
+ */
+#define CANS2JL(_l)	(0x40 <= (_l) && (_l) < 0xfd && (_l) != 0x7f)
+
+/*
+ * int
+ * CANS2J(const unsigned char *_h, const unsigned char *_l)
+ *	Tests given bytes form a MS kanji code point which can be
+ *	transformed to a valid JIS X 208 code point.
+ *
+ * Calling/Exit State:
+ *	Returns 1 if they are, or 0 otherwise.
+ */
+#define CANS2J(_h, _l)  (CANS2JH(_h) && CANS2JL(_l))
+
+/*
+ * int
+ * CANJ2SB(const unsigned char *_b)
+ *	Tests given bytes is in the range of valid 94 graphic
+ *	character set; acts as a subroutine of CANJ2S() macro.
+ *
+ * Calling/Exit State:
+ *	Returns 1 if it is, or 0 otherwise.
+ */
+#define CANJ2SB(_b)	(0x21 <= (_b) && (_b) < 0x7f)
+
+/*
+ * int
+ * CANJ2S(const unsigned char *_h, const unsigned char *_l)
+ *	Tests given bytes form valid JIS X 208 code points
+ *	(which can be transformed to MS kanji).
+ *
+ * Calling/Exit State:
+ *	Returns 1 if they are, or 0 otherwise.
+ */
+#define CANJ2S(_h, _l)	(CANJ2SB(_h) && CANJ2SB(_l))
+
+#define		JIS208MAX	8407
+#define		GB2312MAX	8795
+#define		BIG5MAX		13973
+
+extern Rune tabjis208[JIS208MAX];	/* runes indexed by kuten */
+extern Rune tabgb2312[GB2312MAX];
+extern Rune tabbig5[BIG5MAX];
--- a/src/cmd/dict/mkfile
+++ b/src/cmd/dict/mkfile
@ -0,0 +1,18 @@
+# Builds the dict command from dict.$O, the per-dictionary readers in
+# LFILES and utils.$O.  The extra mkindex target links mkindex.$O with
+# the same readers and utils.$O.
+PLAN9=../../..
+<$PLAN9/src/mkhdr
+
+TARG=dict
+# one object per supported dictionary format, plus the character tables
+LFILES=oed.$O ahd.$O pcollins.$O pcollinsg.$O movie.$O slang.$O robert.$O\
+	world.$O jis208.$O gb2312.$O thesaurus.$O simple.$O pgw.$O
+
+OFILES=dict.$O\
+	$LFILES\
+	utils.$O
+
+HFILES=dict.h kuten.h
+
+LDFLAGS=$LDFLAGS -lbio -l9 -lregexp9 -lfmt -lutf
+<$PLAN9/src/mkone
+
+mkindex: mkindex.$O $LFILES utils.$O
+	$LD $LDFLAGS -o $target $prereq
--- a/src/cmd/dict/mkindex.c
+++ b/src/cmd/dict/mkindex.c
@ -0,0 +1,106 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Use this to start making an index for a new dictionary.
+ * Get the dictionary-specific nextoff and printentry(_,'h')
+ * commands working, add a record to the dicts[] array below,
+ * and run this program to get a list of offset,headword
+ * pairs
+ */
+Biobuf	boutbuf;
+Biobuf	*bdict;
+Biobuf	*bout = &boutbuf;
+int	linelen;
+int	breaklen = 2000;
+int	outinhibit;
+int	debug;
+
+Dict	*dict;	/* current dictionary */
+
+Entry	getentry(long);
+
+/*
+ * mkindex [-d dict] [-D]
+ *
+ * Walk the chosen dictionary (default: the first in dicts[]) entry by
+ * entry and print "offset<TAB>headword" for each, using the
+ * dictionary's nextoff and printentry(_, 'h').
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+	long a, ae;
+	char *p;
+	Entry e;
+
+	Binit(&boutbuf, 1, OWRITE);
+	dict = &dicts[0];
+	ARGBEGIN {
+		case 'd':
+			p = ARGF();
+			dict = 0;
+			if(p) {
+				for(i=0; dicts[i].name; i++)
+					if(strcmp(p, dicts[i].name)==0) {
+						dict = &dicts[i];
+						break;
+					}
+			}
+			if(!dict) {
+				err("unknown dictionary: %s", p);
+				exits("nodict");
+			}
+			break;
+		case 'D':
+			debug++;
+			break;
+	ARGEND }
+	USED(argc,argv);
+	bdict = Bopen(dict->path, OREAD);
+	/* check the open before touching bdict */
+	if(!bdict) {
+		err("can't open dictionary %s", dict->path);
+		exits("nodict");
+	}
+	ae = Bseek(bdict, 0, 2);
+	/* nextoff returns a negative offset when there are no more entries */
+	for(a = 0; a >= 0 && a < ae; a = (*dict->nextoff)(a+1)) {
+		linelen = 0;
+		e = getentry(a);
+		Bprint(bout, "%ld\t", a);
+		linelen = 4;	/* only has to be approx right */
+		(*dict->printentry)(e, 'h');
+	}
+	exits(0);
+}
+
+/*
+ * Read the entry that starts at dictionary offset b into a static
+ * buffer and return it.  The entry ends where the dictionary's nextoff
+ * says the next one begins, or at end of file for the last entry.
+ * The bytes are NUL-terminated at ans.end, as in dict.c, because
+ * some printentry routines rely on it.
+ * If b is at or past end of file an error is reported and an Entry
+ * with start == end == 0 is returned.
+ * The returned pointers are only valid until the next call.
+ */
+Entry
+getentry(long b)
+{
+	long e, n, dtop;
+	static Entry ans;
+	static char *buf;	/* owned here; kept apart from ans.start so a failed call cannot lose it */
+	static int buflen = 0;
+
+	e = (*dict->nextoff)(b+1);
+	ans.doff = b;
+	if(e < 0) {
+		dtop = Bseek(bdict, 0L, 2);
+		if(b >= dtop) {
+			err("couldn't seek to entry");
+			ans.start = 0;
+			ans.end = 0;
+			return ans;
+		}
+		e = dtop;	/* last entry runs to end of file */
+	}
+	n = e-b;
+	if(n < 0)
+		n = 0;
+	if(n+1 > buflen) {
+		buf = realloc(buf, n+1);
+		if(!buf) {
+			err("out of memory");
+			exits("nomem");
+		}
+		buflen = n+1;
+	}
+	if(n > 0) {
+		Bseek(bdict, b, 0);
+		n = Bread(bdict, buf, n);
+		if(n < 0)
+			n = 0;	/* read error: return an empty entry */
+	}
+	ans.start = buf;
+	ans.end = buf + n;
+	*ans.end = 0;
+	return ans;
+}
--- a/src/cmd/dict/movie.c
+++ b/src/cmd/dict/movie.c
@ -0,0 +1,328 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/* Possible tags */
+enum {
+	BEG,	/* beginning of entry */
+	AB,	/* abstract */
+	AN,	/* database serial number */
+	AS,	/* author (one at a time) */
+	AU,	/* all authors */
+	AW,	/* award_awardee */
+	BW,	/* bw or c */
+	CA,	/* cast: character_actor */
+	CN,	/* cinematography */
+	CO,	/* country */
+	CR,	/* miscellaneous job_name */
+	DE,	/* topic keyword */
+	DR,	/* director */
+	ED,	/* editor */
+	MP,	/* MPAA rating (R, PG, etc.) */
+	NT,	/* note */
+	PR,	/* producer and for ...*/
+	PS,	/* producer (repeats info in PR) */
+	RA,	/* rating (letter) */
+	RD,	/* release date */
+	RT,	/* running time */
+	RV,	/* review citation */
+	ST,	/* production or release company (repeats info in PR) */
+	TI,	/* title[; original foreign title] */
+	TX,	/* paragraph of descriptive text */
+	VD,	/* video information (format_time_company; or "Not Avail.") */
+	NTAG	/* number of tags */
+};
+
+/* Assoc tables must be sorted on first field */
+
+static char *tagtab[] = {	/* two-character line prefix for each tag, indexed by the tag enum; mget compares its first two characters */
+[BEG]	"$$",
+[AB]	"AB",
+[AN]	"AN",
+[AS]	"AS",
+[AU]	"AU",
+[AW]	"AW",
+[BW]	"BW",
+[CA]	"CA",
+[CN]	"CN",
+[CO]	"CO",
+[CR]	"CR",
+[DE]	"DE",
+[DR]	"DR",
+[ED]	"ED",
+[MP]	"MP",
+[NT]	"NT",
+[PR]	"PR",
+[PS]	"PS",
+[RA]	"RA",
+[RD]	"RD",
+[RT]	"RT",
+[RV]	"RV",
+[ST]	"ST",
+[TI]	"TI",
+[TX]	"TX",
+[VD]	"VD",
+};
+
+static char	*mget(int, char *, char *, char **);
+#if 0
+static void	moutall(int, char *, char *);
+#endif
+static void	moutall2(int, char *, char *);
+
+/*
+ * Print one movie database entry.
+ *	cmd 'r': write the raw bytes of the entry and return;
+ *	cmd 'h': print only the title line;
+ *	otherwise: print the title followed by a formatted summary
+ *	(release data, video, credits, cast, awards, notes, abstract
+ *	and all text paragraphs).
+ * Fields are looked up by tag with mget; a missing field is skipped.
+ */
+void
+movieprintentry(Entry ent, int cmd)
+{
+	char *p, *e, *ps, *pe, *pn;
+	int n;
+
+	ps = ent.start;
+	pe = ent.end;
+	if(cmd == 'r') {
+		Bwrite(bout, ps, pe-ps);
+		return;
+	}
+	p = mget(TI, ps, pe, &e);
+	if(p) {
+		outpiece(p, e);
+		outnl(0);
+	}
+	if(cmd == 'h')
+		return;
+	outnl(2);
+	/* one comma-separated line of short facts; n counts what has been printed */
+	n = 0;
+	p = mget(RD, ps, pe, &e);
+	if(p) {
+		outchars("Released: ");
+		outpiece(p, e);
+		n++;
+	}
+	p = mget(CO, ps, pe, &e);
+	if(p) {
+		if(n)
+			outchars(", ");
+		outpiece(p, e);
+		n++;
+	}
+	p = mget(RT, ps, pe, &e);
+	if(p) {
+		if(n)
+			outchars(", ");
+		outchars("Running time: ");
+		outpiece(p, e);
+		n++;
+	}
+	p = mget(MP, ps, pe, &e);
+	if(p) {
+		if(n)
+			outchars(", ");
+		outpiece(p, e);
+		n++;
+	}
+	p = mget(BW, ps, pe, &e);
+	if(p) {
+		if(n)
+			outchars(", ");
+		if(*p == 'c' || *p == 'C')
+			outchars("Color");
+		else
+			outchars("B&W");
+		n++;
+	}
+	if(n) {
+		outchar('.');
+		outnl(1);
+	}
+	p = mget(VD, ps, pe, &e);
+	if(p) {
+		outchars("Video: ");
+		outpiece(p, e);
+		outnl(1);
+	}
+	p = mget(AU, ps, pe, &e);
+	if(p) {
+		outchars("By: ");
+		moutall2(AU, ps, pe);
+		outnl(1);
+	}
+	p = mget(DR, ps, pe, &e);
+	if(p) {
+		outchars("Director: ");
+		outpiece(p, e);
+		outnl(1);
+	}
+	p = mget(PR, ps, pe, &e);
+	if(p) {
+		outchars("Producer: ");
+		outpiece(p, e);
+		outnl(1);
+	}
+	p = mget(CN, ps, pe, &e);
+	if(p) {
+		outchars("Cinematography: ");
+		outpiece(p, e);
+		outnl(1);
+	}
+	p = mget(CR, ps, pe, &e);
+	if(p) {
+		outchars("Other Credits: ");
+		moutall2(CR, ps, pe);
+	}
+	outnl(2);
+	p = mget(CA, ps, pe, &e);
+	if(p) {
+		outchars("Cast: ");
+		moutall2(CA, ps, pe);
+	}
+	outnl(2);
+	p = mget(AW, ps, pe, &e);
+	if(p) {
+		outchars("Awards: ");
+		moutall2(AW, ps, pe);
+		outnl(2);
+	}
+	p = mget(NT, ps, pe, &e);
+	if(p) {
+		outpiece(p, e);
+		outnl(2);
+	}
+	p = mget(AB, ps, pe, &e);
+	if(p) {
+		outpiece(p, e);
+		outnl(2);
+	}
+	/* every TX paragraph, each search resuming where the last value ended */
+	pn = ps;
+	n = 0;
+	while((p = mget(TX, pn, pe, &pn)) != 0) {
+		if(n++)
+			outnl(1);
+		outpiece(p, pn);
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset of the first line at or after fromoff that
+ * begins with "$$" (the start of an entry), or -1 if the seek fails
+ * or no such line is found.
+ */
+long
+movienextoff(long fromoff)
+{
+	long pos;
+	char *ln;
+
+	pos = Bseek(bdict, fromoff, 0);
+	if(pos < 0)
+		return -1;
+	while((ln = Brdline(bdict, '\n')) != 0)
+		if(ln[0] == '$' && ln[1] == '$')
+			/* the offset is now just past the line; back up to its start */
+			return (Boffset(bdict)-Blinelen(bdict));
+	return -1;
+}
+
+void
+movieprintkey(void)
+{
+	Bprint(bout, "No key\n");	/* this dictionary has no pronunciation key */
+}
+
+/*
+ * write a comma-separated list of all tag values between b and e
+ * (compiled out with #if 0: every caller in this file uses moutall2)
+ */
+#if 0
+static void
+moutall(int tag, char *b, char *e)
+{
+	char *p, *pn;
+	int n;
+
+	n = 0;
+	pn = b;
+	while((p = mget(tag, pn, e, &pn)) != 0) {	/* pn advances to the end of each value found */
+		if(n++)
+			outchars(", ");
+		outpiece(p, pn);
+	}
+}
+#endif
+
+/*
+ * Write a comma-separated list of every value of the given tag
+ * between b and e.  Values are expected to look like
+ *    field1_field2
+ * and are printed as 'field2 (field1)'.  The split is made at the
+ * last underscore, since field1 may itself contain underscores.
+ * A value without an underscore is printed as is.
+ */
+static void
+moutall2(int tag, char *b, char *e)
+{
+	char *val, *end, *sep;
+	int first, self;
+
+	first = 1;
+	end = b;
+	while((val = mget(tag, end, e, &end)) != 0) {
+		if(!first)
+			outchars(", ");
+		first = 0;
+		/* scan backwards for the last underscore in [val, end) */
+		for(sep = end-1; sep >= val && *sep != '_'; sep--)
+			;
+		if(sep < val) {
+			outpiece(val, end);
+			continue;
+		}
+		/*
+		 * Hack to fix cast list Himself/Herself:
+		 * keep those in their original order
+		 */
+		self = strncmp(sep+1, "Himself", 7) == 0 ||
+			strncmp(sep+1, "Herself", 7) == 0;
+		if(self) {
+			outpiece(val, sep);
+			outchars(" (");
+			outpiece(sep+1, end);
+		} else {
+			outpiece(sep+1, end);
+			outchars(" (");
+			outpiece(val, sep);
+		}
+		outchar(')');
+	}
+}
+
+/*
+ * Find the next line after b that begins with tagtab[tag].
+ * The search starts at the first newline at or after b, so b's own
+ * line is never examined.  Don't go past e, but assume *e==0.
+ * Returns a pointer to the value (three characters into the line),
+ * or 0 if there is no such line or tag is out of range.
+ * When a value is found, *eptr is set to the newline that ends it:
+ * the one before the next line not starting with a space, or e-1 if
+ * the value runs to the end of the entry.
+ */
+static char *
+mget(int tag, char *b, char *e, char **eptr)
+{
+	char *line, *prefix, *val;
+
+	if(tag < 0 || tag >= NTAG)
+		return 0;
+	prefix = tagtab[tag];
+	val = 0;
+	line = b;
+	/* step from one line start to the next until e is reached */
+	while((line = strchr(line, '\n')) != 0 && ++line < e) {
+		if(val == 0) {
+			if(line[0] == prefix[0] && line[1] == prefix[1])
+				val = line+3;
+		} else if(line[0] != ' ') {
+			/* first line that is not a continuation ends the value */
+			*eptr = line-1;
+			return val;
+		}
+	}
+	if(val)
+		*eptr = e-1;
+	return val;
+}
--- a/src/cmd/dict/oed.c
+++ b/src/cmd/dict/oed.c
--- a/src/cmd/dict/pcollins.c
+++ b/src/cmd/dict/pcollins.c
@ -0,0 +1,226 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Routines for handling dictionaries in the "Paperback Collins"
+ * format (with tags surrounded by >....<)
+ */
+enum {
+	Buflen=1000,	/* size of the tag[] buffer filled by gettag */
+};
+
+/*
+ * More special runes: codes for the formatting tags in tagtab,
+ * numbered from MULTIE+1 upward.
+ */
+enum {
+	B = MULTIE+1,	/* bold */
+	H,		/* headword start */
+	I,		/* italics */
+	Ps,		/* pronunciation start */
+	Pe,		/* pronunciation end */
+	R,		/* roman */
+	X,		/* headword end */
+};
+
+/* Assoc tables must be sorted on first field */
+
+/*
+ * Tag name (the text gettag collects after a '>') to the rune or
+ * special code it stands for.  pcollprintentry searches this table
+ * with lookassoc.
+ */
+static Assoc tagtab[] = {
+	{"AA",		0xc5},
+	{"AC",		LACU},
+	{"B",		B},
+	{"CE",		LCED},
+	{"CI",		LFRN},
+	{"Di",		0x131},
+	{"EL",		0x2d},
+	{"GR",		LGRV},
+	{"H",		H},
+	{"I",		I},
+	{"OE",		0x152},
+	{"R",		R},
+	{"TI",		LTIL},
+	{"UM",		LUML},
+	{"X",		X},
+	{"[",		Ps},
+	{"]",		Pe},
+	{"ac",		LACU},
+	{"ce",		LCED},
+	{"ci",		LFRN},
+	{"gr",		LGRV},
+	{"oe",		0x153},
+	{"supe",	0x65},		/* should be raised */
+	{"supo",	0x6f},		/* should be raised */
+	{"ti",		LTIL},
+	{"um",		LUML},
+	{"{",		Ps},
+	{"~",		0x7e},
+	{"~~",		MTT},
+};
+
+/*
+ * Translation of the low 7 bits of each input byte.
+ * Printable ASCII maps to itself, except that '<' (0x3c) is TAGE and
+ * '>' (0x3e) is TAGS.  Newline (0x0a) becomes a space; the other
+ * control characters and 0x7f map to NONE.
+ */
+static Rune normtab[128] = {
+	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
+/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
+	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
+/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
+	0x38,	0x39,	0x3a,	0x3b,	TAGE,	0x3d,	TAGS,	0x3f,
+/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
+	0x48,	0x49,	0x4a,	0x4b,	L'L',	0x4d,	0x4e,	0x4f,
+/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
+	0x58,	0x59,	0x5a,	0x5b,	L'\\',	0x5d,	0x5e,	0x5f,
+/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
+	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
+/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
+	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
+};
+
+static char *gettag(char *, char *);
+
+static Entry	curentry;	/* entry being printed; set by pcollprintentry */
+static char	tag[Buflen];	/* name of the tag most recently read by gettag */
+/* size in bytes of the entry being printed (used in a debug message) */
+#define cursize (curentry.end-curentry.start)
+
+/*
+ * Print entry e of a Paperback Collins dictionary.
+ *	cmd 'r'	copy the entry's bytes to the output untranslated
+ *	cmd 'h'	print only the headword: output is inhibited except
+ *		between the >H< and >X< tags
+ *	other	print the whole entry; text between the pronunciation
+ *		tags (Ps ... Pe) is suppressed
+ *
+ * Translated runes are held back one position in rprev so that an
+ * accent tag that follows can be merged with them by liglookup.
+ */
+void
+pcollprintentry(Entry e, int cmd)
+{
+	char *p, *pe;
+	long r, rprev, t, rlig;
+	int saveoi;
+	Rune *transtab;
+
+	p = e.start;
+	pe = e.end;
+	transtab = normtab;
+	rprev = NONE;
+	changett(0, 0, 0);
+	curentry = e;
+	saveoi = 0;
+	if(cmd == 'h')
+		outinhibit = 1;
+	while(p < pe) {
+		if(cmd == 'r') {
+			outchar(*p++);
+			continue;
+		}
+		r = transtab[(*p++)&0x7F];
+		if(r < NONE) {
+			/* Emit the rune, but buffer in case of ligature */
+			if(rprev != NONE)
+				outrune(rprev);
+			rprev = r;
+		} else if(r == TAGS) {
+			p = gettag(p, pe);
+			t = lookassoc(tagtab, asize(tagtab), tag);
+			if(t == -1) {
+				/* cursize is a pointer difference: print it as a long */
+				if(debug && !outinhibit)
+					err("tag %ld %ld %s",
+						e.doff, (long)cursize, tag);
+				continue;
+			}
+			if(t < NONE) {
+				if(rprev != NONE)
+					outrune(rprev);
+				rprev = t;
+			} else if(t >= LIGS && t < LIGE) {
+				/* handle possible ligature */
+				rlig = liglookup(t, rprev);
+				if(rlig != NONE)
+					rprev = rlig;	/* overwrite rprev */
+				else {
+					/* could print accent, but let's not */
+					if(rprev != NONE) outrune(rprev);
+					rprev = NONE;
+				}
+			} else if(t >= MULTI && t < MULTIE) {
+				if(rprev != NONE) {
+					outrune(rprev);
+					rprev = NONE;
+				}
+				outrunes(multitab[t-MULTI]);
+			} else {
+				if(rprev != NONE) {
+					outrune(rprev);
+					rprev = NONE;
+				}
+				switch(t){
+				case H:
+					if(cmd == 'h')
+						outinhibit = 0;
+					else
+						outnl(0);
+					break;
+				case X:
+					if(cmd == 'h')
+						outinhibit = 1;
+					else
+						outchars(".  ");
+					break;
+				case Ps:
+					/* don't know enough of pron. key yet */
+					saveoi = outinhibit;
+					outinhibit = 1;
+					break;
+				case Pe:
+					outinhibit = saveoi;
+					break;
+				}
+			}
+		}
+	}
+	/*
+	 * Flush the rune still held back for a possible ligature, as
+	 * pcollgprintentry does; otherwise the last rune of the entry
+	 * is silently lost.  Done before outinhibit is reset so that
+	 * 'h' mode still prints nothing beyond the headword.
+	 */
+	if(rprev != NONE)
+		outrune(rprev);
+	if(cmd == 'h')
+		outinhibit = 0;
+	outnl(0);
+}
+
+/*
+ * Return the offset of the first line at or after fromoff that
+ * starts with ">H<", or -1 if the seek fails or no such line is found.
+ */
+long
+pcollnextoff(long fromoff)
+{
+	char *line;
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	while((line = Brdline(bdict, '\n')) != 0)
+		if(line[0] == '>' && line[1] == 'H' && line[2] == '<')
+			return Boffset(bdict) - Blinelen(bdict);
+	return -1;
+}
+
+/*
+ * No pronunciation key is available; report that on bout.
+ */
+void
+pcollprintkey(void)
+{
+	static char msg[] = "No pronunciation key yet\n";
+
+	Bprint(bout, "%s", msg);
+}
+
+/*
+ * f points just after '>'; fe points at end of entry.
+ * Expect the next characters to match:
+ *  [^ <]+<
+ *     tag
+ * Accumulate the tag (NUL-terminated, at most Buflen-1 characters)
+ * in tag[].
+ * Return pointer to after the final '<', or to where scanning
+ * stopped if the end of the entry or the buffer limit came first.
+ */
+static char *
+gettag(char *f, char *fe)
+{
+	char *t;
+	int c, i;
+
+	t = tag;
+	i = Buflen;
+	/*
+	 * The f < fe test matters when the '>' was the last byte of the
+	 * entry: without it f steps over fe, the f == fe test below can
+	 * never fire, and the scan runs on past the end of the entry.
+	 */
+	while(--i > 0 && f < fe) {
+		c = *f++;
+		if(c == '<' || f == fe)
+			break;
+		*t++ = c;
+	}
+	*t = 0;
+	return f;
+}
--- a/src/cmd/dict/pcollinsg.c
+++ b/src/cmd/dict/pcollinsg.c
@ -0,0 +1,248 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Routines for handling dictionaries in the "Paperback Collins"
+ * `German' format (with tags surrounded by \5⋯\6 and \xba⋯\xba)
+ */
+
+/*
+ *	\5...\6 escapes (fonts, mostly)
+ *
+ *	h	headword (helvetica 7 pt)
+ *	c	clause (helvetica 7 pt)
+ *	3	helvetica 7 pt
+ *	4	helvetica 6.5 pt
+ *	s	helvetica 8 pt
+ *	x	helvetica 8 pt
+ *	y	helvetica 5 pt
+ *	m	helvetica 30 pt
+ *	1	roman 6 pt
+ *	9	roman 4.5 pt
+ *	p	roman 7 pt
+ *	q	roman 4.5 pt
+ *	2	italic 6 pt
+ *	7	italic 4.5 pt
+ *	b	bold 6 pt
+ *	a	`indent 0:4 left'
+ *	k	`keep 9'
+ *	l	`size 12'
+ */
+
+enum {
+	IBASE=0x69,	/* dotless i */
+	Taglen=32,	/* size of the tag[] buffer filled by reach */
+};
+
+/*
+ * Translation of each input byte to a rune or special code.
+ * 0x05 is TAGS and 0x06 is TAGE (the \5...\6 font escapes);
+ * 0xba is SPCS, which brackets a special-character number;
+ * 0xb9 is IBASE.  Bytes that map to NONE have no translation.
+ */
+static Rune intab[256] = {
+	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
+/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	TAGS,	TAGE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,
+/*10*/	NONE,	0x2d,	0x20,	0x20,	NONE,	NONE,	NONE,	NONE,
+	0x20,	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,	0x2d,
+/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
+	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
+/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
+	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,
+/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
+	0x48,	0x49,	0x4a,	0x4b,	L'L',	0x4d,	0x4e,	0x4f,
+/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
+	0x58,	0x59,	0x5a,	0x5b,	L'\\',	0x5d,	0x5e,	0x5f,
+/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
+	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
+/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
+	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
+/*80*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*90*/	0xdf,	0xe6,	NONE,	MOE,	NONE,	NONE,	NONE,	0xf8,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*A0*/	NONE,	NONE,	0x22,	0xa3,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*B0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	0x7e,
+	NONE,	IBASE,	SPCS,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*C0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*D0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*E0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*F0*/	0x20,	0x20,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+};
+
+/*
+ * Special-character number (the decimal text between two 0xba bytes)
+ * to rune.  pcollgprintentry searches this with looknassoc; numbers
+ * not listed are printed as \N'number'.
+ */
+static Nassoc numtab[] = {
+	{1,	0x2b},
+	{4,	0x3d},
+	{7,	0xb0},
+	{11,	0x2248},
+	{69,	0x2666},
+	{114,	0xae},
+	{340,	0x25b},
+	{341,	0x254},
+	{342,	0x28c},
+	{343,	0x259},
+	{345,	0x292},
+	{346,	0x283},
+	{347,	0x275},
+	{348,	0x28a},
+	{349,	0x2c8},
+	{351,	0x26a},
+	{352,	0x25c},
+	{354,	0x251},
+	{355,	0x7e},
+	{356,	0x252},
+	{384,	0x273},
+	{445,	0xf0},	/* BUG -- should be script eth */
+};
+
+/*
+ * Character that follows a '^' to the accent code handed to liglookup
+ * together with the preceding rune.
+ */
+static Nassoc overtab[] = {
+	{0x2c,	LCED},
+	{0x2f,	LACU},
+	{0x3a,	LUML},
+	{L'\\',	LGRV},
+	{0x5e,	LFRN},
+	{0x7e,	LTIL},
+};
+
+static uchar *reach(uchar*, int);
+
+static Entry	curentry;	/* entry being printed; reach stops at its end */
+static char	tag[Taglen];	/* text most recently collected by reach */
+
+/*
+ * Print entry e of a Paperback Collins `German'-format dictionary.
+ *	cmd 'r'	copy the entry's bytes to the output untranslated
+ *	cmd 'h'	print only text set in font 'h' (the headword)
+ *	other	print the whole entry
+ *
+ * \5...\6 escapes select a font and print nothing themselves.
+ * 0xba...0xba brackets a special-character number, translated through
+ * numtab, or printed as \N'number' when unknown.
+ * '^' marks the next character as an accent for the rune before it;
+ * when no combined rune exists the rune is printed followed by U+00A8
+ * (for ':') or by '^' and the accent character.
+ * Runes are held back one position in rprev to allow that merging.
+ */
+void
+pcollgprintentry(Entry e, int cmd)
+{
+	uchar *p, *pe;
+	int r, rprev = NONE, rx, over = 0, font;
+	char buf[Taglen+8];	/* room for \N'...' around a full tag */
+
+	p = (uchar *)e.start;
+	pe = (uchar *)e.end;
+	curentry = e;
+	if(cmd == 'h')
+		outinhibit = 1;
+	while(p < pe){
+		if(cmd == 'r'){
+			outchar(*p++);
+			continue;
+		}
+		switch(r = intab[*p++]){	/* assign = */
+		case TAGS:
+			if(rprev != NONE){
+				outrune(rprev);
+				rprev = NONE;
+			}
+			p = reach(p, 0x06);
+			font = tag[0];
+			if(cmd == 'h')
+				outinhibit = (font != 'h');
+			break;
+
+		case TAGE:	/* an extra one */
+			break;
+	
+		case SPCS:
+			p = reach(p, 0xba);
+			r = looknassoc(numtab, asize(numtab), strtol(tag,0,0));
+			if(r < 0){
+				if(rprev != NONE){
+					outrune(rprev);
+					rprev = NONE;
+				}
+				/*
+				 * tag can hold Taglen-1 characters, so the
+				 * formatted text must be bounded by the buffer.
+				 */
+				snprint(buf, sizeof buf, "\\N'%s'", tag);
+				outchars(buf);
+				break;
+			}
+			/* else fall through */
+
+		default:
+			if(over){
+				rx = looknassoc(overtab, asize(overtab), r);
+				if(rx > 0)
+					rx = liglookup(rx, rprev);
+				if(rx > 0 && rx != NONE)
+					outrune(rx);
+				else{
+					/* nothing is held back right after a tag */
+					if(rprev != NONE)
+						outrune(rprev);
+					if(r == ':')
+						outrune(0xa8);
+					else{
+						outrune(0x5e);
+						outrune(r);
+					}
+				}
+				over = 0;
+				rprev = NONE;
+			}else if(r == '^'){
+				over = 1;
+			}else{
+				if(rprev != NONE)
+					outrune(rprev);
+				rprev = r;
+			}
+		}
+		
+	}
+	if(rprev != NONE)
+		outrune(rprev);
+	if(cmd == 'h')
+		outinhibit = 0;
+	outnl(0);
+}
+
+/*
+ * Scan forward from fromoff for the byte sequence 0x05 'h' 0x06 (a
+ * headword font escape) and return the offset of its first byte.
+ * If none is found, return the offset just past the last '\r' seen,
+ * or -1 if there was none.  Also returns -1 if the seek fails.
+ */
+long
+pcollgnextoff(long fromoff)
+{
+	int c, state = 0;
+	long defoff = -1;	/* a file offset: keep it as wide as the result */
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	while((c = Bgetc(bdict)) >= 0){
+		if(c == '\r')
+			defoff = Boffset(bdict);
+		switch(state){
+		case 0:
+			if(c == 0x05)
+				state = 1;
+			break;
+		case 1:
+			if(c == 'h')
+				state = 2;
+			else
+				state = 0;
+			break;
+		case 2:
+			/* three bytes consumed: back up to the 0x05 */
+			if(c == 0x06)
+				return (Boffset(bdict)-3);
+			else
+				state = 0;
+			break;
+		}
+	}
+	return defoff;
+}
+
+/*
+ * No pronunciation key is available; report that on bout.
+ */
+void
+pcollgprintkey(void)
+{
+	static char msg[] = "No pronunciation key yet\n";
+
+	Bprint(bout, "%s", msg);
+}
+
+/*
+ * Copy bytes from p into tag[] up to, but not including, the next
+ * tagchar.  Copying also stops at the end of the current entry or
+ * when tag[] is full.  tag[] is always NUL-terminated.
+ * Return the position after the last byte consumed (the tagchar
+ * itself is consumed when it was found).
+ */
+static uchar *
+reach(uchar *p, int tagchar)
+{
+	uchar *end;
+	char *dst, *lim;
+
+	end = (uchar *)curentry.end;
+	lim = &tag[sizeof tag-1];
+	for(dst = tag; p < end; ){
+		if(*p == tagchar){
+			p++;
+			break;
+		}
+		*dst++ = *p++;
+		if(dst >= lim)
+			break;
+	}
+	*dst = 0;
+	return p;
+}
--- a/src/cmd/dict/pgw.c
+++ b/src/cmd/dict/pgw.c
--- a/src/cmd/dict/rev.awk
+++ b/src/cmd/dict/rev.awk
@ -0,0 +1,6 @@
+# Swap the two fields of every two-field line, tab-separated;
+# any other line is reported with an ERROR prefix.
+NF == 2	{
+		printf "%s\t%s\n", $2, $1
+		next
+	}
+	{
+		print "ERROR " $0
+	}
--- a/src/cmd/dict/robert.c
+++ b/src/cmd/dict/robert.c
@ -0,0 +1,312 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Robert Électronique.
+ */
+
+/*
+ * Codes for the special bytes 0xc8-0xd4 of the dictionary text
+ * (see intab), numbered from MULTIE+1 upward.
+ */
+enum
+{
+	CIT = MULTIE+1,	/* citation ptr followed by long int and ascii label */
+	BROM,		/* bold roman */
+	ITON,		/* start italic */
+	ROM,		/* roman */
+	SYM,		/* symbol font? */
+	HEL,		/* helvetica */
+	BHEL,		/* helvetica bold */
+	SMALL,		/* smaller? */
+	ITOFF,		/* end italic */
+	SUP,		/* following character is superscript */
+	SUB		/* following character is subscript */
+};
+
+/*
+ * Translation of each input byte to a rune or special code.
+ * 0x20-0x7f map to themselves and 0x0a to newline; the other bytes
+ * map to symbols and accented letters (NOTE(review): the layout looks
+ * like IBM PC code page 437 -- confirm).  0xc8-0xd4 carry the
+ * citation and font codes; NONE marks bytes with no translation.
+ */
+static Rune intab[256] = {
+	/*0*/	/*1*/	/*2*/	/*3*/	/*4*/	/*5*/	/*6*/	/*7*/
+/*00*/	NONE,	0x263a,	0x263b,	0x2665,	0x2666,	0x2663,	0x2660,	0x2022,
+	0x25d8,	0x298,	L'\n',	0x2642,	0x2640,	0x266a,	0x266b,	0x203b,
+/*10*/	0x21e8,	0x21e6,	0x2195,	0x203c,	0xb6,	0xa7,	0x2043,	0x21a8,
+	0x2191,	0x2193,	0x2192,	0x2190,	0x2319,	0x2194,	0x25b4,	0x25be,
+/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	L'\'',
+	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
+/*30*/	0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
+	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,
+/*40*/	0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
+	0x48,	0x49,	0x4a,	0x4b,	L'L',	0x4d,	0x4e,	0x4f,
+/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
+	0x58,	0x59,	0x5a,	0x5b,	L'\\',	0x5d,	0x5e,	0x5f,
+/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
+	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
+/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
+	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	0x7f,
+/*80*/	0xc7,	0xfc,	0xe9,	0xe2,	0xe4,	0xe0,	0xe5,	0xe7,
+	0xea,	0xeb,	0xe8,	0xef,	0xee,	0xec,	0xc4,	0xc5,
+/*90*/	0xc9,	0xe6,	0xc6,	0xf4,	0xf6,	0xf2,	0xfb,	0xf9,
+	0xff,	0xd6,	0xdc,	0xa2,	0xa3,	0xa5,	0x20a7,	0x283,
+/*a0*/	0xe1,	0xed,	0xf3,	0xfa,	0xf1,	0xd1,	0xaa,	0xba,
+	0xbf,	0x2310,	0xac,	0xbd,	0xbc,	0xa1,	0xab,	0xbb,
+/*b0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*c0*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	CIT,	BROM,	NONE,	ITON,	ROM,	SYM,	HEL,	BHEL,
+/*d0*/	NONE,	SMALL,	ITOFF,	SUP,	SUB,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+/*e0*/	0x3b1,	0xdf,	0x3b3,	0x3c0,	0x3a3,	0x3c3,	0xb5,	0x3c4,
+	0x3a6,	0x398,	0x3a9,	0x3b4,	0x221e,	0xd8,	0x3b5,	0x2229,
+/*f0*/	0x2261,	0xb1,	0x2265,	0x2264,	0x2320,	0x2321,	0xf7,	0x2248,
+	0xb0,	0x2219,	0xb7,	0x221a,	0x207f,	0xb2,	0x220e,	0xa0,
+};
+
+/*
+ * Superscript form of a character, indexed by the character.
+ * Characters not listed have the value 0 (no superscript form).
+ * Superscript 1, 2 and 3 live in Latin-1 (U+00B9, U+00B2, U+00B3);
+ * U+2071 is superscript i and U+2072/U+2073 are unassigned, so the
+ * run U+2070..U+2079 must not be used for them.
+ */
+static Rune suptab[] = {
+	['0'] 0x2070,	['1'] 0xb9,	['2'] 0xb2,	['3'] 0xb3,
+	['4'] 0x2074,	['5'] 0x2075,	['6'] 0x2076,	['7'] 0x2077,
+	['8'] 0x2078,	['9'] 0x2079,	['+'] 0x207a,	['-'] 0x207b,
+	['='] 0x207c,	['('] 0x207d,	[')'] 0x207e,	['a'] 0xaa,
+	['n'] 0x207f,	['o'] 0xba
+};
+
+/*
+ * Subscript form of a character, indexed by the character.
+ * Characters not listed have the value 0.
+ */
+static Rune subtab[] = {
+	['0'] 0x2080,	['1'] 0x2081,	['2'] 0x2082,	['3'] 0x2083,
+	['4'] 0x2084,	['5'] 0x2085,	['6'] 0x2086,	['7'] 0x2087,
+	['8'] 0x2088,	['9'] 0x2089,	['+'] 0x208a,	['-'] 0x208b,
+	['='] 0x208c,	['('] 0x208d,	[')'] 0x208e
+};
+
+/*
+ * Fetch a big-endian 16- or 32-bit quantity from the bytes at p
+ * (p must point at unsigned bytes).  GLONG does its shifts in
+ * unsigned long so that a top byte >= 0x80 is not shifted into the
+ * sign bit of an int.
+ */
+#define	GSHORT(p)	(((p)[0]<<8) | (p)[1])
+#define	GLONG(p)	((long)(((unsigned long)(p)[0]<<24) | ((unsigned long)(p)[1]<<16) | ((unsigned long)(p)[2]<<8) | (unsigned long)(p)[3]))
+
+/* files opened on demand with Bouvrir */
+static char	cfile[] = "/lib/dict/robert/cits.rob";	/* read by citation */
+static char	dfile[] = "/lib/dict/robert/defs.rob";	/* read by robertindexentry */
+static char	efile[] = "/lib/dict/robert/etym.rob";	/* read by robertindexentry */
+static char	kfile[] = "/lib/dict/robert/_phon";	/* copied out by robertprintkey */
+
+/* open handles for cfile, dfile and efile; 0 until first use */
+static Biobuf *	cb;
+static Biobuf *	db;
+static Biobuf *	eb;
+
+static Biobuf *	Bouvrir(char*);
+static void	citation(int, int);
+static void	robertprintentry(Entry*, Entry*, int);
+
+/*
+ * Read len bytes at offset off of b into a freshly allocated,
+ * NUL-terminated buffer described by ent.  A failed seek or a short
+ * read yields a correspondingly shorter (possibly empty) entry, so
+ * that no uninitialized bytes are ever printed.  Exits if memory
+ * cannot be allocated.  The caller frees ent->start.
+ */
+static void
+robertload(Biobuf *b, long off, long len, Entry *ent)
+{
+	long n;
+
+	ent->start = malloc(len+1);
+	if(ent->start == 0){
+		fprint(2, "%s: malloc: %r\n", argv0);
+		exits("malloc");
+	}
+	ent->doff = off;
+	n = -1;
+	if(Bseek(b, off, 0) >= 0)
+		n = Bread(b, ent->start, len);
+	if(n < 0)
+		n = 0;
+	ent->end = ent->start + n;
+	*ent->end = 0;
+}
+
+/*
+ * Print the dictionary entry that index record e refers to.
+ * The record holds, big-endian: etymology address (4 bytes) and
+ * length (2), definition address (4) and length (2), and one more
+ * 4-byte address that is not used here.
+ * For cmd 'h' only the definition is loaded; otherwise the etymology
+ * is loaded too and handed to robertprintentry with the definition.
+ */
+void
+robertindexentry(Entry e, int cmd)
+{
+	uchar *p = (uchar *)e.start;
+	long ea, el, da, dl, fa;
+	Entry def, etym;
+
+	ea = GLONG(&p[0]);
+	el = GSHORT(&p[4]);
+	da = GLONG(&p[6]);
+	dl = GSHORT(&p[10]);
+	fa = GLONG(&p[12]);
+	USED(fa);
+
+	if(db == 0)
+		db = Bouvrir(dfile);
+	robertload(db, da, dl, &def);
+	if(cmd == 'h'){
+		robertprintentry(&def, 0, cmd);
+	}else{
+		if(eb == 0)
+			eb = Bouvrir(efile);
+		robertload(eb, ea, el, &etym);
+		robertprintentry(&def, &etym, cmd);
+		free(etym.start);
+	}
+	free(def.start);
+}
+
+/*
+ * Print the text of def.
+ *	cmd 'r'	copy the bytes to the output untranslated
+ *	cmd 'h'	stop after the first line
+ *	other	print everything; if etym is non-nil it is printed
+ *		right after the first line of def
+ * Font-change bytes print nothing (a hex escape when debugging).
+ * SUP and SUB raise or lower the single character that follows.
+ * A CIT byte is followed by a 4-byte little-endian address and an
+ * ASCII label; the label is skipped and the citation at that address
+ * is printed through citation().
+ */
+static void
+robertprintentry(Entry *def, Entry *etym, int cmd)
+{
+	uchar *p, *pe;
+	Rune r; int c, n;
+	int baseline = 0;
+	int lineno = 0;
+	int cit = 0;
+
+	p = (uchar *)def->start;
+	pe = (uchar *)def->end;
+	while(p < pe){
+		if(cmd == 'r'){
+			outchar(*p++);
+			continue;
+		}
+		c = *p++;
+		switch(r = intab[c]){	/* assign = */
+		case BROM:
+		case ITON:
+		case ROM:
+		case SYM:
+		case HEL:
+		case BHEL:
+		case SMALL:
+		case ITOFF:
+		case NONE:
+			if(debug)
+				outprint("\\%.2ux", c);
+			baseline = 0;
+			break;
+
+		case SUP:
+			baseline = 1;
+			break;
+
+		case SUB:
+			baseline = -1;
+			break;
+
+		case CIT:
+			/* a truncated citation pointer ends the entry */
+			if(pe - p < 4){
+				p = pe;
+				break;
+			}
+			n = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
+			p += 4;
+			if(debug)
+				outprint("[%d]", n);
+			while(p < pe && (*p == ' ' || ('0'<=*p && *p<='9') || *p == '.')){
+				if(debug)
+					outchar(*p);
+				++p;
+			}
+			++cit;
+			outnl(2);
+			citation(n, cmd);
+			baseline = 0;
+			break;
+
+		case '\n':
+			outnl(0);
+			baseline = 0;
+			++lineno;
+			break;
+
+		default:
+			/*
+			 * The tables are sparse: a zero entry means there is
+			 * no raised/lowered form, so keep the plain rune
+			 * rather than emitting a NUL.
+			 */
+			if(baseline > 0 && r < nelem(suptab) && suptab[r] != 0)
+				r = suptab[r];
+			else if(baseline < 0 && r < nelem(subtab) && subtab[r] != 0)
+				r = subtab[r];
+			if(cit){
+				outchar('\n');
+				cit = 0;
+			}
+			outrune(r);
+			baseline = 0;
+			break;
+		}
+		if(r == '\n'){
+			if(cmd == 'h')
+				break;
+			if(lineno == 1 && etym)
+				robertprintentry(etym, 0, cmd);
+		}
+	}
+	outnl(0);
+}
+
+/*
+ * Print the citation stored at offset addr of the citation file:
+ * the text from addr up to the next 0xc8 byte.
+ * Nothing is printed if the seek fails or no complete citation can
+ * be read there (Brdline returns nil at end of file or when the text
+ * does not fit its buffer).
+ */
+static void
+citation(int addr, int cmd)
+{
+	Entry cit;
+
+	if(cb == 0)
+		cb = Bouvrir(cfile);
+	if(Bseek(cb, addr, 0) < 0)
+		return;
+	cit.start = Brdline(cb, 0xc8);
+	if(cit.start == 0)
+		return;
+	cit.end = cit.start + Blinelen(cb) - 1;
+	cit.doff = addr;
+	*cit.end = 0;	/* overwrite the 0xc8 delimiter */
+	robertprintentry(&cit, 0, cmd);
+}
+
+/*
+ * Return the next multiple of 16 strictly greater than fromoff.
+ */
+long
+robertnextoff(long fromoff)
+{
+	/* fill in the low four bits, then step over the boundary */
+	return (fromoff | 15) + 1;
+}
+
+/*
+ * Copy the key file (kfile) to bout, one line at a time.
+ */
+void
+robertprintkey(void)
+{
+	Biobuf *kb;
+	char *line;
+
+	kb = Bouvrir(kfile);
+	for(;;){
+		line = Brdline(kb, '\n');
+		if(line == 0)
+			break;
+		Bwrite(bout, line, Blinelen(kb));
+	}
+	Bterm(kb);
+}
+
+/*
+ * Print entry e, writing directly to bout.  '$' is treated as a
+ * newline.
+ *	cmd 'r'	copy the bytes untranslated
+ *	cmd 'h'	print only from the first line break up to the second
+ *	other	print the whole entry
+ * Codes above MULTIE print nothing (a hex escape when debugging).
+ */
+void
+robertflexentry(Entry e, int cmd)
+{
+	uchar *p, *pe;
+	Rune r; int c;
+	int lineno, headonly;
+
+	p = (uchar *)e.start;
+	pe = (uchar *)e.end;
+	if(cmd == 'r'){
+		while(p < pe)
+			Bputc(bout, *p++);
+		outnl(0);
+		return;
+	}
+
+	headonly = (cmd == 'h');
+	lineno = 1;
+	for(; p < pe; p++){
+		c = *p;
+		r = intab[c];
+		if(r == '$' || r == '\n'){
+			r = '\n';
+			++lineno;
+			if(headonly && lineno > 2)
+				break;
+		}
+		if(headonly && lineno < 2)
+			continue;
+		if(r > MULTIE){
+			if(debug)
+				Bprint(bout, "\\%.2ux", c);
+			continue;
+		}
+		if(r < Runeself)
+			Bputc(bout, r);
+		else
+			Bputrune(bout, r);
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset just past the first '$' at or after fromoff,
+ * or -1 if the seek fails or there is no further '$'.
+ */
+long
+robertnextflex(long fromoff)
+{
+	int ch;
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	for(;;){
+		ch = Bgetc(bdict);
+		if(ch < 0)
+			return -1;
+		if(ch == '$')
+			return Boffset(bdict);
+	}
+}
+
+/*
+ * Open fichier for reading; on failure print a diagnostic on
+ * standard error and exit with status "ouvrir".
+ */
+static Biobuf *
+Bouvrir(char *fichier)
+{
+	Biobuf *b;
+
+	if((b = Bopen(fichier, OREAD)) != 0)
+		return b;
+	fprint(2, "%s: impossible d'ouvrir %s: %r\n", argv0, fichier);
+	exits("ouvrir");
+	return 0;
+}
--- a/src/cmd/dict/simple.c
+++ b/src/cmd/dict/simple.c
@ -0,0 +1,46 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Routines for handling dictionaries in UTF, headword
+ * separated from entry by tab, entries separated by newline.
+ */
+
+/*
+ * Print entry e up to its first newline.  Each tab is replaced by
+ * a space, except that for cmd 'h' printing stops at the first tab
+ * (only the headword is shown).
+ */
+void
+simpleprintentry(Entry e, int cmd)
+{
+	uchar *p, *pe;
+	int ch;
+
+	pe = (uchar *)e.end;
+	for(p = (uchar *)e.start; p < pe; p++){
+		ch = *p;
+		if(ch == '\n')
+			break;
+		if(ch == '\t'){
+			if(cmd == 'h')
+				break;
+			ch = ' ';
+		}
+		outchar(ch);
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset of the line after the one containing fromoff,
+ * or -1 if the seek fails or no complete line can be read.
+ */
+long
+simplenextoff(long fromoff)
+{
+	if(Bseek(bdict, fromoff, 0) >= 0 && Brdline(bdict, '\n') != 0)
+		return Boffset(bdict);
+	return -1;
+}
+
+/*
+ * No pronunciation key is available; report that on bout.
+ */
+void
+simpleprintkey(void)
+{
+	static char msg[] = "No pronunciation key.\n";
+
+	Bprint(bout, "%s", msg);
+}
--- a/src/cmd/dict/slang.c
+++ b/src/cmd/dict/slang.c
@ -0,0 +1,203 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/* Possible tags: the values stored in tagtab and returned by sget */
+enum {
+	DF,		/* definition */
+	DX,		/* definition/example */
+	ET,		/* etymology */
+	EX,		/* example */
+	LA,		/* label */
+	ME,		/* main entry */
+	NU,		/* sense number */
+	PR,		/* pronunciation */
+	PS,		/* grammar part */
+	XR,		/* cross reference */
+	XX,		/* cross reference (whole entry) */
+};
+
+/* Assoc tables must be sorted on first field */
+
+/*
+ * Two-letter tag at the start of a line to its tag value;
+ * sget searches this with lookassoc.
+ */
+static Assoc tagtab[] = {
+	{"df",	DF},
+	{"dx",	DX},
+	{"et",	ET},
+	{"ex",	EX},
+	{"la",	LA},
+	{"me",	ME},
+	{"nu",	NU},
+	{"pr",	PR},
+	{"ps",	PS},
+	{"xr",	XR},
+	{"xx",	XX},
+};
+static long	sget(char *, char *, char **, char **);
+static void	soutpiece(char *, char *);
+
+/*
+ * Write the value [vs, ve) preceded by pre (if non-nil) and
+ * followed by post.
+ */
+static void
+slangframe(char *pre, char *vs, char *ve, char *post)
+{
+	if(pre != 0)
+		outchars(pre);
+	soutpiece(vs, ve);
+	outchars(post);
+}
+
+/*
+ * Print entry e.  For cmd 'h' only the value of the first tag is
+ * printed, and only if that tag is ME.  Otherwise every tagged value
+ * is printed in turn, decorated according to its tag; the loop ends
+ * when sget finds no further tag.
+ */
+void
+slangprintentry(Entry e, int cmd)
+{
+	char *p, *pe, *vs, *ve;
+
+	p = e.start;
+	pe = e.end;
+	if(cmd == 'h') {
+		if(sget(p, pe, &vs, &ve) == ME)
+			soutpiece(vs, ve);
+		outnl(0);
+		return;
+	}
+	while(p < pe) {
+		switch(sget(p, pe, &vs, &ve)) {
+		case DF:
+		case DX:
+			slangframe(0, vs, ve, ".  ");
+			break;
+		case ET:
+		case PR:
+			slangframe("[", vs, ve, "] ");
+			break;
+		case EX:
+			slangframe("E.g., ", vs, ve, ".  ");
+			break;
+		case LA:
+			slangframe("(", vs, ve, ") ");
+			break;
+		case ME:
+			outnl(0);
+			soutpiece(vs, ve);
+			outnl(0);
+			break;
+		case NU:
+			outnl(2);
+			slangframe(0, vs, ve, ".  ");
+			break;
+		case PS:
+			outnl(1);
+			slangframe(0, vs, ve, ". ");
+			break;
+		case XR:
+		case XX:
+			slangframe("See ", vs, ve, ".  ");
+			break;
+		default:
+			ve = pe;	/* no more tags: ends the loop */
+			break;
+		}
+		p = ve;
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset of the first line at or after fromoff that
+ * starts with "me ", or -1 if the seek fails or no such line is found.
+ */
+long
+slangnextoff(long fromoff)
+{
+	char *line;
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	while((line = Brdline(bdict, '\n')) != 0)
+		if(line[0] == 'm' && line[1] == 'e' && line[2] == ' ')
+			return Boffset(bdict) - Blinelen(bdict);
+	return -1;
+}
+
+/*
+ * There is no key for this dictionary; report that on bout.
+ */
+void
+slangprintkey(void)
+{
+	static char msg[] = "No key\n";
+
+	Bprint(bout, "%s", msg);
+}
+
+/*
+ * Starting from b (assumed to be at the start of a line), find the
+ * next line beginning with a known tag: two letters and a space.
+ * Don't go past e, but assume *e==0.
+ * Return tag value, or -1 if no more tags before e.
+ * Set *pvb to beginning of value (after the tag and its space).
+ * Set *pve to the end of the value: the start of the next tagged
+ * line, or e-1 if the value runs to the end of the entry.
+ * *pvb and *pve are left untouched when -1 is returned.
+ */
+static long
+sget(char *b, char *e, char **pvb, char **pve)
+{
+	char *p;
+	char buf[3];
+	long t, tans;
+
+	buf[2] = 0;
+	tans = -1;
+	for(p = b;;) {
+		/*
+		 * A tag needs three characters before e.  The length test
+		 * keeps p[2] inside the entry; slangprintentry routinely
+		 * restarts the scan at e-1.
+		 */
+		if(e - p > 2 && p[2] == ' ') {
+			buf[0] = p[0];
+			buf[1] = p[1];
+			t = lookassoc(tagtab, asize(tagtab), buf);
+			if(t < 0) {
+				if(debug)
+					err("tag %s\n", buf);
+				p += 3;
+			} else {
+				if(tans < 0) {
+					p += 3;
+					tans = t;
+					*pvb = p;
+				} else {
+					*pve = p;
+					break;
+				}
+			}
+		}
+		p = strchr(p, '\n');
+		if(!p || ++p >= e) {
+			if(tans >= 0)
+				*pve = e-1;
+			break;
+		}
+	}
+	return tans;
+}
+
+/*
+ * Write the characters of [b, e) with newlines turned into spaces,
+ * runs of spaces squeezed to one, and every '@' dropped.
+ */
+static void
+soutpiece(char *b, char *e)
+{
+	int ch, prev;
+
+	for(prev = 0; b < e; b++, prev = ch) {
+		ch = *b;
+		if(ch == '\n')
+			ch = ' ';
+		if(ch == '@' || (ch == ' ' && prev == ' '))
+			continue;
+		outchar(ch);
+	}
+}
--- a/src/cmd/dict/t.awk
+++ b/src/cmd/dict/t.awk
@ -0,0 +1,13 @@
+# A two-field line whose second field holds alternatives joined by
+# " or " is expanded to one line per alternative, unless the field
+# contains "(or".  Every other line is copied through unchanged.
+NF != 2 || $2 !~ / or / || $2 ~ /\(or/ {
+		print
+		next
+	}
+	{
+		cnt = split($2, alt, / or /)
+		for(k = 1; k <= cnt; k++)
+			printf "%s\t%s\n", $1, alt[k]
+	}
--- a/src/cmd/dict/thesaurus.c
+++ b/src/cmd/dict/thesaurus.c
@ -0,0 +1,86 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Print thesaurus entry e.
+ *	cmd 'r'	copy the bytes to the output untranslated
+ *	cmd 'h'	print only the headword: stop at the first '*' code
+ *		other than *L, or at a space followed by '*'
+ *	other	print the whole entry
+ * Escapes in the text:
+ *	*L	start of an entry line (new line unless cmd is 'h')
+ *	*Sx	printed as (x)
+ *	*?	any other code prints nothing
+ *	#ld	letter l (a, e, o or c) with accent number d (1-5);
+ *		other letters are printed unaccented
+ *	+ <	dropped
+ * An escape cut short by the end of the entry is dropped rather than
+ * read from beyond e.end.
+ */
+void
+thesprintentry(Entry e, int cmd)
+{
+	char *p, *pe;
+	int c, i;
+
+	p = e.start;
+	pe = e.end;
+	while(p < pe) {
+		c = *p++;
+		if(cmd == 'r') {
+			outchar(c);
+			continue;
+		}
+		switch(c) {
+		case '*':
+			/* code letter missing: '*' was the last byte */
+			if(p >= pe)
+				break;
+			c = *p++;
+			if(cmd == 'h' && c != 'L') {
+				outnl(0);
+				return;
+			}
+			if(c == 'L' && cmd != 'h')
+				outnl(0);
+			if(c == 'S') {
+				outchar('(');
+				if(p < pe)
+					outchar(*p++);
+				outchar(')');
+			}
+			break;
+		case '#':
+			/* needs a letter and a digit */
+			if(pe - p < 2) {
+				p = pe;
+				break;
+			}
+			c = *p++;
+			i = *p++ - '0' - 1;
+			if(i < 0 || i > 4)
+				break;
+			switch(c) {
+			case 'a': outrune(L"áàâäa"[i]); break;
+			case 'e': outrune(L"éèêëe"[i]); break;
+			case 'o': outrune(L"óòôöo"[i]); break;
+			case 'c': outrune(L"ccccç"[i]); break;
+			default: outchar(c); break;
+			}
+			break;
+		case '+':
+		case '<':
+			break;
+		case ' ':
+			if(cmd == 'h' && *p == '*') {
+				outnl(0);
+				return;
+			}
+			/* fall through */
+		default:
+			outchar(c);
+		}
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset of the first line at or after fromoff that
+ * starts with "*L", or -1 if the seek fails or no such line is found.
+ */
+long
+thesnextoff(long fromoff)
+{
+	char *line;
+
+	if(Bseek(bdict, fromoff, 0) < 0)
+		return -1;
+	while((line = Brdline(bdict, '\n')) != 0)
+		if(line[0] == '*' && line[1] == 'L')
+			return Boffset(bdict) - Blinelen(bdict);
+	return -1;
+}
+
+/*
+ * There is no key for this dictionary; report that on bout.
+ */
+void
+thesprintkey(void)
+{
+	static char msg[] = "No key\n";
+
+	Bprint(bout, "%s", msg);
+}
--- a/src/cmd/dict/utils.c
+++ b/src/cmd/dict/utils.c
@ -0,0 +1,577 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Table of the dictionaries this program knows about.
+ * Each row gives, in order: a short name, a descriptive title,
+ * the data file path, the index file path, and three format-specific
+ * routines (next-offset, print-entry, print-key).
+ * NOTE(review): the field names, and the directory the paths are
+ * taken relative to, are defined outside this section (dict.h and
+ * the callers) -- confirm there.
+ * The all-zero row marks the end of the table.
+ */
+Dict dicts[] = {
+	{"oed",		"Oxford English Dictionary, 2nd Ed.",
+	 "dict/oed2",	"dict/oed2index",
+	 oednextoff,	oedprintentry,		oedprintkey},
+	{"ahd",		"American Heritage Dictionary, 2nd College Ed.",
+	 "ahd/DICT.DB",	"ahd/index",
+	 ahdnextoff,	ahdprintentry,		ahdprintkey},
+	{"pgw",		"Project Gutenberg Webster Dictionary",
+	 "dict/pgw",	"dict/pgwindex",
+	 pgwnextoff,	pgwprintentry,		pgwprintkey},
+	{"thesaurus",	"Collins Thesaurus",
+	 "dict/thesaurus",	"dict/thesindex",
+	 thesnextoff,	thesprintentry,	thesprintkey},
+
+	{"ce",		"Gendai Chinese->English",
+	 "dict/world/sansdata/sandic24.dat",
+	 "dict/world/sansdata/ceindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"ceh",		"Gendai Chinese->English (Hanzi index)",
+	 "dict/world/sansdata/sandic24.dat",
+	 "dict/world/sansdata/cehindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"ec",		"Gendai English->Chinese",
+	 "dict/world/sansdata/sandic24.dat",
+	 "dict/world/sansdata/ecindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"dae",		"Gyldendal Danish->English",
+	 "dict/world/gylddata/sandic30.dat",
+	 "dict/world/gylddata/daeindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"eda",		"Gyldendal English->Danish",
+	 "dict/world/gylddata/sandic29.dat",
+	 "dict/world/gylddata/edaindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"due",		"Wolters-Noordhoff Dutch->English",
+	 "dict/world/woltdata/sandic07.dat",
+	 "dict/world/woltdata/deindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"edu",		"Wolters-Noordhoff English->Dutch",
+	 "dict/world/woltdata/sandic06.dat",
+	 "dict/world/woltdata/edindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"fie",		"WSOY Finnish->English",
+	 "dict/world/werndata/sandic32.dat",
+	 "dict/world/werndata/fieindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"efi",		"WSOY English->Finnish",
+	 "dict/world/werndata/sandic31.dat",
+	 "dict/world/werndata/efiindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"fe",		"Collins French->English",
+	 "dict/fe",	"dict/feindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+	{"ef",		"Collins English->French",
+	 "dict/ef",	"dict/efindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+
+	{"ge",		"Collins German->English",
+	 "dict/ge",	"dict/geindex",
+	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
+	{"eg",		"Collins English->German",
+	 "dict/eg",	"dict/egindex",
+	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
+
+	{"ie",		"Collins Italian->English",
+	 "dict/ie",	"dict/ieindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+	{"ei",		"Collins English->Italian",
+	 "dict/ei",	"dict/eiindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+
+	{"je",		"Sanshusha Japanese->English",
+	 "dict/world/sansdata/sandic18.dat",
+	 "dict/world/sansdata/jeindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"jek",		"Sanshusha Japanese->English (Kanji index)",
+	 "dict/world/sansdata/sandic18.dat",
+	 "dict/world/sansdata/jekindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"ej",		"Sanshusha English->Japanese",
+	 "dict/world/sansdata/sandic18.dat",
+	 "dict/world/sansdata/ejindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"tjeg",	"Sanshusha technical Japanese->English,German",
+	 "dict/world/sansdata/sandic16.dat",
+	 "dict/world/sansdata/tjegindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"tjegk",	"Sanshusha technical Japanese->English,German (Kanji index)",
+	 "dict/world/sansdata/sandic16.dat",
+	 "dict/world/sansdata/tjegkindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"tegj",	"Sanshusha technical English->German,Japanese",
+	 "dict/world/sansdata/sandic16.dat",
+	 "dict/world/sansdata/tegjindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"tgje",	"Sanshusha technical German->Japanese,English",
+	 "dict/world/sansdata/sandic16.dat",
+	 "dict/world/sansdata/tgjeindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"ne",		"Kunnskapforlaget Norwegian->English",
+	 "dict/world/kunndata/sandic28.dat",
+	 "dict/world/kunndata/neindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"en",		"Kunnskapforlaget English->Norwegian",
+	 "dict/world/kunndata/sandic27.dat",
+	 "dict/world/kunndata/enindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"re",		"Leon Ungier Russian->English",
+	 "dict/re",	"dict/reindex",
+	 simplenextoff,	simpleprintentry,	simpleprintkey},
+	{"er",		"Leon Ungier English->Russian",
+	 "dict/re",	"dict/erindex",
+	 simplenextoff,	simpleprintentry,	simpleprintkey},
+
+	{"se",		"Collins Spanish->English",
+	 "dict/se",	"dict/seindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+	{"es",		"Collins English->Spanish",
+	 "dict/es",	"dict/esindex",
+	 pcollnextoff,	pcollprintentry,	pcollprintkey},
+
+	{"swe",		"Esselte Studium Swedish->English",
+	 "dict/world/essedata/sandic34.dat",
+	 "dict/world/essedata/sweindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+	{"esw",		"Esselte Studium English->Swedish",
+	 "dict/world/essedata/sandic33.dat",
+	 "dict/world/essedata/eswindex",
+	 worldnextoff,	worldprintentry,	worldprintkey},
+
+	{"movie",	"Movies -- by title",
+	 "movie/data",	"dict/movtindex",
+	 movienextoff,	movieprintentry,	movieprintkey},
+	{"moviea",	"Movies -- by actor",
+	 "movie/data",	"dict/movaindex",
+	 movienextoff,	movieprintentry,	movieprintkey},
+	{"movied",	"Movies -- by director",
+	 "movie/data",	"dict/movdindex",
+	 movienextoff,	movieprintentry,	movieprintkey},
+
+	{"slang",	"English Slang",
+	 "dict/slang",	"dict/slangindex",
+	 slangnextoff,	slangprintentry,	slangprintkey},
+
+	{"robert",	"Robert Électronique",
+	 "dict/robert/_pointers",	"dict/robert/_index",
+	 robertnextoff,	robertindexentry,	robertprintkey},
+	{"robertv",	"Robert Électronique - formes des verbes",
+	 "dict/robert/flex.rob",	"dict/robert/_flexindex",
+	 robertnextflex,	robertflexentry,	robertprintkey},
+
+	{0, 0, 0, 0, 0}
+};
+
+/*
+ * One accent and the precomposed letters available for it.
+ * pairs is a flat list of <base rune, accented rune> couples ended
+ * by a 0 base rune; liglookup scans it two runes at a time.
+ */
+typedef struct Lig Lig;
+struct Lig {
+	Rune	start;		/* accent rune */
+	Rune	pairs[100];		/* <char,accented version> pairs */
+};
+
+/*
+ * Accent table, indexed by accent code minus LIGS.
+ * Rows whose pairs list is just {0} have no precomposed forms here,
+ * so liglookup returns NONE for them.
+ */
+static Lig ligtab[Nligs] = {
+[LACU-LIGS]	{0xb4,	{0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107, 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd, 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a, 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3, 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b, 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd, 0x5a, 0x179, 0x7a, 0x17a, 0}},
+[LGRV-LIGS]	{0x2cb,	{0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8, 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2, 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
+[LUML-LIGS]	{0xa8,	{0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb, 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6, 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
+[LCED-LIGS]	{0xb8,	{0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136, 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145, 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e, 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
+[LTIL-LIGS]	{0x2dc,	{0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129, 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5, 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
+[LBRV-LIGS]	{0x2d8,	{0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115, 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d, 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c, 0x75, 0x16d, 0}},
+[LRNG-LIGS]	{0x2da,	{0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
+[LDOT-LIGS]	{0x2d9,	{0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117, 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f, 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
+[LDTB-LIGS]	{0x2e,	{0}},
+[LFRN-LIGS]	{0x2322,	{0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109, 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d, 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee, 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4, 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb, 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176, 0x79, 0x177, 0}},
+[LFRB-LIGS]	{0x32f,	{0}},
+[LOGO-LIGS]	{0x2db,	{0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119, 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172, 0x75, 0x173, 0}},
+[LMAC-LIGS]	{0xaf,	{0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113, 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c, 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
+[LHCK-LIGS]	{0x2c7,	{0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f, 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e, 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159, 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165, 0x5a, 0x17d, 0x7a, 0x17e, 0}},
+[LASP-LIGS]	{0x2bd,	{0}},
+[LLEN-LIGS]	{0x2bc,	{0}},
+[LBRB-LIGS]	{0x32e,	{0}}
+};
+
+/*
+ * Short rune sequences, indexed by code minus MULTI; each row is
+ * zero-terminated and holds at most four runes.
+ * NOTE(review): presumably the expansion written for each multi-rune
+ * code; the users of this table are outside this section -- confirm.
+ */
+Rune multitab[Nmulti][5] = {
+[MAAS-MULTI]	{0x2bd, 0x3b1, 0},
+[MALN-MULTI]	{0x2bc, 0x3b1, 0},
+[MAND-MULTI]	{0x61, 0x6e, 0x64, 0},
+[MAOQ-MULTI]	{0x61, 0x2f, 0x71, 0},
+[MBRA-MULTI]	{0x3c, 0x7c, 0},
+[MDD-MULTI]	{0x2e, 0x2e, 0},
+[MDDD-MULTI]	{0x2e, 0x2e, 0x2e, 0},
+[MEAS-MULTI]	{0x2bd, 0x3b5, 0},
+[MELN-MULTI]	{0x2bc, 0x3b5, 0},
+[MEMM-MULTI]	{0x2014, 0x2014, 0},
+[MHAS-MULTI]	{0x2bd, 0x3b7, 0},
+[MHLN-MULTI]	{0x2bc, 0x3b7, 0},
+[MIAS-MULTI]	{0x2bd, 0x3b9, 0},
+[MILN-MULTI]	{0x2bc, 0x3b9, 0},
+[MLCT-MULTI]	{0x63, 0x74, 0},
+[MLFF-MULTI]	{0x66, 0x66, 0},
+[MLFFI-MULTI]	{0x66, 0x66, 0x69, 0},
+[MLFFL-MULTI]	{0x66, 0x66, 0x6c, 0},
+[MLFL-MULTI]	{0x66, 0x6c, 0},
+[MLFI-MULTI]	{0x66, 0x69, 0},
+[MLLS-MULTI]	{0x26b, 0x26b, 0},
+[MLST-MULTI]	{0x73, 0x74, 0},
+[MOAS-MULTI]	{0x2bd, 0x3bf, 0},
+[MOLN-MULTI]	{0x2bc, 0x3bf, 0},
+[MOR-MULTI]	{0x6f, 0x72, 0},
+[MRAS-MULTI]	{0x2bd, 0x3c1, 0},
+[MRLN-MULTI]	{0x2bc, 0x3c1, 0},
+[MTT-MULTI]	{0x7e, 0x7e, 0},
+[MUAS-MULTI]	{0x2bd, 0x3c5, 0},
+[MULN-MULTI]	{0x2bc, 0x3c5, 0},
+[MWAS-MULTI]	{0x2bd, 0x3c9, 0},
+[MWLN-MULTI]	{0x2bc, 0x3c9, 0},
+[MOE-MULTI]	{0x6f, 0x65, 0},
+[MES-MULTI]	{0x20, 0x20, 0},
+};
+
+/*
+ * Helpers for fold and foldre.
+ * risupper: r is an ASCII capital letter (0x41-0x5a).
+ * rislatin1: r lies in 0xC0-0xFF, the range latin_fold_tab covers.
+ * rtolower: shift an ASCII capital to its lower-case letter; only
+ * meaningful when risupper(r) holds.
+ */
+#define	risupper(r)	(0x41 <= (r) && (r) <= 0x5a)
+#define	rislatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)
+#define	rtolower(r)	((r)-'A'+'a')
+
+/*
+ * Indexed by rune minus 0xc0.  A 0 entry means the rune has no
+ * folded form and is left alone by fold and foldre.
+ */
+static Rune latin_fold_tab[] =
+{
+/*	Table to fold latin 1 characters to ASCII equivalents
+			based at Rune value 0xc0
+
+	 À    Á    Â    Ã    Ä    Å    Æ    Ç
+	 È    É    Ê    Ë    Ì    Í    Î    Ï
+	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
+	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
+	 à    á    â    ã    ä    å    æ    ç
+	 è    é    ê    ë    ì    í    î    ï
+	 ð    ñ    ò    ó    ô    õ    ö    ÷
+	 ø    ù    ú    û    ü    ý    þ    ÿ
+*/
+	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
+	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
+	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
+	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
+};
+
+/*
+ * Stack of saved translation tables maintained by changett;
+ * ntt is the number of slots currently in use.
+ */
+static Rune 	*ttabstack[20];
+static int	ntt;
+
+/*
+ * Binary search of tab, an array of n Assoc's sorted by key.
+ * Returns the val stored with key, or -1 when key is absent.
+ */
+long
+lookassoc(Assoc *tab, int n, char *key)
+{
+	long lo, hi, mid;
+	int cmp;
+
+	/* invariant: any match lies strictly between lo and hi */
+	lo = -1;
+	hi = n;
+	while(hi - lo > 1){
+		mid = (hi+lo)/2;
+		cmp = strcmp(key, tab[mid].key);
+		if(cmp == 0)
+			return tab[mid].val;
+		if(cmp < 0)
+			hi = mid;
+		else
+			lo = mid;
+	}
+	return -1;
+}
+
+/*
+ * Like lookassoc, but for tab sorted by a numeric key:
+ * returns the val stored with key, or -1 when key is absent.
+ */
+long
+looknassoc(Nassoc *tab, int n, long key)
+{
+	long lo, hi, mid;
+
+	/* invariant: any match lies strictly between lo and hi */
+	lo = -1;
+	hi = n;
+	while(hi - lo > 1){
+		mid = (hi+lo)/2;
+		if(key == tab[mid].key)
+			return tab[mid].val;
+		if(key < tab[mid].key)
+			hi = mid;
+		else
+			lo = mid;
+	}
+	return -1;
+}
+
+/*
+ * Format a message print-style and write it to file descriptor 2
+ * as "argv0: message\n".  Messages are cut to fit a 1000-byte buffer.
+ */
+void
+err(char *fmt, ...)
+{
+	va_list ap;
+	char msg[1000];
+
+	va_start(ap, fmt);
+	vsnprint(msg, sizeof msg, fmt, ap);
+	va_end(ap);
+
+	fprint(2, "%s: %s\n", argv0, msg);
+}
+
+/*
+ * Write rune r to bout unless output is inhibited.
+ * linelen counts what has been written on the current line; once it
+ * exceeds breaklen, the next blank is replaced by a newline and the
+ * count restarts.
+ */
+void
+outrune(long r)
+{
+	if(outinhibit)
+		return;
+	linelen++;
+	if(r == 0x20 && linelen > breaklen) {
+		linelen = 0;
+		Bputc(bout, '\n');
+		return;
+	}
+	Bputrune(bout, r);
+}
+
+/* Write each rune of the zero-terminated string rp with outrune. */
+void
+outrunes(Rune *rp)
+{
+	for(; *rp != 0; rp++)
+		outrune(*rp);
+}
+
+/*
+ * Like outrune, but for an argument known to be a single char:
+ * it is written with Bputc, and a blank met past breaklen is
+ * turned into the newline that gets written.
+ */
+void
+outchar(int c)
+{
+	if(outinhibit)
+		return;
+	linelen++;
+	if(c == ' ' && linelen > breaklen) {
+		linelen = 0;
+		c = '\n';
+	}
+	Bputc(bout, c);
+}
+
+/* Write each char of the NUL-terminated string s with outchar. */
+void
+outchars(char *s)
+{
+	for(; *s != 0; s++)
+		outchar(*s);
+}
+
+/*
+ * Format print-style into a 1000-byte buffer and send the result
+ * through outchars, so line breaking applies to it as well.
+ */
+void
+outprint(char *fmt, ...)
+{
+	va_list ap;
+	char text[1000];
+
+	va_start(ap, fmt);
+	vsnprint(text, sizeof text, fmt, ap);
+	va_end(ap);
+
+	outchars(text);
+}
+
+/*
+ * Write the bytes in [b, e) with outchar: each newline becomes a
+ * blank, and a blank that directly follows a blank is dropped.
+ */
+void
+outpiece(char *b, char *e)
+{
+	int ch, prev;
+
+	for(prev = 0; b < e; prev = ch) {
+		ch = *b++;
+		if(ch == '\n')
+			ch = ' ';
+		if(ch != ' ' || prev != ' ')
+			outchar(ch);
+	}
+}
+
+/*
+ * Make sure output is at the start of a line, unless inhibited.
+ * ind == 0: end the current line if anything is on it.
+ * ind != 0: finish positioned after a two-blank indent; when ind > 1
+ * an empty line is left before the indented one.
+ * Slight hack: a current line only one or two characters long is
+ * assumed to hold nothing but blanks, so it is topped up or reused
+ * instead of ended.
+ */
+void
+outnl(int ind)
+{
+	if(outinhibit)
+		return;
+
+	if(ind == 0) {
+		if(linelen != 0) {
+			Bputc(bout, '\n');
+			linelen = 0;
+		}
+		return;
+	}
+
+	if(ind > 1) {
+		if(linelen > 2)
+			Bputc(bout, '\n');
+		Bprint(bout, "\n  ");
+	} else {
+		switch(linelen) {
+		case 0:
+			Bprint(bout, "  ");
+			break;
+		case 1:
+			Bputc(bout, ' ');
+			break;
+		case 2:
+			/* already sitting at the indent */
+			break;
+		default:
+			Bprint(bout, "\n  ");
+			break;
+		}
+	}
+	linelen = 2;
+}
+
+/*
+ * Fold the zero-terminated rune string rp in place, using the
+ * sort(1) notion of folding: Latin-1 accented letters are replaced
+ * by their latin_fold_tab equivalents and upper-case ASCII letters
+ * become lower case.
+ */
+void
+fold(Rune *rp)
+{
+	Rune r, plain;
+
+	for(; *rp != 0; rp++) {
+		r = *rp;
+		if(rislatin1(r)) {
+			plain = latin_fold_tab[r-0xc0];
+			if(plain != 0)
+				r = plain;
+		}
+		if(risupper(r))
+			r = rtolower(r);
+		*rp = r;
+	}
+}
+
+/*
+ * Same folding as fold, but reading the UTF string old and writing
+ * the folded UTF string to new, which the caller must have made big
+ * enough.  old is a regular expression; its metacharacters are not
+ * touched by folding.
+ */
+void
+foldre(char *new, char *old)
+{
+	Rune r, plain;
+
+	while(*old != 0) {
+		old += chartorune(&r, old);
+		if(rislatin1(r)) {
+			plain = latin_fold_tab[r-0xc0];
+			if(plain != 0)
+				r = plain;
+		}
+		if(risupper(r))
+			r = rtolower(r);
+		new += runetochar(new, &r);
+	}
+	*new = 0;
+}
+
+/*
+ * Compare the zero-terminated rune strings s and t.  Returns
+ *	-2 if s strictly precedes t
+ *	-1 if s is a proper prefix of t
+ *	 0 if s and t are the same
+ *	 1 if t is a proper prefix of s
+ *	 2 if t strictly precedes s
+ */
+int
+acomp(Rune *s, Rune *t)
+{
+	int a, b;
+
+	/* skip the common prefix */
+	for(;; s++, t++) {
+		a = *s;
+		b = *t;
+		if(a != b)
+			break;
+		if(a == 0)
+			return 0;
+	}
+	if(a == 0)
+		return -1;
+	if(b == 0)
+		return 1;
+	return a < b ? -2 : 2;
+}
+
+/*
+ * Copy the zero-terminated rune string 'from' to 'to',
+ * terminator included.
+ */
+void
+runescpy(Rune *to, Rune *from)
+{
+	Rune r;
+
+	do {
+		r = *from++;
+		*to++ = r;
+	} while(r != 0);
+}
+
+/*
+ * Convert the leading run of ASCII digits in r to a long.
+ * Stops at the first non-digit; returns 0 if there are no digits.
+ * No overflow detection.
+ */
+long
+runetol(Rune *r)
+{
+	int c;
+	long n;
+
+	for(n = 0; (c = *r) >= 0x30 && c <= 0x39; r++)
+		n = n*10 + (c - '0');
+	return n;
+}
+
+/*
+ * Return the rune for r carrying accent acc (acc in [LIGS..LIGE-1])
+ * if ligtab lists one; otherwise, or if acc is out of range,
+ * return NONE.
+ */
+Rune
+liglookup(Rune acc, Rune r)
+{
+	Rune *pairs;
+	int i;
+
+	if(acc < LIGS || acc >= LIGE)
+		return NONE;
+	pairs = ligtab[acc-LIGS].pairs;
+	/* pairs[i] is a base rune, pairs[i+1] its accented form */
+	for(i = 0; pairs[i] != 0; i += 2)
+		if(pairs[i] == r)
+			return pairs[i+1];
+	return NONE;
+}
+
+/*
+ * Maintain the stack of translation tables (a translation table is
+ * an array of Runes indexed by bytes or 7-bit bytes).
+ *	curtab == 0:	empty the stack and return 0
+ *	starting:	push curtab and return newtab
+ *	otherwise:	pop and return the most recently pushed table
+ * On overflow or underflow nothing changes and curtab is returned;
+ * the problem is reported only when debug is set.
+ */
+Rune *
+changett(Rune *curtab, Rune *newtab, int starting)
+{
+	if(curtab == 0) {
+		ntt = 0;
+		return 0;
+	}
+
+	if(!starting) {
+		if(ntt == 0) {
+			if(debug)
+				err("translation stack underflow");
+			return curtab;
+		}
+		return ttabstack[--ntt];
+	}
+
+	if(ntt >= asize(ttabstack)) {
+		if(debug)
+			err("translation stack overflow");
+		return curtab;
+	}
+	ttabstack[ntt++] = curtab;
+	return newtab;
+}
--- a/src/cmd/dict/world.c
+++ b/src/cmd/dict/world.c
@ -0,0 +1,184 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+#include "kuten.h"
+
+/*
+ * Routines for handling dictionaries in the "Languages of the World"
+ * format.  worldnextoff *must* be called with <address of valid entry>+1.
+ */
+
+/* 16-bit value stored high byte first in the two bytes at p */
+#define	GSHORT(p)	(((p)[0]<<8)|(p)[1])
+
+/* translates and prints one byte of entry text; defined below */
+static void	putchar(int, int*);
+
+/* chartab value for a byte that has no rune of its own */
+#define	NONE	0xffff
+
+/* adapted from jhelling@cs.ruu.nl (Jeroen Hellingman) */
+
+/*
+ * chartab maps each byte value (0x00-0xff) found in entry text to
+ * the Rune printed for it.  NONE marks bytes with no direct
+ * translation; putchar deals with those separately.
+ */
+static Rune chartab[] = {
+
+/*00*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	L'\n',	0xe6,	0xf8,	0xe5,	0xe4,	0xf6,
+/*10*/	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,	NONE,
+	NONE,	NONE,	NONE,	0xc6,	0xd8,	0xc5,	0xc4,	0xd6,
+
+/*20*/	0x20,	0x21,	0x22,	0x23,	0x24,	0x25,	0x26,	'\'',
+	0x28,	0x29,	0x2a,	0x2b,	0x2c,	0x2d,	0x2e,	0x2f,
+/*30*/  0x30,	0x31,	0x32,	0x33,	0x34,	0x35,	0x36,	0x37,
+	0x38,	0x39,	0x3a,	0x3b,	0x3c,	0x3d,	0x3e,	0x3f,
+/*40*/  0x40,	0x41,	0x42,	0x43,	0x44,	0x45,	0x46,	0x47,
+	0x48,	0x49,	0x4a,	0x4b,	L'L',	0x4d,	0x4e,	0x4f,
+/*50*/	0x50,	0x51,	0x52,	0x53,	0x54,	0x55,	0x56,	0x57,
+	0x58,	0x59,	0x5a,	0x5b,	L'\\',	0x5d,	0x5e,	0x5f,
+/*60*/	0x60,	0x61,	0x62,	0x63,	0x64,	0x65,	0x66,	0x67,
+	0x68,	0x69,	0x6a,	0x6b,	0x6c,	0x6d,	0x6e,	0x6f,
+/*70*/	0x70,	0x71,	0x72,	0x73,	0x74,	0x75,	0x76,	0x77,
+	0x78,	0x79,	0x7a,	0x7b,	0x7c,	0x7d,	0x7e,	NONE,
+
+/*80*/	0xc7,	0xfc,	0xe9,	0xe2,	0xe4,	0xe0,	0xe5,	0xe7,
+	0xea,	0xeb,	0xe8,	0xef,	0xee,	0xec,	0xc4,	0xc5,
+/*90*/	0xc9,	0xe6,	0xc6,	0xf4,	0xf6,	0xf2,	0xfb,	0xf9,
+	0xff,	0xd6,	0xdc,	0xa2,	0xa3,	0xa5,	0x20a7,	0x283,
+/*a0*/	0xe1,	0xed,	0xf3,	0xfa,	0xf1,	0xd1,	0xaa,	0xba,
+	0xbf,	0x2310,	0xac,	0xbd,	0xbc,	0xa1,	0xab,	0xbb,
+
+/*b0*/	0x254,	0x259,	0xf0,	0x283,	0x292,	0x14b,	0x251,	0x7a,
+	0x26a,	0xf0,	0x292,	0xe3,	0x153,	0x169,	0x28c,	0x265,
+/*c0*/	0x280,	0xeb,	0x6c,	0x28c,	0xf5,	0xf1,	0x152,	NONE,
+	NONE,	0x53,	0x73,	0x5a,	0x7a,	NONE,	NONE,	NONE,
+/*d0*/	0xdf,	NONE,	NONE,	0x101,	0x12b,	0x16b,	0x113,	0x14d,	
+	NONE,	NONE,	NONE,	0x20,	NONE,	NONE,	NONE,	NONE,
+
+/*e0*/	0x3b1,	0x3b2,	0x3b3,	0x3c0,	0x3a3,	0x3c3,	0xb5,	0x3c4,
+	0x3a6,	0x398,	0x3a9,	0x3b4,	0x221e,	0xd8,	0x3b5,	0x2229,
+/*f0*/	0x2261,	0xb1,	0x2265,	0x2264,	0x2320,	0x2321,	0xf7,	0x2248,
+	0xb0,	0x2219,	0xb7,	NONE,	NONE,	NONE,	NONE,	NONE,
+};
+
+/*
+ * Values for state[0] in putchar.  Each lo state is its hi state
+ * plus one; putchar relies on that when it does state[0]++ and
+ * state[0]-- to move between the two halves of a two-byte character.
+ */
+enum{ Utf, Kanahi, Kanalo=Kanahi+1, GBhi, GBlo=GBhi+1, };
+
+/*
+ * Print the entry e of a "Languages of the World" dictionary.
+ * The entry begins with a 6-byte header whose first two bytes hold,
+ * high byte first, the length nh of the part printed for cmd 'h'.
+ * cmd 'h' prints only those nh bytes, cmd 'r' writes the bytes after
+ * the header untranslated, and any other cmd sends them all through
+ * putchar.
+ * An entry too short to hold the header prints as an empty line, and
+ * nh is never allowed to push the 'h' limit beyond e.end, so a
+ * truncated or corrupt entry cannot be read past its end.
+ */
+void
+worldprintentry(Entry e, int cmd)
+{
+	enum { Hdrlen = 6 };
+	int nh, state[3];
+	uchar *p, *pe;
+
+	p = (uchar *)e.start;
+	pe = (uchar *)e.end;
+	if(pe - p < Hdrlen) {
+		/* no room for the header: nothing that can be decoded */
+		outnl(0);
+		return;
+	}
+	nh = GSHORT(p);
+	p += Hdrlen;
+	if(cmd == 'h' && nh < pe - p)
+		pe = p+nh;
+	state[0] = Utf;
+	state[1] = 0;
+	state[2] = 0;
+	while(p < pe){
+		if(cmd == 'r')
+			outchar(*p++);
+		else
+			putchar(*p++, state);
+	}
+	outnl(0);
+}
+
+/*
+ * Return the offset of the entry after the one whose header starts
+ * at fromoff-1 (callers pass the entry address plus one).
+ * The header is three 16-bit lengths stored high byte first; the
+ * entry occupies the 6 header bytes plus the sum of the three.
+ * Returns -1 if the seek fails or the header cannot be read in full.
+ */
+long
+worldnextoff(long fromoff)
+{
+	long start;
+	int lens[3];
+	uchar hdr[6];
+
+	start = fromoff-1;
+	if(Bseek(bdict, start, 0) < 0)
+		return -1;
+	if(Bread(bdict, hdr, 6) != 6)
+		return -1;
+	lens[0] = GSHORT(hdr);
+	lens[1] = GSHORT(hdr+2);
+	lens[2] = GSHORT(hdr+4);
+	return start + 6 + lens[0] + lens[1] + lens[2];
+}
+
+/*
+ * Translate one byte c of entry text and write the result.
+ * state is the caller's three-int scratch area, carried from byte
+ * to byte:
+ *	state[0]	current mode: Utf, Kanahi, Kanalo, GBhi or GBlo
+ *	state[1]	first byte of a two-byte character, saved until
+ *			the second byte arrives
+ *	state[2]	set by byte 0xfb and cleared by 0xfc; while it is
+ *			nonzero, runes taken from chartab are not written
+ */
+static void
+putchar(int c, int *state)
+{
+	/*
+	 * xflag is never changed, so every if(!xflag) below is taken:
+	 * the control bytes listed there are consumed without output.
+	 */
+	int xflag = 0;
+	Rune r;
+	int hi, lo;
+
+	switch(state[0]){
+	case Kanahi:
+	case GBhi:
+		/* first byte of a pair: save it and move to the lo state */
+		if(CANS2JH(c) || c == 0xff){
+			state[0]++;
+			state[1] = c;
+			break;
+		}
+		/* fall through */
+	case Utf:
+		/* 0xfe and 0xff switch to the two-byte modes */
+		if(c == 0xfe){
+			state[0] = Kanahi;
+			break;
+		}else if(c == 0xff){
+			state[0] = GBhi;
+			break;
+		}
+		r = chartab[c];
+		if(r < 0x80 && state[2] == 0)
+			outchar(r);
+		else if(r == NONE){
+			switch(c){
+			case 0xfb:
+				if(!xflag){
+					state[2] = 1;
+					break;
+				}
+			case 0xfc:
+				if(!xflag){
+					state[2] = 0;
+					break;
+				}
+			case 0x10:
+			case 0xc7: case 0xc8:
+			case 0xd8: case 0xd9: case 0xda:
+			case 0xdc: case 0xdd: case 0xde: case 0xdf:
+			case 0xfd:
+				if(!xflag)
+					break;
+				/* fall through */
+			default:
+				/* any other untranslatable byte is shown as \xx */
+				outprint("\\%.2ux", c);
+			}
+		}else if(state[2] == 0)
+			outrune(r);
+		break;
+	case Kanalo:
+	case GBlo:
+		/* the pair 0xff 0xff returns to single-byte mode */
+		if(state[1] == 0xff && c == 0xff){
+			state[0] = Utf;
+			break;
+		}
+		/* back to the hi state, ready for the next pair */
+		state[0]--;
+		hi = state[1];
+		lo = c;
+		S2J(hi, lo);		/* convert to JIS */
+		r = hi*100 + lo - 3232;	/* convert to jis208 */
+		if(state[0] == Kanahi && r < JIS208MAX)
+			r = tabjis208[r];
+		else if(state[0] == GBhi && r < GB2312MAX)
+			r = tabgb2312[r];
+		else
+			r = NONE;
+		/* no rune for this pair: show both bytes as \xx\xx */
+		if(r == NONE)
+			outprint("\\%.2ux\\%.2ux", state[1], c);
+		else
+			outrune(r);
+		break;
+	}
+}
+
+/*
+ * Print-key routine for the "Languages of the World" dictionaries:
+ * there is no key to show, so it writes a fixed message to bout.
+ */
+void
+worldprintkey(void)
+{
+	Bprint(bout, "No pronunciation key.\n");
+}