mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-15 11:20:03 +00:00
1425 lines
35 KiB
C
1425 lines
35 KiB
C
#include <u.h>
|
||
#include <libc.h>
|
||
#include <bio.h>
|
||
#include "dict.h"
|
||
|
||
/*
 * Fixed sizes for the parser's static scratch storage.
 */
enum {
	Buflen=1000,	/* bytes in each of the tag, spec, auxname[i] and auxval[i] buffers */
	Maxaux=5,	/* number of auxname/auxval slots available per tag */
};
|
||
|
||
/*
 * Possible tags.
 * These are the values stored in tagtab; Ntag counts them and
 * must stay last.
 */
enum {
	A,		/* author in quote (small caps) */
	B,		/* bold */
	Ba,		/* author inside bib */
	Bch,		/* builtup chem component */
	Bib,		/* surrounds word 'in' for bibliographic ref */
	Bl,		/* bold */
	Bo,		/* bond over */
	Bu,		/* bond under */
	Cb,		/* ? block of stuff (indent) */
	Cf,		/* cross ref to another entry (italics) */
	Chem,		/* chemistry formula */
	Co,		/* over (preceding sum, integral, etc.) */
	Col,		/* column of table (aux just may be r) */
	Cu,		/* under (preceding sum, integral, etc.) */
	Dat,		/* date */
	Db,		/* def block? indent */
	Dn,		/* denominator of fraction */
	E,		/* main entry */
	Ed,		/* editor's comments (in [...]) */
	Etym,		/* etymology (in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (contains <nu>, then <dn>) */
	Gk,		/* greek (transliteration) */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* headword group */
	Hm,		/* homonym (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italics */
	Il,		/* italic list? */
	In,		/* inferior (subscript) */
	L,		/* row of col of table */
	La,		/* status or usage label (italic) */
	Lc,		/* chapter/verse sort of thing for works */
	N,		/* note (smaller type) */
	Nu,		/* numerator of fraction */
	Ov,		/* needs overline */
	P,		/* paragraph (indent) */
	Ph,		/* pronunciation (transliteration) */
	Pi,		/* pile (frac without line) */
	Pqp,		/* subblock of quote */
	Pr,		/* pronunciation (in (...)) */
	Ps,		/* position (e.g., adv.) (italic) */
	Pt,		/* part (in lc) */
	Q,		/* quote in quote block */
	Qd,		/* quote date (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* status or usage label in quote (italic) */
	Qp,		/* quote block (small type, indent) */
	Qsn,		/* quote number */
	Qt,		/* quote words */
	R,		/* roman type style */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (sometimes surrounds several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital rom num) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small caps */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* sans serif bold */
	Ssi,		/* sans serif italic */
	Su,		/* superior (superscript) */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols=number of columns) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label for variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (starts with Also or Forms) */
	W,		/* work (e.g., Beowulf) (italics) */
	X,		/* cross reference to main word (small caps) */
	Xd,		/* cross reference to quotation by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* cross reference sense (lower number) */
	Xr,		/* list of x's */
	Ntag		/* end of tags */
};
|
||
|
||
/* Assoc tables must be sorted on first field */
|
||
|
||
static Assoc tagtab[] = {
|
||
{"a", A},
|
||
{"b", B},
|
||
{"ba", Ba},
|
||
{"bch", Bch},
|
||
{"bib", Bib},
|
||
{"bl", Bl},
|
||
{"bo", Bo},
|
||
{"bu", Bu},
|
||
{"cb", Cb},
|
||
{"cf", Cf},
|
||
{"chem", Chem},
|
||
{"co", Co},
|
||
{"col", Col},
|
||
{"cu", Cu},
|
||
{"dat", Dat},
|
||
{"db", Db},
|
||
{"dn", Dn},
|
||
{"e", E},
|
||
{"ed", Ed},
|
||
{"et", Etym},
|
||
{"etym", Etym},
|
||
{"form", Form},
|
||
{"fq", Fq},
|
||
{"fr", Fr},
|
||
{"frac", Fr},
|
||
{"gk", Gk},
|
||
{"gr", Gr},
|
||
{"hg", Hg},
|
||
{"hm", Hm},
|
||
{"hw", Hw},
|
||
{"i", I},
|
||
{"il", Il},
|
||
{"in", In},
|
||
{"l", L},
|
||
{"la", La},
|
||
{"lc", Lc},
|
||
{"n", N},
|
||
{"nu", Nu},
|
||
{"ov", Ov},
|
||
{"p", P},
|
||
{"ph", Ph},
|
||
{"pi", Pi},
|
||
{"pqp", Pqp},
|
||
{"pr", Pr},
|
||
{"ps", Ps},
|
||
{"pt", Pt},
|
||
{"q", Q},
|
||
{"qd", Qd},
|
||
{"qig", Qig},
|
||
{"qla", Qla},
|
||
{"qp", Qp},
|
||
{"qsn", Qsn},
|
||
{"qt", Qt},
|
||
{"r", R},
|
||
{"rx", Rx},
|
||
{"s", S},
|
||
{"s0", S0},
|
||
{"s1", S1},
|
||
{"s2", S2},
|
||
{"s3", S3},
|
||
{"s4", S4},
|
||
{"s5", S5},
|
||
{"s6", S6},
|
||
{"s7a", S7a},
|
||
{"s7n", S7n},
|
||
{"sc", Sc},
|
||
{"sgk", Sgk},
|
||
{"sn", Sn},
|
||
{"ss", Ss,},
|
||
{"ssb", Ssb},
|
||
{"ssi", Ssi},
|
||
{"su", Su},
|
||
{"sub", Sub},
|
||
{"table", Table},
|
||
{"tt", Tt},
|
||
{"vd", Vd},
|
||
{"ve", Ve},
|
||
{"vf", Vf},
|
||
{"vfl", Vfl},
|
||
{"w", W},
|
||
{"x", X},
|
||
{"xd", Xd},
|
||
{"xi", Xi},
|
||
{"xid", Xid},
|
||
{"xr", Xr},
|
||
{"xs", Xs},
|
||
};
|
||
|
||
/*
 * Possible tag auxiliary info.
 * These values index auxstate[]; Naux counts them and must stay last.
 */
enum {
	Cols,		/* number of columns in a table */
	Num,		/* letter or number, for a sense */
	St,		/* status (e.g., obs) */
	Naux
};
|
||
|
||
static Assoc auxtab[] = {
|
||
{"cols", Cols},
|
||
{"num", Num},
|
||
{"st", St}
|
||
};
|
||
|
||
static Assoc spectab[] = {
|
||
{"3on4", 0xbe},
|
||
{"Aacu", 0xc1},
|
||
{"Aang", 0xc5},
|
||
{"Abarab", 0x100},
|
||
{"Acirc", 0xc2},
|
||
{"Ae", 0xc6},
|
||
{"Agrave", 0xc0},
|
||
{"Alpha", 0x391},
|
||
{"Amac", 0x100},
|
||
{"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */
|
||
{"Auml", 0xc4},
|
||
{"Beta", 0x392},
|
||
{"Cced", 0xc7},
|
||
{"Chacek", 0x10c},
|
||
{"Chi", 0x3a7},
|
||
{"Chirho", 0x2627}, /* Chi Rho U+2627 */
|
||
{"Csigma", 0x3da},
|
||
{"Delta", 0x394},
|
||
{"Eacu", 0xc9},
|
||
{"Ecirc", 0xca},
|
||
{"Edh", 0xd0},
|
||
{"Epsilon", 0x395},
|
||
{"Eta", 0x397},
|
||
{"Gamma", 0x393},
|
||
{"Iacu", 0xcd},
|
||
{"Icirc", 0xce},
|
||
{"Imac", 0x12a},
|
||
{"Integ", 0x222b},
|
||
{"Iota", 0x399},
|
||
{"Kappa", 0x39a},
|
||
{"Koppa", 0x3de},
|
||
{"Lambda", 0x39b},
|
||
{"Lbar", 0x141},
|
||
{"Mu", 0x39c},
|
||
{"Naira", 0x4e}, /* should have bar through */
|
||
{"Nplus", 0x4e}, /* should have plus above */
|
||
{"Ntilde", 0xd1},
|
||
{"Nu", 0x39d},
|
||
{"Oacu", 0xd3},
|
||
{"Obar", 0xd8},
|
||
{"Ocirc", 0xd4},
|
||
{"Oe", 0x152},
|
||
{"Omega", 0x3a9},
|
||
{"Omicron", 0x39f},
|
||
{"Ouml", 0xd6},
|
||
{"Phi", 0x3a6},
|
||
{"Pi", 0x3a0},
|
||
{"Psi", 0x3a8},
|
||
{"Rho", 0x3a1},
|
||
{"Sacu", 0x15a},
|
||
{"Sigma", 0x3a3},
|
||
{"Summ", 0x2211},
|
||
{"Tau", 0x3a4},
|
||
{"Th", 0xde},
|
||
{"Theta", 0x398},
|
||
{"Tse", 0x426},
|
||
{"Uacu", 0xda},
|
||
{"Ucirc", 0xdb},
|
||
{"Upsilon", 0x3a5},
|
||
{"Uuml", 0xdc},
|
||
{"Wyn", 0x1bf}, /* wynn U+01BF */
|
||
{"Xi", 0x39e},
|
||
{"Ygh", 0x1b7}, /* Yogh U+01B7 */
|
||
{"Zeta", 0x396},
|
||
{"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */
|
||
{"a", 0x61}, /* ante */
|
||
{"aacu", 0xe1},
|
||
{"aang", 0xe5},
|
||
{"aasper", MAAS},
|
||
{"abreve", 0x103},
|
||
{"acirc", 0xe2},
|
||
{"acu", LACU},
|
||
{"ae", 0xe6},
|
||
{"agrave", 0xe0},
|
||
{"ahook", 0x105},
|
||
{"alenis", MALN},
|
||
{"alpha", 0x3b1},
|
||
{"amac", 0x101},
|
||
{"amp", 0x26},
|
||
{"and", MAND},
|
||
{"ang", LRNG},
|
||
{"angle", 0x2220},
|
||
{"ankh", 0x2625}, /* ankh U+2625 */
|
||
{"ante", 0x61}, /* before (year) */
|
||
{"aonq", MAOQ},
|
||
{"appreq", 0x2243},
|
||
{"aquar", 0x2652},
|
||
{"arDadfull", 0x636}, /* Dad U+0636 */
|
||
{"arHa", 0x62d}, /* haa U+062D */
|
||
{"arTa", 0x62a}, /* taa U+062A */
|
||
{"arain", 0x639}, /* ain U+0639 */
|
||
{"arainfull", 0x639}, /* ain U+0639 */
|
||
{"aralif", 0x627}, /* alef U+0627 */
|
||
{"arba", 0x628}, /* baa U+0628 */
|
||
{"arha", 0x647}, /* ha U+0647 */
|
||
{"aries", 0x2648},
|
||
{"arnun", 0x646}, /* noon U+0646 */
|
||
{"arnunfull", 0x646}, /* noon U+0646 */
|
||
{"arpa", 0x647}, /* ha U+0647 */
|
||
{"arqoph", 0x642}, /* qaf U+0642 */
|
||
{"arshinfull", 0x634}, /* sheen U+0634 */
|
||
{"arta", 0x62a}, /* taa U+062A */
|
||
{"artafull", 0x62a}, /* taa U+062A */
|
||
{"artha", 0x62b}, /* thaa U+062B */
|
||
{"arwaw", 0x648}, /* waw U+0648 */
|
||
{"arya", 0x64a}, /* ya U+064A */
|
||
{"aryafull", 0x64a}, /* ya U+064A */
|
||
{"arzero", 0x660}, /* indic zero U+0660 */
|
||
{"asg", 0x292}, /* unicycle character. Cf "hallow" */
|
||
{"asper", LASP},
|
||
{"assert", 0x22a2},
|
||
{"astm", 0x2042}, /* asterism: should be upside down */
|
||
{"at", 0x40},
|
||
{"atilde", 0xe3},
|
||
{"auml", 0xe4},
|
||
{"ayin", 0x639}, /* arabic ain U+0639 */
|
||
{"b1", 0x2d}, /* single bond */
|
||
{"b2", 0x3d}, /* double bond */
|
||
{"b3", 0x2261}, /* triple bond */
|
||
{"bbar", 0x180}, /* b with bar U+0180 */
|
||
{"beta", 0x3b2},
|
||
{"bigobl", 0x2f},
|
||
{"blC", 0x43}, /* should be black letter */
|
||
{"blJ", 0x4a}, /* should be black letter */
|
||
{"blU", 0x55}, /* should be black letter */
|
||
{"blb", 0x62}, /* should be black letter */
|
||
{"blozenge", 0x25ca}, /* U+25CA; should be black */
|
||
{"bly", 0x79}, /* should be black letter */
|
||
{"bra", MBRA},
|
||
{"brbl", LBRB},
|
||
{"breve", LBRV},
|
||
{"bslash", L'\\'},
|
||
{"bsquare", 0x25a0}, /* black square U+25A0 */
|
||
{"btril", 0x25c0}, /* U+25C0 */
|
||
{"btrir", 0x25b6}, /* U+25B6 */
|
||
{"c", 0x63}, /* circa */
|
||
{"cab", 0x232a},
|
||
{"cacu", 0x107},
|
||
{"canc", 0x264b},
|
||
{"capr", 0x2651},
|
||
{"caret", 0x5e},
|
||
{"cb", 0x7d},
|
||
{"cbigb", 0x7d},
|
||
{"cbigpren", 0x29},
|
||
{"cbigsb", 0x5d},
|
||
{"cced", 0xe7},
|
||
{"cdil", LCED},
|
||
{"cdsb", 0x301b}, /* ]] U+301b */
|
||
{"cent", 0xa2},
|
||
{"chacek", 0x10d},
|
||
{"chi", 0x3c7},
|
||
{"circ", LRNG},
|
||
{"circa", 0x63}, /* about (year) */
|
||
{"circbl", 0x325}, /* ring below accent U+0325 */
|
||
{"circle", 0x25cb}, /* U+25CB */
|
||
{"circledot", 0x2299},
|
||
{"click", 0x296},
|
||
{"club", 0x2663},
|
||
{"comtime", 0x43},
|
||
{"conj", 0x260c},
|
||
{"cprt", 0xa9},
|
||
{"cq", '\''},
|
||
{"cqq", 0x201d},
|
||
{"cross", 0x2720}, /* maltese cross U+2720 */
|
||
{"crotchet", 0x2669},
|
||
{"csb", 0x5d},
|
||
{"ctilde", 0x63}, /* +tilde */
|
||
{"ctlig", MLCT},
|
||
{"cyra", 0x430},
|
||
{"cyre", 0x435},
|
||
{"cyrhard", 0x44a},
|
||
{"cyrjat", 0x463},
|
||
{"cyrm", 0x43c},
|
||
{"cyrn", 0x43d},
|
||
{"cyrr", 0x440},
|
||
{"cyrsoft", 0x44c},
|
||
{"cyrt", 0x442},
|
||
{"cyry", 0x44b},
|
||
{"dag", 0x2020},
|
||
{"dbar", 0x111},
|
||
{"dblar", 0x21cb},
|
||
{"dblgt", 0x226b},
|
||
{"dbllt", 0x226a},
|
||
{"dced", 0x64}, /* +cedilla */
|
||
{"dd", MDD},
|
||
{"ddag", 0x2021},
|
||
{"ddd", MDDD},
|
||
{"decr", 0x2193},
|
||
{"deg", 0xb0},
|
||
{"dele", 0x64}, /* should be dele */
|
||
{"delta", 0x3b4},
|
||
{"descnode", 0x260b}, /* descending node U+260B */
|
||
{"diamond", 0x2662},
|
||
{"digamma", 0x3dd},
|
||
{"div", 0xf7},
|
||
{"dlessi", 0x131},
|
||
{"dlessj1", 0x6a}, /* should be dotless */
|
||
{"dlessj2", 0x6a}, /* should be dotless */
|
||
{"dlessj3", 0x6a}, /* should be dotless */
|
||
{"dollar", 0x24},
|
||
{"dotab", LDOT},
|
||
{"dotbl", LDTB},
|
||
{"drachm", 0x292},
|
||
{"dubh", 0x2d},
|
||
{"eacu", 0xe9},
|
||
{"earth", 0x2641},
|
||
{"easper", MEAS},
|
||
{"ebreve", 0x115},
|
||
{"ecirc", 0xea},
|
||
{"edh", 0xf0},
|
||
{"egrave", 0xe8},
|
||
{"ehacek", 0x11b},
|
||
{"ehook", 0x119},
|
||
{"elem", 0x220a},
|
||
{"elenis", MELN},
|
||
{"em", 0x2014},
|
||
{"emac", 0x113},
|
||
{"emem", MEMM},
|
||
{"en", 0x2013},
|
||
{"epsilon", 0x3b5},
|
||
{"equil", 0x21cb},
|
||
{"ergo", 0x2234},
|
||
{"es", MES},
|
||
{"eszett", 0xdf},
|
||
{"eta", 0x3b7},
|
||
{"eth", 0xf0},
|
||
{"euml", 0xeb},
|
||
{"expon", 0x2191},
|
||
{"fact", 0x21},
|
||
{"fata", 0x251},
|
||
{"fatpara", 0xb6}, /* should have fatter, filled in bowl */
|
||
{"female", 0x2640},
|
||
{"ffilig", MLFFI},
|
||
{"fflig", MLFF},
|
||
{"ffllig", MLFFL},
|
||
{"filig", MLFI},
|
||
{"flat", 0x266d},
|
||
{"fllig", MLFL},
|
||
{"frE", 0x45}, /* should be curly */
|
||
{"frL", L'L'}, /* should be curly */
|
||
{"frR", 0x52}, /* should be curly */
|
||
{"frakB", 0x42}, /* should have fraktur style */
|
||
{"frakG", 0x47},
|
||
{"frakH", 0x48},
|
||
{"frakI", 0x49},
|
||
{"frakM", 0x4d},
|
||
{"frakU", 0x55},
|
||
{"frakX", 0x58},
|
||
{"frakY", 0x59},
|
||
{"frakh", 0x68},
|
||
{"frbl", LFRB},
|
||
{"frown", LFRN},
|
||
{"fs", 0x20},
|
||
{"fsigma", 0x3c2},
|
||
{"gAacu", 0xc1}, /* should be Α+acute */
|
||
{"gaacu", 0x3b1}, /* +acute */
|
||
{"gabreve", 0x3b1}, /* +breve */
|
||
{"gafrown", 0x3b1}, /* +frown */
|
||
{"gagrave", 0x3b1}, /* +grave */
|
||
{"gamac", 0x3b1}, /* +macron */
|
||
{"gamma", 0x3b3},
|
||
{"gauml", 0x3b1}, /* +umlaut */
|
||
{"ge", 0x2267},
|
||
{"geacu", 0x3b5}, /* +acute */
|
||
{"gegrave", 0x3b5}, /* +grave */
|
||
{"ghacu", 0x3b7}, /* +acute */
|
||
{"ghfrown", 0x3b7}, /* +frown */
|
||
{"ghgrave", 0x3b7}, /* +grave */
|
||
{"ghmac", 0x3b7}, /* +macron */
|
||
{"giacu", 0x3b9}, /* +acute */
|
||
{"gibreve", 0x3b9}, /* +breve */
|
||
{"gifrown", 0x3b9}, /* +frown */
|
||
{"gigrave", 0x3b9}, /* +grave */
|
||
{"gimac", 0x3b9}, /* +macron */
|
||
{"giuml", 0x3b9}, /* +umlaut */
|
||
{"glagjat", 0x467},
|
||
{"glots", 0x2c0},
|
||
{"goacu", 0x3bf}, /* +acute */
|
||
{"gobreve", 0x3bf}, /* +breve */
|
||
{"grave", LGRV},
|
||
{"gt", 0x3e},
|
||
{"guacu", 0x3c5}, /* +acute */
|
||
{"gufrown", 0x3c5}, /* +frown */
|
||
{"gugrave", 0x3c5}, /* +grave */
|
||
{"gumac", 0x3c5}, /* +macron */
|
||
{"guuml", 0x3c5}, /* +umlaut */
|
||
{"gwacu", 0x3c9}, /* +acute */
|
||
{"gwfrown", 0x3c9}, /* +frown */
|
||
{"gwgrave", 0x3c9}, /* +grave */
|
||
{"hacek", LHCK},
|
||
{"halft", 0x2308},
|
||
{"hash", 0x23},
|
||
{"hasper", MHAS},
|
||
{"hatpath", 0x5b2}, /* hataf patah U+05B2 */
|
||
{"hatqam", 0x5b3}, /* hataf qamats U+05B3 */
|
||
{"hatseg", 0x5b1}, /* hataf segol U+05B1 */
|
||
{"hbar", 0x127},
|
||
{"heart", 0x2661},
|
||
{"hebaleph", 0x5d0}, /* aleph U+05D0 */
|
||
{"hebayin", 0x5e2}, /* ayin U+05E2 */
|
||
{"hebbet", 0x5d1}, /* bet U+05D1 */
|
||
{"hebbeth", 0x5d1}, /* bet U+05D1 */
|
||
{"hebcheth", 0x5d7}, /* bet U+05D7 */
|
||
{"hebdaleth", 0x5d3}, /* dalet U+05D3 */
|
||
{"hebgimel", 0x5d2}, /* gimel U+05D2 */
|
||
{"hebhe", 0x5d4}, /* he U+05D4 */
|
||
{"hebkaph", 0x5db}, /* kaf U+05DB */
|
||
{"heblamed", 0x5dc}, /* lamed U+05DC */
|
||
{"hebmem", 0x5de}, /* mem U+05DE */
|
||
{"hebnun", 0x5e0}, /* nun U+05E0 */
|
||
{"hebnunfin", 0x5df}, /* final nun U+05DF */
|
||
{"hebpe", 0x5e4}, /* pe U+05E4 */
|
||
{"hebpedag", 0x5e3}, /* final pe? U+05E3 */
|
||
{"hebqoph", 0x5e7}, /* qof U+05E7 */
|
||
{"hebresh", 0x5e8}, /* resh U+05E8 */
|
||
{"hebshin", 0x5e9}, /* shin U+05E9 */
|
||
{"hebtav", 0x5ea}, /* tav U+05EA */
|
||
{"hebtsade", 0x5e6}, /* tsadi U+05E6 */
|
||
{"hebwaw", 0x5d5}, /* vav? U+05D5 */
|
||
{"hebyod", 0x5d9}, /* yod U+05D9 */
|
||
{"hebzayin", 0x5d6}, /* zayin U+05D6 */
|
||
{"hgz", 0x292}, /* ??? Cf "alet" */
|
||
{"hireq", 0x5b4}, /* U+05B4 */
|
||
{"hlenis", MHLN},
|
||
{"hook", LOGO},
|
||
{"horizE", 0x45}, /* should be on side */
|
||
{"horizP", 0x50}, /* should be on side */
|
||
{"horizS", 0x223d},
|
||
{"horizT", 0x22a3},
|
||
{"horizb", 0x7b}, /* should be underbrace */
|
||
{"ia", 0x3b1},
|
||
{"iacu", 0xed},
|
||
{"iasper", MIAS},
|
||
{"ib", 0x3b2},
|
||
{"ibar", 0x268},
|
||
{"ibreve", 0x12d},
|
||
{"icirc", 0xee},
|
||
{"id", 0x3b4},
|
||
{"ident", 0x2261},
|
||
{"ie", 0x3b5},
|
||
{"ifilig", MLFI},
|
||
{"ifflig", MLFF},
|
||
{"ig", 0x3b3},
|
||
{"igrave", 0xec},
|
||
{"ih", 0x3b7},
|
||
{"ii", 0x3b9},
|
||
{"ik", 0x3ba},
|
||
{"ilenis", MILN},
|
||
{"imac", 0x12b},
|
||
{"implies", 0x21d2},
|
||
{"index", 0x261e},
|
||
{"infin", 0x221e},
|
||
{"integ", 0x222b},
|
||
{"intsec", 0x2229},
|
||
{"invpri", 0x2cf},
|
||
{"iota", 0x3b9},
|
||
{"iq", 0x3c8},
|
||
{"istlig", MLST},
|
||
{"isub", 0x3f5}, /* iota below accent */
|
||
{"iuml", 0xef},
|
||
{"iz", 0x3b6},
|
||
{"jup", 0x2643},
|
||
{"kappa", 0x3ba},
|
||
{"koppa", 0x3df},
|
||
{"lambda", 0x3bb},
|
||
{"lar", 0x2190},
|
||
{"lbar", 0x142},
|
||
{"le", 0x2266},
|
||
{"lenis", LLEN},
|
||
{"leo", 0x264c},
|
||
{"lhalfbr", 0x2308},
|
||
{"lhshoe", 0x2283},
|
||
{"libra", 0x264e},
|
||
{"llswing", MLLS},
|
||
{"lm", 0x2d0},
|
||
{"logicand", 0x2227},
|
||
{"logicor", 0x2228},
|
||
{"longs", 0x283},
|
||
{"lrar", 0x2194},
|
||
{"lt", 0x3c},
|
||
{"ltappr", 0x227e},
|
||
{"ltflat", 0x2220},
|
||
{"lumlbl", 0x6c}, /* +umlaut below */
|
||
{"mac", LMAC},
|
||
{"male", 0x2642},
|
||
{"mc", 0x63}, /* should be raised */
|
||
{"merc", 0x263f}, /* mercury U+263F */
|
||
{"min", 0x2212},
|
||
{"moonfq", 0x263d}, /* first quarter moon U+263D */
|
||
{"moonlq", 0x263e}, /* last quarter moon U+263E */
|
||
{"msylab", 0x6d}, /* +sylab (ˌ) */
|
||
{"mu", 0x3bc},
|
||
{"nacu", 0x144},
|
||
{"natural", 0x266e},
|
||
{"neq", 0x2260},
|
||
{"nfacu", 0x2032},
|
||
{"nfasper", 0x2bd},
|
||
{"nfbreve", 0x2d8},
|
||
{"nfced", 0xb8},
|
||
{"nfcirc", 0x2c6},
|
||
{"nffrown", 0x2322},
|
||
{"nfgra", 0x2cb},
|
||
{"nfhacek", 0x2c7},
|
||
{"nfmac", 0xaf},
|
||
{"nftilde", 0x2dc},
|
||
{"nfuml", 0xa8},
|
||
{"ng", 0x14b},
|
||
{"not", 0xac},
|
||
{"notelem", 0x2209},
|
||
{"ntilde", 0xf1},
|
||
{"nu", 0x3bd},
|
||
{"oab", 0x2329},
|
||
{"oacu", 0xf3},
|
||
{"oasper", MOAS},
|
||
{"ob", 0x7b},
|
||
{"obar", 0xf8},
|
||
{"obigb", 0x7b}, /* should be big */
|
||
{"obigpren", 0x28},
|
||
{"obigsb", 0x5b}, /* should be big */
|
||
{"obreve", 0x14f},
|
||
{"ocirc", 0xf4},
|
||
{"odsb", 0x301a}, /* [[ U+301A */
|
||
{"oe", 0x153},
|
||
{"oeamp", 0x26},
|
||
{"ograve", 0xf2},
|
||
{"ohook", 0x6f}, /* +hook */
|
||
{"olenis", MOLN},
|
||
{"omac", 0x14d},
|
||
{"omega", 0x3c9},
|
||
{"omicron", 0x3bf},
|
||
{"ope", 0x25b},
|
||
{"opp", 0x260d},
|
||
{"oq", 0x60},
|
||
{"oqq", 0x201c},
|
||
{"or", MOR},
|
||
{"osb", 0x5b},
|
||
{"otilde", 0xf5},
|
||
{"ouml", 0xf6},
|
||
{"ounce", 0x2125}, /* ounce U+2125 */
|
||
{"ovparen", 0x2322}, /* should be sideways ( */
|
||
{"p", 0x2032},
|
||
{"pa", 0x2202},
|
||
{"page", 0x50},
|
||
{"pall", 0x28e},
|
||
{"paln", 0x272},
|
||
{"par", PAR},
|
||
{"para", 0xb6},
|
||
{"pbar", 0x70}, /* +bar */
|
||
{"per", 0x2118}, /* per U+2118 */
|
||
{"phi", 0x3c6},
|
||
{"phi2", 0x3d5},
|
||
{"pi", 0x3c0},
|
||
{"pisces", 0x2653},
|
||
{"planck", 0x127},
|
||
{"plantinJ", 0x4a}, /* should be script */
|
||
{"pm", 0xb1},
|
||
{"pmil", 0x2030},
|
||
{"pp", 0x2033},
|
||
{"ppp", 0x2034},
|
||
{"prop", 0x221d},
|
||
{"psi", 0x3c8},
|
||
{"pstlg", 0xa3},
|
||
{"q", 0x3f}, /* should be raised */
|
||
{"qamets", 0x5b3}, /* U+05B3 */
|
||
{"quaver", 0x266a},
|
||
{"rar", 0x2192},
|
||
{"rasper", MRAS},
|
||
{"rdot", 0xb7},
|
||
{"recipe", 0x211e}, /* U+211E */
|
||
{"reg", 0xae},
|
||
{"revC", 0x186}, /* open O U+0186 */
|
||
{"reva", 0x252},
|
||
{"revc", 0x254},
|
||
{"revope", 0x25c},
|
||
{"revr", 0x279},
|
||
{"revsc", 0x2d2}, /* upside-down semicolon */
|
||
{"revv", 0x28c},
|
||
{"rfa", 0x6f}, /* +hook (Cf "goal") */
|
||
{"rhacek", 0x159},
|
||
{"rhalfbr", 0x2309},
|
||
{"rho", 0x3c1},
|
||
{"rhshoe", 0x2282},
|
||
{"rlenis", MRLN},
|
||
{"rsylab", 0x72}, /* +sylab */
|
||
{"runash", 0x46}, /* should be runic 'ash' */
|
||
{"rvow", 0x2d4},
|
||
{"sacu", 0x15b},
|
||
{"sagit", 0x2650},
|
||
{"sampi", 0x3e1},
|
||
{"saturn", 0x2644},
|
||
{"sced", 0x15f},
|
||
{"schwa", 0x259},
|
||
{"scorpio", 0x264f},
|
||
{"scrA", 0x41}, /* should be script */
|
||
{"scrC", 0x43},
|
||
{"scrE", 0x45},
|
||
{"scrF", 0x46},
|
||
{"scrI", 0x49},
|
||
{"scrJ", 0x4a},
|
||
{"scrL", L'L'},
|
||
{"scrO", 0x4f},
|
||
{"scrP", 0x50},
|
||
{"scrQ", 0x51},
|
||
{"scrS", 0x53},
|
||
{"scrT", 0x54},
|
||
{"scrb", 0x62},
|
||
{"scrd", 0x64},
|
||
{"scrh", 0x68},
|
||
{"scrl", 0x6c},
|
||
{"scruple", 0x2108}, /* U+2108 */
|
||
{"sdd", 0x2d0},
|
||
{"sect", 0xa7},
|
||
{"semE", 0x2203},
|
||
{"sh", 0x283},
|
||
{"shacek", 0x161},
|
||
{"sharp", 0x266f},
|
||
{"sheva", 0x5b0}, /* U+05B0 */
|
||
{"shti", 0x26a},
|
||
{"shtsyll", 0x222a},
|
||
{"shtu", 0x28a},
|
||
{"sidetri", 0x22b2},
|
||
{"sigma", 0x3c3},
|
||
{"since", 0x2235},
|
||
{"slge", 0x2265}, /* should have slanted line under */
|
||
{"slle", 0x2264}, /* should have slanted line under */
|
||
{"sm", 0x2c8},
|
||
{"smm", 0x2cc},
|
||
{"spade", 0x2660},
|
||
{"sqrt", 0x221a},
|
||
{"square", 0x25a1}, /* U+25A1 */
|
||
{"ssChi", 0x3a7}, /* should be sans serif */
|
||
{"ssIota", 0x399},
|
||
{"ssOmicron", 0x39f},
|
||
{"ssPi", 0x3a0},
|
||
{"ssRho", 0x3a1},
|
||
{"ssSigma", 0x3a3},
|
||
{"ssTau", 0x3a4},
|
||
{"star", 0x2a},
|
||
{"stlig", MLST},
|
||
{"sup2", 0x2072},
|
||
{"supgt", 0x2c3},
|
||
{"suplt", 0x2c2},
|
||
{"sur", 0x2b3},
|
||
{"swing", 0x223c},
|
||
{"tau", 0x3c4},
|
||
{"taur", 0x2649},
|
||
{"th", 0xfe},
|
||
{"thbar", 0xfe}, /* +bar */
|
||
{"theta", 0x3b8},
|
||
{"thinqm", 0x3f}, /* should be thinner */
|
||
{"tilde", LTIL},
|
||
{"times", 0xd7},
|
||
{"tri", 0x2206},
|
||
{"trli", 0x2016},
|
||
{"ts", 0x2009},
|
||
{"uacu", 0xfa},
|
||
{"uasper", MUAS},
|
||
{"ubar", 0x75}, /* +bar */
|
||
{"ubreve", 0x16d},
|
||
{"ucirc", 0xfb},
|
||
{"udA", 0x2200},
|
||
{"udT", 0x22a5},
|
||
{"uda", 0x250},
|
||
{"udh", 0x265},
|
||
{"udqm", 0xbf},
|
||
{"udpsi", 0x22d4},
|
||
{"udtr", 0x2207},
|
||
{"ugrave", 0xf9},
|
||
{"ulenis", MULN},
|
||
{"umac", 0x16b},
|
||
{"uml", LUML},
|
||
{"undl", 0x2cd}, /* underline accent */
|
||
{"union", 0x222a},
|
||
{"upsilon", 0x3c5},
|
||
{"uuml", 0xfc},
|
||
{"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */
|
||
{"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */
|
||
{"vb", 0x7c},
|
||
{"vddd", 0x22ee},
|
||
{"versicle2", 0x2123}, /* U+2123 */
|
||
{"vinc", 0xaf},
|
||
{"virgo", 0x264d},
|
||
{"vpal", 0x25f},
|
||
{"vvf", 0x263},
|
||
{"wasper", MWAS},
|
||
{"wavyeq", 0x2248},
|
||
{"wlenis", MWLN},
|
||
{"wyn", 0x1bf}, /* wynn U+01BF */
|
||
{"xi", 0x3be},
|
||
{"yacu", 0xfd},
|
||
{"ycirc", 0x177},
|
||
{"ygh", 0x292},
|
||
{"ymac", 0x79}, /* +macron */
|
||
{"yuml", 0xff},
|
||
{"zced", 0x7a}, /* +cedilla */
|
||
{"zeta", 0x3b6},
|
||
{"zh", 0x292},
|
||
{"zhacek", 0x17e},
|
||
};
|
||
/*
|
||
The following special characters don't have close enough
|
||
equivalents in Unicode, so aren't in the above table.
|
||
22n 2^(2^n) Cf Fermat
|
||
2on4 2/4
|
||
3on8 3/8
|
||
Bantuo Bantu O. Cf Otshi-herero
|
||
Car C with circular arrow on top
|
||
albrtime cut-time: C with vertical line
|
||
ardal Cf dental
|
||
bantuo Bantu o. Cf Otshi-herero
|
||
bbc1 single chem bond below
|
||
bbc2 double chem bond below
|
||
bbl1 chem bond like /
|
||
bbl2 chem bond like //
|
||
bbr1 chem bond like \
|
||
bbr2 chem bond \\
|
||
bcop1 copper symbol. Cf copper
|
||
bcop2 copper symbol. Cf copper
|
||
benchm Cf benchmark
|
||
btc1 single chem bond above
|
||
btc2 double chem bond above
|
||
btl1 chem bond like \
|
||
btl2 chem bond like \\
|
||
btr1 chem bond like /
|
||
btr2 chem bond like //
|
||
burman Cf Burman
|
||
devph sanskrit letter. Cf ph
|
||
devrfls sanskrit letter. Cf cerebral
|
||
duplong[12] musical note
|
||
egchi early form of chi
|
||
eggamma[12] early form of gamma
|
||
egiota early form of iota
|
||
egkappa early form of kappa
|
||
eglambda early form of lambda
|
||
egmu[12] early form of mu
|
||
egnu[12] early form of nu
|
||
egpi[123] early form of pi
|
||
egrho[12] early form of rho
|
||
egsampi early form of sampi
|
||
egsan early form of san
|
||
egsigma[12] early form of sigma
|
||
egxi[123] early form of xi
|
||
elatS early form of S
|
||
elatc[12] early form of C
|
||
elatg[12] early form of G
|
||
glagjeri Slavonic Glagolitic jeri
|
||
glagjeru Slavonic Glagolitic jeru
|
||
hypolem hypolemisk (line with underdot)
|
||
lhrbr lower half }
|
||
longmord long mordent
|
||
mbwvow backwards stretched C. Cf retract.
|
||
mord music symbol. Cf mordent
|
||
mostra Cf direct
|
||
ohgcirc old form of circumflex
|
||
oldbeta old form of β. Cf perturbate
|
||
oldsemibr[12] old forms of semibreve. Cf prolation
|
||
ormg old form of g. Cf G
|
||
para[12345] form of ¶
|
||
pauseo musical pause sign
|
||
pauseu musical pause sign
|
||
pharyng Cf pharyngal
|
||
ragr Black letter ragged r
|
||
repetn musical repeat. Cf retort
|
||
segno musical segno sign
|
||
semain[12] semitic ain
|
||
semhe semitic he
|
||
semheth semitic heth
|
||
semkaph semitic kaph
|
||
semlamed[12] semitic lamed
|
||
semmem semitic mem
|
||
semnum semitic nun
|
||
sempe semitic pe
|
||
semqoph[123] semitic qoph
|
||
semresh semitic resh
|
||
semtav[1234] semitic tav
|
||
semyod semitic yod
|
||
semzayin[123] semitic zayin
|
||
shtlong[12] U with underbar. Cf glyconic
|
||
sigmatau σ,τ combination
|
||
squaver sixteenth note
|
||
sqbreve square musical breve note
|
||
swast swastika
|
||
uhrbr upper half of big }
|
||
versicle1 Cf versicle
|
||
*/
|
||
|
||
|
||
static Rune normtab[128] = {
|
||
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
||
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
||
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||
/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||
0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
||
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
||
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
||
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
||
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
||
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
||
};
|
||
static Rune phtab[128] = {
|
||
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
||
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'',
|
||
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||
/*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37,
|
||
0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
||
/*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47,
|
||
0x48, 0x26a, 0x4a, 0x4b, L'L', 0x4d, 0x14b, 0x254,
|
||
/*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57,
|
||
0x58, 0x59, 0x292, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
||
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
||
};
|
||
static Rune grtab[128] = {
|
||
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
||
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
||
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||
/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
||
0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
||
/*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393,
|
||
0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f,
|
||
/*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9,
|
||
0x3a7, 0x3a8, 0x396, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
||
/*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3,
|
||
0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf,
|
||
/*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9,
|
||
0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
||
};
|
||
static Rune subtab[128] = {
|
||
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
||
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
||
0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f,
|
||
/*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
|
||
0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f,
|
||
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
||
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
||
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
||
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
||
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
||
};
|
||
static Rune suptab[128] = {
|
||
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
||
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
||
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
||
0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f,
|
||
/*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077,
|
||
0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f,
|
||
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
||
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
||
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
||
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
||
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
||
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
||
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
||
};
|
||
|
||
static int tagstarts;
|
||
static char tag[Buflen];
|
||
static int naux;
|
||
static char auxname[Maxaux][Buflen];
|
||
static char auxval[Maxaux][Buflen];
|
||
static char spec[Buflen];
|
||
static uchar *auxstate[Naux]; /* vals for most recent tag */
|
||
static Entry curentry;
|
||
#define cursize (curentry.end-curentry.start)
|
||
|
||
static char *getspec(char *, char *);
|
||
static char *gettag(char *, char *);
|
||
static void dostatus(void);
|
||
|
||
/*
|
||
* cmd is one of:
|
||
* 'p': normal print
|
||
* 'h': just print headwords
|
||
* 'P': print raw
|
||
*/
|
||
void
|
||
oedprintentry(Entry e, int cmd)
|
||
{
|
||
char *p, *pe;
|
||
int t, a, i;
|
||
long r, rprev, rlig;
|
||
Rune *transtab;
|
||
|
||
p = e.start;
|
||
pe = e.end;
|
||
transtab = normtab;
|
||
rprev = NONE;
|
||
changett(0, 0, 0);
|
||
curentry = e;
|
||
if(cmd == 'h')
|
||
outinhibit = 1;
|
||
while(p < pe) {
|
||
if(cmd == 'r') {
|
||
outchar(*p++);
|
||
continue;
|
||
}
|
||
r = transtab[(*p++)&0x7F];
|
||
if(r < NONE) {
|
||
/* Emit the rune, but buffer in case of ligature */
|
||
if(rprev != NONE)
|
||
outrune(rprev);
|
||
rprev = r;
|
||
} else if(r == SPCS) {
|
||
/* Start of special character name */
|
||
p = getspec(p, pe);
|
||
r = lookassoc(spectab, asize(spectab), spec);
|
||
if(r == -1) {
|
||
if(debug)
|
||
err("spec %ld %d %s",
|
||
e.doff, cursize, spec);
|
||
r = 0xfffd;
|
||
}
|
||
if(r >= LIGS && r < LIGE) {
|
||
/* handle possible ligature */
|
||
rlig = liglookup(r, rprev);
|
||
if(rlig != NONE)
|
||
rprev = rlig; /* overwrite rprev */
|
||
else {
|
||
/* could print accent, but let's not */
|
||
if(rprev != NONE) outrune(rprev);
|
||
rprev = NONE;
|
||
}
|
||
} else if(r >= MULTI && r < MULTIE) {
|
||
if(rprev != NONE) {
|
||
outrune(rprev);
|
||
rprev = NONE;
|
||
}
|
||
outrunes(multitab[r-MULTI]);
|
||
} else if(r == PAR) {
|
||
if(rprev != NONE) {
|
||
outrune(rprev);
|
||
rprev = NONE;
|
||
}
|
||
outnl(1);
|
||
} else {
|
||
if(rprev != NONE) outrune(rprev);
|
||
rprev = r;
|
||
}
|
||
} else if(r == TAGS) {
|
||
/* Start of tag name */
|
||
if(rprev != NONE) {
|
||
outrune(rprev);
|
||
rprev = NONE;
|
||
}
|
||
p = gettag(p, pe);
|
||
t = lookassoc(tagtab, asize(tagtab), tag);
|
||
if(t == -1) {
|
||
if(debug)
|
||
err("tag %ld %d %s",
|
||
e.doff, cursize, tag);
|
||
continue;
|
||
}
|
||
for(i = 0; i < Naux; i++)
|
||
auxstate[i] = 0;
|
||
for(i = 0; i < naux; i++) {
|
||
a = lookassoc(auxtab, asize(auxtab), auxname[i]);
|
||
if(a == -1) {
|
||
if(debug)
|
||
err("aux %ld %d %s",
|
||
e.doff, cursize, auxname[i]);
|
||
} else
|
||
auxstate[a] = auxval[i];
|
||
}
|
||
switch(t){
|
||
case E:
|
||
case Ve:
|
||
outnl(0);
|
||
if(tagstarts)
|
||
dostatus();
|
||
break;
|
||
case Ed:
|
||
case Etym:
|
||
outchar(tagstarts? '[' : ']');
|
||
break;
|
||
case Pr:
|
||
outchar(tagstarts? '(' : ')');
|
||
break;
|
||
case In:
|
||
transtab = changett(transtab, subtab, tagstarts);
|
||
break;
|
||
case Hm:
|
||
case Su:
|
||
case Fq:
|
||
transtab = changett(transtab, suptab, tagstarts);
|
||
break;
|
||
case Gk:
|
||
transtab = changett(transtab, grtab, tagstarts);
|
||
break;
|
||
case Ph:
|
||
transtab = changett(transtab, phtab, tagstarts);
|
||
break;
|
||
case Hw:
|
||
if(cmd == 'h') {
|
||
if(!tagstarts)
|
||
outchar(' ');
|
||
outinhibit = !tagstarts;
|
||
}
|
||
break;
|
||
case S0:
|
||
case S1:
|
||
case S2:
|
||
case S3:
|
||
case S4:
|
||
case S5:
|
||
case S6:
|
||
case S7a:
|
||
case S7n:
|
||
case Sn:
|
||
case Sgk:
|
||
if(tagstarts) {
|
||
outnl(2);
|
||
dostatus();
|
||
if(auxstate[Num]) {
|
||
if(t == S3 || t == S5) {
|
||
i = atoi(auxstate[Num]);
|
||
while(i--)
|
||
outchar('*');
|
||
outchars(" ");
|
||
} else if(t == S7a || t == S7n || t == Sn) {
|
||
outchar('(');
|
||
outchars(auxstate[Num]);
|
||
outchars(") ");
|
||
} else if(t == Sgk) {
|
||
i = grtab[auxstate[Num][0]];
|
||
if(i != NONE)
|
||
outrune(i);
|
||
outchars(". ");
|
||
} else {
|
||
outchars(auxstate[Num]);
|
||
outchars(". ");
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
case Cb:
|
||
case Db:
|
||
case Qp:
|
||
case P:
|
||
if(tagstarts)
|
||
outnl(1);
|
||
break;
|
||
case Table:
|
||
/*
|
||
* Todo: gather columns, justify them, etc.
|
||
* For now, just let colums come out as rows
|
||
*/
|
||
if(!tagstarts)
|
||
outnl(0);
|
||
break;
|
||
case Col:
|
||
if(tagstarts)
|
||
outnl(0);
|
||
break;
|
||
case Dn:
|
||
if(tagstarts)
|
||
outchar('/');
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if(cmd == 'h') {
|
||
outinhibit = 0;
|
||
outnl(0);
|
||
}
|
||
}
/*
|
||
* Return offset into bdict where next oed entry after fromoff starts.
|
||
* Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
|
||
*/
|
||
long
|
||
oednextoff(long fromoff)
|
||
{
|
||
long a, n;
|
||
int c;
|
||
|
||
a = Bseek(bdict, fromoff, 0);
|
||
if(a < 0)
|
||
return -1;
|
||
n = 0;
|
||
for(;;) {
|
||
c = Bgetc(bdict);
|
||
if(c < 0)
|
||
break;
|
||
if(c == '<') {
|
||
c = Bgetc(bdict);
|
||
if(c == 'e') {
|
||
c = Bgetc(bdict);
|
||
if(c == '>' || c == ' ')
|
||
n = 3;
|
||
} else if(c == 'v' && Bgetc(bdict) == 'e') {
|
||
c = Bgetc(bdict);
|
||
if(c == '>' || c == ' ')
|
||
n = 4;
|
||
}
|
||
if(n)
|
||
break;
|
||
}
|
||
}
|
||
return (Boffset(bdict)-n);
|
||
}
/*
 * Text of the pronunciation key, written verbatim by oedprintkey.
 * The string is UTF-8; each line of the key ends in a newline.
 */
static char *prkey =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h ... ho! (həʊ)\n"
"r ... run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)... her (hɜː(r))\n"
"s ... see (siː), success (səkˈsɜs)\n"
"w ... wear (wɛə(r))\n"
"hw ... when (hwɛn)\n"
"j ... yes (jɛs)\n"
"θ ... thin (θin), bath (bɑːθ)\n"
"ð ... then (ðɛn), bathe (beɪð)\n"
"ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
"dʒ ... judge (dʒʌdʒ)\n"
"ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ... finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ ... Fr. cognac (kɔɲak)\n"
"x ... Ger. ach (ax), Sc. loch (lɒx)\n"
"ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ ... North Ger. sagen (ˈzaːɣən)\n"
"c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ ... Fr. cuisine (kɥizin)\n"
"\n"
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
"æ ... pat (pæt)\n"
"ʌ ... putt (pʌt)\n"
"ɒ ... pot (pɒt)\n"
"ʊ ... put (pʊt)\n"
"ə ... another (əˈnʌðə(r))\n"
"(ə)... beaten (ˈbiːt(ə)n)\n"
"i ... Fr. si (si)\n"
"e ... Fr. bébé (bebe)\n"
"a ... Fr. mari (mari)\n"
"ɑ ... Fr. bâtiment (bɑtimã)\n"
"ɔ ... Fr. homme (ɔm)\n"
"o ... Fr. eau (o)\n"
"ø ... Fr. peu (pø)\n"
"œ ... Fr. boeuf (bœf), coeur (kœr)\n"
"u ... Fr. douce (dus)\n"
"ʏ ... Ger. Müller (ˈmʏlər)\n"
"y ... Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ... barn (bɑːn)\n"
"ɔː ... born (bɔːn)\n"
"uː ... boon (buːn)\n"
"ɜː ... burn (bɜːn)\n"
"eː ... Ger. Schnee (ʃneː)\n"
"ɛː ... Ger. Fähre (ˈfɛːrə)\n"
"aː ... Ger. Tag (taːk)\n"
"oː ... Ger. Sohn (zoːn)\n"
"øː ... Ger. Goethe (gøːtə)\n"
"yː ... Ger. grün (gryːn)\n"
"\n"
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã ... Fr. franc (frã)\n"
"ɔ˜ ... Fr. bon (bɔ˜n)\n"
"œ˜ ... Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ... buy (baɪ)\n"
"ɔɪ ... boy (bɔɪ)\n"
"əʊ ... no (nəʊ)\n"
"aʊ ... now (naʊ)\n"
"ɪə ... peer (pɪə(r))\n"
"ɛə ... pair (pɛə(r))\n"
"ʊə ... tour (tʊə(r))\n"
"ɔə ... boar (bɔə(r))\n"
"\n"
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
void
|
||
oedprintkey(void)
|
||
{
|
||
Bprint(bout, "%s", prkey);
|
||
}
/*
|
||
* f points just after a '&', fe points at end of entry.
|
||
* Accumulate the special name, starting after the &
|
||
* and continuing until the next '.', in spec[].
|
||
* Return pointer to char after '.'.
|
||
*/
|
||
static char *
|
||
getspec(char *f, char *fe)
|
||
{
|
||
char *t;
|
||
int c, i;
|
||
|
||
t = spec;
|
||
i = sizeof spec;
|
||
while(--i > 0) {
|
||
c = *f++;
|
||
if(c == '.' || f == fe)
|
||
break;
|
||
*t++ = c;
|
||
}
|
||
*t = 0;
|
||
return f;
|
||
}
/*
|
||
* f points just after '<'; fe points at end of entry.
|
||
* Expect next characters from bin to match:
|
||
* [/][^ >]+( [^>=]+=[^ >]+)*>
|
||
* tag auxname auxval
|
||
* Accumulate the tag and its auxilliary information in
|
||
* tag[], auxname[][] and auxval[][].
|
||
* Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
|
||
* Set naux to the number of aux pairs found.
|
||
* Return pointer to after final '>'.
|
||
*/
|
||
static char *
|
||
gettag(char *f, char *fe)
|
||
{
|
||
char *t;
|
||
int c, i;
|
||
|
||
t = tag;
|
||
c = *f++;
|
||
if(c == '/')
|
||
tagstarts = 0;
|
||
else {
|
||
tagstarts = 1;
|
||
*t++ = c;
|
||
}
|
||
i = Buflen;
|
||
naux = 0;
|
||
while(--i > 0) {
|
||
c = *f++;
|
||
if(c == '>' || f == fe)
|
||
break;
|
||
if(c == ' ') {
|
||
*t = 0;
|
||
t = auxname[naux];
|
||
i = Buflen;
|
||
if(naux < Maxaux-1)
|
||
naux++;
|
||
} else if(naux && c == '=') {
|
||
*t = 0;
|
||
t = auxval[naux-1];
|
||
i = Buflen;
|
||
} else
|
||
*t++ = c;
|
||
}
|
||
*t = 0;
|
||
return f;
|
||
}
static void
|
||
dostatus(void)
|
||
{
|
||
char *s;
|
||
|
||
s = auxstate[St];
|
||
if(s) {
|
||
if(strcmp(s, "obs") == 0)
|
||
outrune(0x2020);
|
||
else if(strcmp(s, "ali") == 0)
|
||
outrune(0x2016);
|
||
else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
|
||
outrune(0xb6);
|
||
else if(strcmp(s, "xref") == 0)
|
||
{/* nothing */}
|
||
else if(debug)
|
||
err("status %ld %d %s", curentry.doff, cursize, s);
|
||
}
|
||
}
|