mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-15 11:20:03 +00:00
1426 lines
35 KiB
C
1426 lines
35 KiB
C
|
#include <u.h>
|
|||
|
#include <libc.h>
|
|||
|
#include <bio.h>
|
|||
|
#include "dict.h"
|
|||
|
|
|||
|
/*
 * Sizing limits for the parser's scratch storage.
 * Buflen is the byte size of the tag and spec buffers and of each
 * auxname/auxval slot; Maxaux is the number of such slots.
 */
enum {
	Buflen=1000,
	Maxaux=5,
};
|
|||
|
|
|||
|
/*
 * Possible tags.
 * Values are assigned implicitly starting at 0, so the order of the
 * enumerators is significant; Ntag is the number of tags.
 */
enum {
	A,		/* author in quote (small caps) */
	B,		/* bold */
	Ba,		/* author inside bib */
	Bch,		/* builtup chem component */
	Bib,		/* surrounds word 'in' for bibliographic ref */
	Bl,		/* bold */
	Bo,		/* bond over */
	Bu,		/* bond under */
	Cb,		/* ? block of stuff (indent) */
	Cf,		/* cross ref to another entry (italics) */
	Chem,		/* chemistry formula */
	Co,		/* over (preceding sum, integral, etc.) */
	Col,		/* column of table (aux just may be r) */
	Cu,		/* under (preceding sum, integral, etc.) */
	Dat,		/* date */
	Db,		/* def block? indent */
	Dn,		/* denominator of fraction */
	E,		/* main entry */
	Ed,		/* editor's comments (in [...]) */
	Etym,		/* etymology (in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (contains <nu>, then <dn>) */
	Gk,		/* greek (transliteration) */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* headword group */
	Hm,		/* homonym (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italics */
	Il,		/* italic list? */
	In,		/* inferior (subscript) */
	L,		/* row of col of table */
	La,		/* status or usage label (italic) */
	Lc,		/* chapter/verse sort of thing for works */
	N,		/* note (smaller type) */
	Nu,		/* numerator of fraction */
	Ov,		/* needs overline */
	P,		/* paragraph (indent) */
	Ph,		/* pronunciation (transliteration) */
	Pi,		/* pile (frac without line) */
	Pqp,		/* subblock of quote */
	Pr,		/* pronunciation (in (...)) */
	Ps,		/* position (e.g., adv.) (italic) */
	Pt,		/* part (in lc) */
	Q,		/* quote in quote block */
	Qd,		/* quote date (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* status or usage label in quote (italic) */
	Qp,		/* quote block (small type, indent) */
	Qsn,		/* quote number */
	Qt,		/* quote words */
	R,		/* roman type style */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (sometimes surrounds several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital rom num) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small caps */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* sans serif bold */
	Ssi,		/* sans serif italic */
	Su,		/* superior (superscript) */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols=number of columns) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label for variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (starts with Also or Forms) */
	W,		/* work (e.g., Beowulf) (italics) */
	X,		/* cross reference to main word (small caps) */
	Xd,		/* cross reference to quotation by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* cross reference sense (lower number) */
	Xr,		/* list of x's */
	Ntag		/* end of tags */
};
|
|||
|
|
|||
|
/* Assoc tables must be sorted on first field */
|
|||
|
|
|||
|
static Assoc tagtab[] = {
|
|||
|
{"a", A},
|
|||
|
{"b", B},
|
|||
|
{"ba", Ba},
|
|||
|
{"bch", Bch},
|
|||
|
{"bib", Bib},
|
|||
|
{"bl", Bl},
|
|||
|
{"bo", Bo},
|
|||
|
{"bu", Bu},
|
|||
|
{"cb", Cb},
|
|||
|
{"cf", Cf},
|
|||
|
{"chem", Chem},
|
|||
|
{"co", Co},
|
|||
|
{"col", Col},
|
|||
|
{"cu", Cu},
|
|||
|
{"dat", Dat},
|
|||
|
{"db", Db},
|
|||
|
{"dn", Dn},
|
|||
|
{"e", E},
|
|||
|
{"ed", Ed},
|
|||
|
{"et", Etym},
|
|||
|
{"etym", Etym},
|
|||
|
{"form", Form},
|
|||
|
{"fq", Fq},
|
|||
|
{"fr", Fr},
|
|||
|
{"frac", Fr},
|
|||
|
{"gk", Gk},
|
|||
|
{"gr", Gr},
|
|||
|
{"hg", Hg},
|
|||
|
{"hm", Hm},
|
|||
|
{"hw", Hw},
|
|||
|
{"i", I},
|
|||
|
{"il", Il},
|
|||
|
{"in", In},
|
|||
|
{"l", L},
|
|||
|
{"la", La},
|
|||
|
{"lc", Lc},
|
|||
|
{"n", N},
|
|||
|
{"nu", Nu},
|
|||
|
{"ov", Ov},
|
|||
|
{"p", P},
|
|||
|
{"ph", Ph},
|
|||
|
{"pi", Pi},
|
|||
|
{"pqp", Pqp},
|
|||
|
{"pr", Pr},
|
|||
|
{"ps", Ps},
|
|||
|
{"pt", Pt},
|
|||
|
{"q", Q},
|
|||
|
{"qd", Qd},
|
|||
|
{"qig", Qig},
|
|||
|
{"qla", Qla},
|
|||
|
{"qp", Qp},
|
|||
|
{"qsn", Qsn},
|
|||
|
{"qt", Qt},
|
|||
|
{"r", R},
|
|||
|
{"rx", Rx},
|
|||
|
{"s", S},
|
|||
|
{"s0", S0},
|
|||
|
{"s1", S1},
|
|||
|
{"s2", S2},
|
|||
|
{"s3", S3},
|
|||
|
{"s4", S4},
|
|||
|
{"s5", S5},
|
|||
|
{"s6", S6},
|
|||
|
{"s7a", S7a},
|
|||
|
{"s7n", S7n},
|
|||
|
{"sc", Sc},
|
|||
|
{"sgk", Sgk},
|
|||
|
{"sn", Sn},
|
|||
|
{"ss", Ss,},
|
|||
|
{"ssb", Ssb},
|
|||
|
{"ssi", Ssi},
|
|||
|
{"su", Su},
|
|||
|
{"sub", Sub},
|
|||
|
{"table", Table},
|
|||
|
{"tt", Tt},
|
|||
|
{"vd", Vd},
|
|||
|
{"ve", Ve},
|
|||
|
{"vf", Vf},
|
|||
|
{"vfl", Vfl},
|
|||
|
{"w", W},
|
|||
|
{"x", X},
|
|||
|
{"xd", Xd},
|
|||
|
{"xi", Xi},
|
|||
|
{"xid", Xid},
|
|||
|
{"xr", Xr},
|
|||
|
{"xs", Xs},
|
|||
|
};
|
|||
|
|
|||
|
/*
 * Possible tag auxiliary info.
 * Values start at 0; Naux is the number of aux kinds and sizes the
 * auxstate array.
 */
enum {
	Cols,		/* number of columns in a table */
	Num,		/* letter or number, for a sense */
	St,		/* status (e.g., obs) */
	Naux
};
|
|||
|
|
|||
|
static Assoc auxtab[] = {
|
|||
|
{"cols", Cols},
|
|||
|
{"num", Num},
|
|||
|
{"st", St}
|
|||
|
};
|
|||
|
|
|||
|
static Assoc spectab[] = {
|
|||
|
{"3on4", 0xbe},
|
|||
|
{"Aacu", 0xc1},
|
|||
|
{"Aang", 0xc5},
|
|||
|
{"Abarab", 0x100},
|
|||
|
{"Acirc", 0xc2},
|
|||
|
{"Ae", 0xc6},
|
|||
|
{"Agrave", 0xc0},
|
|||
|
{"Alpha", 0x391},
|
|||
|
{"Amac", 0x100},
|
|||
|
{"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */
|
|||
|
{"Auml", 0xc4},
|
|||
|
{"Beta", 0x392},
|
|||
|
{"Cced", 0xc7},
|
|||
|
{"Chacek", 0x10c},
|
|||
|
{"Chi", 0x3a7},
|
|||
|
{"Chirho", 0x2627}, /* Chi Rho U+2627 */
|
|||
|
{"Csigma", 0x3da},
|
|||
|
{"Delta", 0x394},
|
|||
|
{"Eacu", 0xc9},
|
|||
|
{"Ecirc", 0xca},
|
|||
|
{"Edh", 0xd0},
|
|||
|
{"Epsilon", 0x395},
|
|||
|
{"Eta", 0x397},
|
|||
|
{"Gamma", 0x393},
|
|||
|
{"Iacu", 0xcd},
|
|||
|
{"Icirc", 0xce},
|
|||
|
{"Imac", 0x12a},
|
|||
|
{"Integ", 0x222b},
|
|||
|
{"Iota", 0x399},
|
|||
|
{"Kappa", 0x39a},
|
|||
|
{"Koppa", 0x3de},
|
|||
|
{"Lambda", 0x39b},
|
|||
|
{"Lbar", 0x141},
|
|||
|
{"Mu", 0x39c},
|
|||
|
{"Naira", 0x4e}, /* should have bar through */
|
|||
|
{"Nplus", 0x4e}, /* should have plus above */
|
|||
|
{"Ntilde", 0xd1},
|
|||
|
{"Nu", 0x39d},
|
|||
|
{"Oacu", 0xd3},
|
|||
|
{"Obar", 0xd8},
|
|||
|
{"Ocirc", 0xd4},
|
|||
|
{"Oe", 0x152},
|
|||
|
{"Omega", 0x3a9},
|
|||
|
{"Omicron", 0x39f},
|
|||
|
{"Ouml", 0xd6},
|
|||
|
{"Phi", 0x3a6},
|
|||
|
{"Pi", 0x3a0},
|
|||
|
{"Psi", 0x3a8},
|
|||
|
{"Rho", 0x3a1},
|
|||
|
{"Sacu", 0x15a},
|
|||
|
{"Sigma", 0x3a3},
|
|||
|
{"Summ", 0x2211},
|
|||
|
{"Tau", 0x3a4},
|
|||
|
{"Th", 0xde},
|
|||
|
{"Theta", 0x398},
|
|||
|
{"Tse", 0x426},
|
|||
|
{"Uacu", 0xda},
|
|||
|
{"Ucirc", 0xdb},
|
|||
|
{"Upsilon", 0x3a5},
|
|||
|
{"Uuml", 0xdc},
|
|||
|
{"Wyn", 0x1bf}, /* wynn U+01BF */
|
|||
|
{"Xi", 0x39e},
|
|||
|
{"Ygh", 0x1b7}, /* Yogh U+01B7 */
|
|||
|
{"Zeta", 0x396},
|
|||
|
{"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */
|
|||
|
{"a", 0x61}, /* ante */
|
|||
|
{"aacu", 0xe1},
|
|||
|
{"aang", 0xe5},
|
|||
|
{"aasper", MAAS},
|
|||
|
{"abreve", 0x103},
|
|||
|
{"acirc", 0xe2},
|
|||
|
{"acu", LACU},
|
|||
|
{"ae", 0xe6},
|
|||
|
{"agrave", 0xe0},
|
|||
|
{"ahook", 0x105},
|
|||
|
{"alenis", MALN},
|
|||
|
{"alpha", 0x3b1},
|
|||
|
{"amac", 0x101},
|
|||
|
{"amp", 0x26},
|
|||
|
{"and", MAND},
|
|||
|
{"ang", LRNG},
|
|||
|
{"angle", 0x2220},
|
|||
|
{"ankh", 0x2625}, /* ankh U+2625 */
|
|||
|
{"ante", 0x61}, /* before (year) */
|
|||
|
{"aonq", MAOQ},
|
|||
|
{"appreq", 0x2243},
|
|||
|
{"aquar", 0x2652},
|
|||
|
{"arDadfull", 0x636}, /* Dad U+0636 */
|
|||
|
{"arHa", 0x62d}, /* haa U+062D */
|
|||
|
{"arTa", 0x62a}, /* taa U+062A */
|
|||
|
{"arain", 0x639}, /* ain U+0639 */
|
|||
|
{"arainfull", 0x639}, /* ain U+0639 */
|
|||
|
{"aralif", 0x627}, /* alef U+0627 */
|
|||
|
{"arba", 0x628}, /* baa U+0628 */
|
|||
|
{"arha", 0x647}, /* ha U+0647 */
|
|||
|
{"aries", 0x2648},
|
|||
|
{"arnun", 0x646}, /* noon U+0646 */
|
|||
|
{"arnunfull", 0x646}, /* noon U+0646 */
|
|||
|
{"arpa", 0x647}, /* ha U+0647 */
|
|||
|
{"arqoph", 0x642}, /* qaf U+0642 */
|
|||
|
{"arshinfull", 0x634}, /* sheen U+0634 */
|
|||
|
{"arta", 0x62a}, /* taa U+062A */
|
|||
|
{"artafull", 0x62a}, /* taa U+062A */
|
|||
|
{"artha", 0x62b}, /* thaa U+062B */
|
|||
|
{"arwaw", 0x648}, /* waw U+0648 */
|
|||
|
{"arya", 0x64a}, /* ya U+064A */
|
|||
|
{"aryafull", 0x64a}, /* ya U+064A */
|
|||
|
{"arzero", 0x660}, /* indic zero U+0660 */
|
|||
|
{"asg", 0x292}, /* unicycle character. Cf "hallow" */
|
|||
|
{"asper", LASP},
|
|||
|
{"assert", 0x22a2},
|
|||
|
{"astm", 0x2042}, /* asterism: should be upside down */
|
|||
|
{"at", 0x40},
|
|||
|
{"atilde", 0xe3},
|
|||
|
{"auml", 0xe4},
|
|||
|
{"ayin", 0x639}, /* arabic ain U+0639 */
|
|||
|
{"b1", 0x2d}, /* single bond */
|
|||
|
{"b2", 0x3d}, /* double bond */
|
|||
|
{"b3", 0x2261}, /* triple bond */
|
|||
|
{"bbar", 0x180}, /* b with bar U+0180 */
|
|||
|
{"beta", 0x3b2},
|
|||
|
{"bigobl", 0x2f},
|
|||
|
{"blC", 0x43}, /* should be black letter */
|
|||
|
{"blJ", 0x4a}, /* should be black letter */
|
|||
|
{"blU", 0x55}, /* should be black letter */
|
|||
|
{"blb", 0x62}, /* should be black letter */
|
|||
|
{"blozenge", 0x25ca}, /* U+25CA; should be black */
|
|||
|
{"bly", 0x79}, /* should be black letter */
|
|||
|
{"bra", MBRA},
|
|||
|
{"brbl", LBRB},
|
|||
|
{"breve", LBRV},
|
|||
|
{"bslash", L'\\'},
|
|||
|
{"bsquare", 0x25a0}, /* black square U+25A0 */
|
|||
|
{"btril", 0x25c0}, /* U+25C0 */
|
|||
|
{"btrir", 0x25b6}, /* U+25B6 */
|
|||
|
{"c", 0x63}, /* circa */
|
|||
|
{"cab", 0x232a},
|
|||
|
{"cacu", 0x107},
|
|||
|
{"canc", 0x264b},
|
|||
|
{"capr", 0x2651},
|
|||
|
{"caret", 0x5e},
|
|||
|
{"cb", 0x7d},
|
|||
|
{"cbigb", 0x7d},
|
|||
|
{"cbigpren", 0x29},
|
|||
|
{"cbigsb", 0x5d},
|
|||
|
{"cced", 0xe7},
|
|||
|
{"cdil", LCED},
|
|||
|
{"cdsb", 0x301b}, /* ]] U+301b */
|
|||
|
{"cent", 0xa2},
|
|||
|
{"chacek", 0x10d},
|
|||
|
{"chi", 0x3c7},
|
|||
|
{"circ", LRNG},
|
|||
|
{"circa", 0x63}, /* about (year) */
|
|||
|
{"circbl", 0x325}, /* ring below accent U+0325 */
|
|||
|
{"circle", 0x25cb}, /* U+25CB */
|
|||
|
{"circledot", 0x2299},
|
|||
|
{"click", 0x296},
|
|||
|
{"club", 0x2663},
|
|||
|
{"comtime", 0x43},
|
|||
|
{"conj", 0x260c},
|
|||
|
{"cprt", 0xa9},
|
|||
|
{"cq", '\''},
|
|||
|
{"cqq", 0x201d},
|
|||
|
{"cross", 0x2720}, /* maltese cross U+2720 */
|
|||
|
{"crotchet", 0x2669},
|
|||
|
{"csb", 0x5d},
|
|||
|
{"ctilde", 0x63}, /* +tilde */
|
|||
|
{"ctlig", MLCT},
|
|||
|
{"cyra", 0x430},
|
|||
|
{"cyre", 0x435},
|
|||
|
{"cyrhard", 0x44a},
|
|||
|
{"cyrjat", 0x463},
|
|||
|
{"cyrm", 0x43c},
|
|||
|
{"cyrn", 0x43d},
|
|||
|
{"cyrr", 0x440},
|
|||
|
{"cyrsoft", 0x44c},
|
|||
|
{"cyrt", 0x442},
|
|||
|
{"cyry", 0x44b},
|
|||
|
{"dag", 0x2020},
|
|||
|
{"dbar", 0x111},
|
|||
|
{"dblar", 0x21cb},
|
|||
|
{"dblgt", 0x226b},
|
|||
|
{"dbllt", 0x226a},
|
|||
|
{"dced", 0x64}, /* +cedilla */
|
|||
|
{"dd", MDD},
|
|||
|
{"ddag", 0x2021},
|
|||
|
{"ddd", MDDD},
|
|||
|
{"decr", 0x2193},
|
|||
|
{"deg", 0xb0},
|
|||
|
{"dele", 0x64}, /* should be dele */
|
|||
|
{"delta", 0x3b4},
|
|||
|
{"descnode", 0x260b}, /* descending node U+260B */
|
|||
|
{"diamond", 0x2662},
|
|||
|
{"digamma", 0x3dd},
|
|||
|
{"div", 0xf7},
|
|||
|
{"dlessi", 0x131},
|
|||
|
{"dlessj1", 0x6a}, /* should be dotless */
|
|||
|
{"dlessj2", 0x6a}, /* should be dotless */
|
|||
|
{"dlessj3", 0x6a}, /* should be dotless */
|
|||
|
{"dollar", 0x24},
|
|||
|
{"dotab", LDOT},
|
|||
|
{"dotbl", LDTB},
|
|||
|
{"drachm", 0x292},
|
|||
|
{"dubh", 0x2d},
|
|||
|
{"eacu", 0xe9},
|
|||
|
{"earth", 0x2641},
|
|||
|
{"easper", MEAS},
|
|||
|
{"ebreve", 0x115},
|
|||
|
{"ecirc", 0xea},
|
|||
|
{"edh", 0xf0},
|
|||
|
{"egrave", 0xe8},
|
|||
|
{"ehacek", 0x11b},
|
|||
|
{"ehook", 0x119},
|
|||
|
{"elem", 0x220a},
|
|||
|
{"elenis", MELN},
|
|||
|
{"em", 0x2014},
|
|||
|
{"emac", 0x113},
|
|||
|
{"emem", MEMM},
|
|||
|
{"en", 0x2013},
|
|||
|
{"epsilon", 0x3b5},
|
|||
|
{"equil", 0x21cb},
|
|||
|
{"ergo", 0x2234},
|
|||
|
{"es", MES},
|
|||
|
{"eszett", 0xdf},
|
|||
|
{"eta", 0x3b7},
|
|||
|
{"eth", 0xf0},
|
|||
|
{"euml", 0xeb},
|
|||
|
{"expon", 0x2191},
|
|||
|
{"fact", 0x21},
|
|||
|
{"fata", 0x251},
|
|||
|
{"fatpara", 0xb6}, /* should have fatter, filled in bowl */
|
|||
|
{"female", 0x2640},
|
|||
|
{"ffilig", MLFFI},
|
|||
|
{"fflig", MLFF},
|
|||
|
{"ffllig", MLFFL},
|
|||
|
{"filig", MLFI},
|
|||
|
{"flat", 0x266d},
|
|||
|
{"fllig", MLFL},
|
|||
|
{"frE", 0x45}, /* should be curly */
|
|||
|
{"frL", L'L'}, /* should be curly */
|
|||
|
{"frR", 0x52}, /* should be curly */
|
|||
|
{"frakB", 0x42}, /* should have fraktur style */
|
|||
|
{"frakG", 0x47},
|
|||
|
{"frakH", 0x48},
|
|||
|
{"frakI", 0x49},
|
|||
|
{"frakM", 0x4d},
|
|||
|
{"frakU", 0x55},
|
|||
|
{"frakX", 0x58},
|
|||
|
{"frakY", 0x59},
|
|||
|
{"frakh", 0x68},
|
|||
|
{"frbl", LFRB},
|
|||
|
{"frown", LFRN},
|
|||
|
{"fs", 0x20},
|
|||
|
{"fsigma", 0x3c2},
|
|||
|
{"gAacu", 0xc1}, /* should be Α+acute */
|
|||
|
{"gaacu", 0x3b1}, /* +acute */
|
|||
|
{"gabreve", 0x3b1}, /* +breve */
|
|||
|
{"gafrown", 0x3b1}, /* +frown */
|
|||
|
{"gagrave", 0x3b1}, /* +grave */
|
|||
|
{"gamac", 0x3b1}, /* +macron */
|
|||
|
{"gamma", 0x3b3},
|
|||
|
{"gauml", 0x3b1}, /* +umlaut */
|
|||
|
{"ge", 0x2267},
|
|||
|
{"geacu", 0x3b5}, /* +acute */
|
|||
|
{"gegrave", 0x3b5}, /* +grave */
|
|||
|
{"ghacu", 0x3b7}, /* +acute */
|
|||
|
{"ghfrown", 0x3b7}, /* +frown */
|
|||
|
{"ghgrave", 0x3b7}, /* +grave */
|
|||
|
{"ghmac", 0x3b7}, /* +macron */
|
|||
|
{"giacu", 0x3b9}, /* +acute */
|
|||
|
{"gibreve", 0x3b9}, /* +breve */
|
|||
|
{"gifrown", 0x3b9}, /* +frown */
|
|||
|
{"gigrave", 0x3b9}, /* +grave */
|
|||
|
{"gimac", 0x3b9}, /* +macron */
|
|||
|
{"giuml", 0x3b9}, /* +umlaut */
|
|||
|
{"glagjat", 0x467},
|
|||
|
{"glots", 0x2c0},
|
|||
|
{"goacu", 0x3bf}, /* +acute */
|
|||
|
{"gobreve", 0x3bf}, /* +breve */
|
|||
|
{"grave", LGRV},
|
|||
|
{"gt", 0x3e},
|
|||
|
{"guacu", 0x3c5}, /* +acute */
|
|||
|
{"gufrown", 0x3c5}, /* +frown */
|
|||
|
{"gugrave", 0x3c5}, /* +grave */
|
|||
|
{"gumac", 0x3c5}, /* +macron */
|
|||
|
{"guuml", 0x3c5}, /* +umlaut */
|
|||
|
{"gwacu", 0x3c9}, /* +acute */
|
|||
|
{"gwfrown", 0x3c9}, /* +frown */
|
|||
|
{"gwgrave", 0x3c9}, /* +grave */
|
|||
|
{"hacek", LHCK},
|
|||
|
{"halft", 0x2308},
|
|||
|
{"hash", 0x23},
|
|||
|
{"hasper", MHAS},
|
|||
|
{"hatpath", 0x5b2}, /* hataf patah U+05B2 */
|
|||
|
{"hatqam", 0x5b3}, /* hataf qamats U+05B3 */
|
|||
|
{"hatseg", 0x5b1}, /* hataf segol U+05B1 */
|
|||
|
{"hbar", 0x127},
|
|||
|
{"heart", 0x2661},
|
|||
|
{"hebaleph", 0x5d0}, /* aleph U+05D0 */
|
|||
|
{"hebayin", 0x5e2}, /* ayin U+05E2 */
|
|||
|
{"hebbet", 0x5d1}, /* bet U+05D1 */
|
|||
|
{"hebbeth", 0x5d1}, /* bet U+05D1 */
|
|||
|
{"hebcheth", 0x5d7}, /* bet U+05D7 */
|
|||
|
{"hebdaleth", 0x5d3}, /* dalet U+05D3 */
|
|||
|
{"hebgimel", 0x5d2}, /* gimel U+05D2 */
|
|||
|
{"hebhe", 0x5d4}, /* he U+05D4 */
|
|||
|
{"hebkaph", 0x5db}, /* kaf U+05DB */
|
|||
|
{"heblamed", 0x5dc}, /* lamed U+05DC */
|
|||
|
{"hebmem", 0x5de}, /* mem U+05DE */
|
|||
|
{"hebnun", 0x5e0}, /* nun U+05E0 */
|
|||
|
{"hebnunfin", 0x5df}, /* final nun U+05DF */
|
|||
|
{"hebpe", 0x5e4}, /* pe U+05E4 */
|
|||
|
{"hebpedag", 0x5e3}, /* final pe? U+05E3 */
|
|||
|
{"hebqoph", 0x5e7}, /* qof U+05E7 */
|
|||
|
{"hebresh", 0x5e8}, /* resh U+05E8 */
|
|||
|
{"hebshin", 0x5e9}, /* shin U+05E9 */
|
|||
|
{"hebtav", 0x5ea}, /* tav U+05EA */
|
|||
|
{"hebtsade", 0x5e6}, /* tsadi U+05E6 */
|
|||
|
{"hebwaw", 0x5d5}, /* vav? U+05D5 */
|
|||
|
{"hebyod", 0x5d9}, /* yod U+05D9 */
|
|||
|
{"hebzayin", 0x5d6}, /* zayin U+05D6 */
|
|||
|
{"hgz", 0x292}, /* ??? Cf "alet" */
|
|||
|
{"hireq", 0x5b4}, /* U+05B4 */
|
|||
|
{"hlenis", MHLN},
|
|||
|
{"hook", LOGO},
|
|||
|
{"horizE", 0x45}, /* should be on side */
|
|||
|
{"horizP", 0x50}, /* should be on side */
|
|||
|
{"horizS", 0x223d},
|
|||
|
{"horizT", 0x22a3},
|
|||
|
{"horizb", 0x7b}, /* should be underbrace */
|
|||
|
{"ia", 0x3b1},
|
|||
|
{"iacu", 0xed},
|
|||
|
{"iasper", MIAS},
|
|||
|
{"ib", 0x3b2},
|
|||
|
{"ibar", 0x268},
|
|||
|
{"ibreve", 0x12d},
|
|||
|
{"icirc", 0xee},
|
|||
|
{"id", 0x3b4},
|
|||
|
{"ident", 0x2261},
|
|||
|
{"ie", 0x3b5},
|
|||
|
{"ifilig", MLFI},
|
|||
|
{"ifflig", MLFF},
|
|||
|
{"ig", 0x3b3},
|
|||
|
{"igrave", 0xec},
|
|||
|
{"ih", 0x3b7},
|
|||
|
{"ii", 0x3b9},
|
|||
|
{"ik", 0x3ba},
|
|||
|
{"ilenis", MILN},
|
|||
|
{"imac", 0x12b},
|
|||
|
{"implies", 0x21d2},
|
|||
|
{"index", 0x261e},
|
|||
|
{"infin", 0x221e},
|
|||
|
{"integ", 0x222b},
|
|||
|
{"intsec", 0x2229},
|
|||
|
{"invpri", 0x2cf},
|
|||
|
{"iota", 0x3b9},
|
|||
|
{"iq", 0x3c8},
|
|||
|
{"istlig", MLST},
|
|||
|
{"isub", 0x3f5}, /* iota below accent */
|
|||
|
{"iuml", 0xef},
|
|||
|
{"iz", 0x3b6},
|
|||
|
{"jup", 0x2643},
|
|||
|
{"kappa", 0x3ba},
|
|||
|
{"koppa", 0x3df},
|
|||
|
{"lambda", 0x3bb},
|
|||
|
{"lar", 0x2190},
|
|||
|
{"lbar", 0x142},
|
|||
|
{"le", 0x2266},
|
|||
|
{"lenis", LLEN},
|
|||
|
{"leo", 0x264c},
|
|||
|
{"lhalfbr", 0x2308},
|
|||
|
{"lhshoe", 0x2283},
|
|||
|
{"libra", 0x264e},
|
|||
|
{"llswing", MLLS},
|
|||
|
{"lm", 0x2d0},
|
|||
|
{"logicand", 0x2227},
|
|||
|
{"logicor", 0x2228},
|
|||
|
{"longs", 0x283},
|
|||
|
{"lrar", 0x2194},
|
|||
|
{"lt", 0x3c},
|
|||
|
{"ltappr", 0x227e},
|
|||
|
{"ltflat", 0x2220},
|
|||
|
{"lumlbl", 0x6c}, /* +umlaut below */
|
|||
|
{"mac", LMAC},
|
|||
|
{"male", 0x2642},
|
|||
|
{"mc", 0x63}, /* should be raised */
|
|||
|
{"merc", 0x263f}, /* mercury U+263F */
|
|||
|
{"min", 0x2212},
|
|||
|
{"moonfq", 0x263d}, /* first quarter moon U+263D */
|
|||
|
{"moonlq", 0x263e}, /* last quarter moon U+263E */
|
|||
|
{"msylab", 0x6d}, /* +sylab (ˌ) */
|
|||
|
{"mu", 0x3bc},
|
|||
|
{"nacu", 0x144},
|
|||
|
{"natural", 0x266e},
|
|||
|
{"neq", 0x2260},
|
|||
|
{"nfacu", 0x2032},
|
|||
|
{"nfasper", 0x2bd},
|
|||
|
{"nfbreve", 0x2d8},
|
|||
|
{"nfced", 0xb8},
|
|||
|
{"nfcirc", 0x2c6},
|
|||
|
{"nffrown", 0x2322},
|
|||
|
{"nfgra", 0x2cb},
|
|||
|
{"nfhacek", 0x2c7},
|
|||
|
{"nfmac", 0xaf},
|
|||
|
{"nftilde", 0x2dc},
|
|||
|
{"nfuml", 0xa8},
|
|||
|
{"ng", 0x14b},
|
|||
|
{"not", 0xac},
|
|||
|
{"notelem", 0x2209},
|
|||
|
{"ntilde", 0xf1},
|
|||
|
{"nu", 0x3bd},
|
|||
|
{"oab", 0x2329},
|
|||
|
{"oacu", 0xf3},
|
|||
|
{"oasper", MOAS},
|
|||
|
{"ob", 0x7b},
|
|||
|
{"obar", 0xf8},
|
|||
|
{"obigb", 0x7b}, /* should be big */
|
|||
|
{"obigpren", 0x28},
|
|||
|
{"obigsb", 0x5b}, /* should be big */
|
|||
|
{"obreve", 0x14f},
|
|||
|
{"ocirc", 0xf4},
|
|||
|
{"odsb", 0x301a}, /* [[ U+301A */
|
|||
|
{"oe", 0x153},
|
|||
|
{"oeamp", 0x26},
|
|||
|
{"ograve", 0xf2},
|
|||
|
{"ohook", 0x6f}, /* +hook */
|
|||
|
{"olenis", MOLN},
|
|||
|
{"omac", 0x14d},
|
|||
|
{"omega", 0x3c9},
|
|||
|
{"omicron", 0x3bf},
|
|||
|
{"ope", 0x25b},
|
|||
|
{"opp", 0x260d},
|
|||
|
{"oq", 0x60},
|
|||
|
{"oqq", 0x201c},
|
|||
|
{"or", MOR},
|
|||
|
{"osb", 0x5b},
|
|||
|
{"otilde", 0xf5},
|
|||
|
{"ouml", 0xf6},
|
|||
|
{"ounce", 0x2125}, /* ounce U+2125 */
|
|||
|
{"ovparen", 0x2322}, /* should be sideways ( */
|
|||
|
{"p", 0x2032},
|
|||
|
{"pa", 0x2202},
|
|||
|
{"page", 0x50},
|
|||
|
{"pall", 0x28e},
|
|||
|
{"paln", 0x272},
|
|||
|
{"par", PAR},
|
|||
|
{"para", 0xb6},
|
|||
|
{"pbar", 0x70}, /* +bar */
|
|||
|
{"per", 0x2118}, /* per U+2118 */
|
|||
|
{"phi", 0x3c6},
|
|||
|
{"phi2", 0x3d5},
|
|||
|
{"pi", 0x3c0},
|
|||
|
{"pisces", 0x2653},
|
|||
|
{"planck", 0x127},
|
|||
|
{"plantinJ", 0x4a}, /* should be script */
|
|||
|
{"pm", 0xb1},
|
|||
|
{"pmil", 0x2030},
|
|||
|
{"pp", 0x2033},
|
|||
|
{"ppp", 0x2034},
|
|||
|
{"prop", 0x221d},
|
|||
|
{"psi", 0x3c8},
|
|||
|
{"pstlg", 0xa3},
|
|||
|
{"q", 0x3f}, /* should be raised */
|
|||
|
{"qamets", 0x5b3}, /* U+05B3 */
|
|||
|
{"quaver", 0x266a},
|
|||
|
{"rar", 0x2192},
|
|||
|
{"rasper", MRAS},
|
|||
|
{"rdot", 0xb7},
|
|||
|
{"recipe", 0x211e}, /* U+211E */
|
|||
|
{"reg", 0xae},
|
|||
|
{"revC", 0x186}, /* open O U+0186 */
|
|||
|
{"reva", 0x252},
|
|||
|
{"revc", 0x254},
|
|||
|
{"revope", 0x25c},
|
|||
|
{"revr", 0x279},
|
|||
|
{"revsc", 0x2d2}, /* upside-down semicolon */
|
|||
|
{"revv", 0x28c},
|
|||
|
{"rfa", 0x6f}, /* +hook (Cf "goal") */
|
|||
|
{"rhacek", 0x159},
|
|||
|
{"rhalfbr", 0x2309},
|
|||
|
{"rho", 0x3c1},
|
|||
|
{"rhshoe", 0x2282},
|
|||
|
{"rlenis", MRLN},
|
|||
|
{"rsylab", 0x72}, /* +sylab */
|
|||
|
{"runash", 0x46}, /* should be runic 'ash' */
|
|||
|
{"rvow", 0x2d4},
|
|||
|
{"sacu", 0x15b},
|
|||
|
{"sagit", 0x2650},
|
|||
|
{"sampi", 0x3e1},
|
|||
|
{"saturn", 0x2644},
|
|||
|
{"sced", 0x15f},
|
|||
|
{"schwa", 0x259},
|
|||
|
{"scorpio", 0x264f},
|
|||
|
{"scrA", 0x41}, /* should be script */
|
|||
|
{"scrC", 0x43},
|
|||
|
{"scrE", 0x45},
|
|||
|
{"scrF", 0x46},
|
|||
|
{"scrI", 0x49},
|
|||
|
{"scrJ", 0x4a},
|
|||
|
{"scrL", L'L'},
|
|||
|
{"scrO", 0x4f},
|
|||
|
{"scrP", 0x50},
|
|||
|
{"scrQ", 0x51},
|
|||
|
{"scrS", 0x53},
|
|||
|
{"scrT", 0x54},
|
|||
|
{"scrb", 0x62},
|
|||
|
{"scrd", 0x64},
|
|||
|
{"scrh", 0x68},
|
|||
|
{"scrl", 0x6c},
|
|||
|
{"scruple", 0x2108}, /* U+2108 */
|
|||
|
{"sdd", 0x2d0},
|
|||
|
{"sect", 0xa7},
|
|||
|
{"semE", 0x2203},
|
|||
|
{"sh", 0x283},
|
|||
|
{"shacek", 0x161},
|
|||
|
{"sharp", 0x266f},
|
|||
|
{"sheva", 0x5b0}, /* U+05B0 */
|
|||
|
{"shti", 0x26a},
|
|||
|
{"shtsyll", 0x222a},
|
|||
|
{"shtu", 0x28a},
|
|||
|
{"sidetri", 0x22b2},
|
|||
|
{"sigma", 0x3c3},
|
|||
|
{"since", 0x2235},
|
|||
|
{"slge", 0x2265}, /* should have slanted line under */
|
|||
|
{"slle", 0x2264}, /* should have slanted line under */
|
|||
|
{"sm", 0x2c8},
|
|||
|
{"smm", 0x2cc},
|
|||
|
{"spade", 0x2660},
|
|||
|
{"sqrt", 0x221a},
|
|||
|
{"square", 0x25a1}, /* U+25A1 */
|
|||
|
{"ssChi", 0x3a7}, /* should be sans serif */
|
|||
|
{"ssIota", 0x399},
|
|||
|
{"ssOmicron", 0x39f},
|
|||
|
{"ssPi", 0x3a0},
|
|||
|
{"ssRho", 0x3a1},
|
|||
|
{"ssSigma", 0x3a3},
|
|||
|
{"ssTau", 0x3a4},
|
|||
|
{"star", 0x2a},
|
|||
|
{"stlig", MLST},
|
|||
|
{"sup2", 0x2072},
|
|||
|
{"supgt", 0x2c3},
|
|||
|
{"suplt", 0x2c2},
|
|||
|
{"sur", 0x2b3},
|
|||
|
{"swing", 0x223c},
|
|||
|
{"tau", 0x3c4},
|
|||
|
{"taur", 0x2649},
|
|||
|
{"th", 0xfe},
|
|||
|
{"thbar", 0xfe}, /* +bar */
|
|||
|
{"theta", 0x3b8},
|
|||
|
{"thinqm", 0x3f}, /* should be thinner */
|
|||
|
{"tilde", LTIL},
|
|||
|
{"times", 0xd7},
|
|||
|
{"tri", 0x2206},
|
|||
|
{"trli", 0x2016},
|
|||
|
{"ts", 0x2009},
|
|||
|
{"uacu", 0xfa},
|
|||
|
{"uasper", MUAS},
|
|||
|
{"ubar", 0x75}, /* +bar */
|
|||
|
{"ubreve", 0x16d},
|
|||
|
{"ucirc", 0xfb},
|
|||
|
{"udA", 0x2200},
|
|||
|
{"udT", 0x22a5},
|
|||
|
{"uda", 0x250},
|
|||
|
{"udh", 0x265},
|
|||
|
{"udqm", 0xbf},
|
|||
|
{"udpsi", 0x22d4},
|
|||
|
{"udtr", 0x2207},
|
|||
|
{"ugrave", 0xf9},
|
|||
|
{"ulenis", MULN},
|
|||
|
{"umac", 0x16b},
|
|||
|
{"uml", LUML},
|
|||
|
{"undl", 0x2cd}, /* underline accent */
|
|||
|
{"union", 0x222a},
|
|||
|
{"upsilon", 0x3c5},
|
|||
|
{"uuml", 0xfc},
|
|||
|
{"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */
|
|||
|
{"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */
|
|||
|
{"vb", 0x7c},
|
|||
|
{"vddd", 0x22ee},
|
|||
|
{"versicle2", 0x2123}, /* U+2123 */
|
|||
|
{"vinc", 0xaf},
|
|||
|
{"virgo", 0x264d},
|
|||
|
{"vpal", 0x25f},
|
|||
|
{"vvf", 0x263},
|
|||
|
{"wasper", MWAS},
|
|||
|
{"wavyeq", 0x2248},
|
|||
|
{"wlenis", MWLN},
|
|||
|
{"wyn", 0x1bf}, /* wynn U+01BF */
|
|||
|
{"xi", 0x3be},
|
|||
|
{"yacu", 0xfd},
|
|||
|
{"ycirc", 0x177},
|
|||
|
{"ygh", 0x292},
|
|||
|
{"ymac", 0x79}, /* +macron */
|
|||
|
{"yuml", 0xff},
|
|||
|
{"zced", 0x7a}, /* +cedilla */
|
|||
|
{"zeta", 0x3b6},
|
|||
|
{"zh", 0x292},
|
|||
|
{"zhacek", 0x17e},
|
|||
|
};
|
|||
|
/*
|
|||
|
The following special characters don't have close enough
|
|||
|
equivalents in Unicode, so aren't in the above table.
|
|||
|
22n 2^(2^n) Cf Fermat
|
|||
|
2on4 2/4
|
|||
|
3on8 3/8
|
|||
|
Bantuo Bantu O. Cf Otshi-herero
|
|||
|
Car C with circular arrow on top
|
|||
|
albrtime cut-time: C with vertical line
|
|||
|
ardal Cf dental
|
|||
|
bantuo Bantu o. Cf Otshi-herero
|
|||
|
bbc1 single chem bond below
|
|||
|
bbc2 double chem bond below
|
|||
|
bbl1 chem bond like /
|
|||
|
bbl2 chem bond like //
|
|||
|
bbr1 chem bond like \
|
|||
|
bbr2 chem bond \\
|
|||
|
bcop1 copper symbol. Cf copper
|
|||
|
bcop2 copper symbol. Cf copper
|
|||
|
benchm Cf benchmark
|
|||
|
btc1 single chem bond above
|
|||
|
btc2 double chem bond above
|
|||
|
btl1 chem bond like \
|
|||
|
btl2 chem bond like \\
|
|||
|
btr1 chem bond like /
|
|||
|
btr2		chem bond like //
|
|||
|
burman Cf Burman
|
|||
|
devph sanskrit letter. Cf ph
|
|||
|
devrfls sanskrit letter. Cf cerebral
|
|||
|
duplong[12] musical note
|
|||
|
egchi early form of chi
|
|||
|
eggamma[12] early form of gamma
|
|||
|
egiota early form of iota
|
|||
|
egkappa early form of kappa
|
|||
|
eglambda early form of lambda
|
|||
|
egmu[12] early form of mu
|
|||
|
egnu[12] early form of nu
|
|||
|
egpi[123] early form of pi
|
|||
|
egrho[12] early form of rho
|
|||
|
egsampi early form of sampi
|
|||
|
egsan early form of san
|
|||
|
egsigma[12] early form of sigma
|
|||
|
egxi[123] early form of xi
|
|||
|
elatS early form of S
|
|||
|
elatc[12] early form of C
|
|||
|
elatg[12] early form of G
|
|||
|
glagjeri Slavonic Glagolitic jeri
|
|||
|
glagjeru Slavonic Glagolitic jeru
|
|||
|
hypolem hypolemisk (line with underdot)
|
|||
|
lhrbr lower half }
|
|||
|
longmord long mordent
|
|||
|
mbwvow		backwards stretched C. Cf retract.
|
|||
|
mord music symbol. Cf mordent
|
|||
|
mostra Cf direct
|
|||
|
ohgcirc old form of circumflex
|
|||
|
oldbeta old form of β. Cf perturbate
|
|||
|
oldsemibr[12] old forms of semibreve. Cf prolation
|
|||
|
ormg old form of g. Cf G
|
|||
|
para[12345] form of ¶
|
|||
|
pauseo musical pause sign
|
|||
|
pauseu musical pause sign
|
|||
|
pharyng Cf pharyngal
|
|||
|
ragr Black letter ragged r
|
|||
|
repetn musical repeat. Cf retort
|
|||
|
segno musical segno sign
|
|||
|
semain[12] semitic ain
|
|||
|
semhe semitic he
|
|||
|
semheth semitic heth
|
|||
|
semkaph semitic kaph
|
|||
|
semlamed[12] semitic lamed
|
|||
|
semmem semitic mem
|
|||
|
semnum semitic nun
|
|||
|
sempe semitic pe
|
|||
|
semqoph[123] semitic qoph
|
|||
|
semresh semitic resh
|
|||
|
semtav[1234] semitic tav
|
|||
|
semyod semitic yod
|
|||
|
semzayin[123] semitic zayin
|
|||
|
shtlong[12] U with underbar. Cf glyconic
|
|||
|
sigmatau σ,τ combination
|
|||
|
squaver sixteenth note
|
|||
|
sqbreve square musical breve note
|
|||
|
swast swastika
|
|||
|
uhrbr upper half of big }
|
|||
|
versicle1 Cf versicle
|
|||
|
*/
|
|||
|
|
|||
|
|
|||
|
static Rune normtab[128] = {
|
|||
|
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
|||
|
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
|||
|
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
|||
|
/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
|||
|
0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
|||
|
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
|||
|
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
|||
|
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
|||
|
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
|||
|
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|||
|
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
|||
|
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|||
|
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
|||
|
};
|
|||
|
static Rune phtab[128] = {
|
|||
|
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
|||
|
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'',
|
|||
|
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
|||
|
/*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37,
|
|||
|
0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
|||
|
/*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47,
|
|||
|
0x48, 0x26a, 0x4a, 0x4b, L'L', 0x4d, 0x14b, 0x254,
|
|||
|
/*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57,
|
|||
|
0x58, 0x59, 0x292, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
|||
|
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|||
|
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
|||
|
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|||
|
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
|||
|
};
|
|||
|
static Rune grtab[128] = {
|
|||
|
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
|||
|
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
|||
|
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
|||
|
/*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
|||
|
0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
|
|||
|
/*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393,
|
|||
|
0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f,
|
|||
|
/*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9,
|
|||
|
0x3a7, 0x3a8, 0x396, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
|||
|
/*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3,
|
|||
|
0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf,
|
|||
|
/*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9,
|
|||
|
0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
|||
|
};
|
|||
|
static Rune subtab[128] = {
|
|||
|
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
|||
|
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
|||
|
0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f,
|
|||
|
/*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
|
|||
|
0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f,
|
|||
|
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
|||
|
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
|||
|
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
|||
|
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
|||
|
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|||
|
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
|||
|
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|||
|
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
|||
|
};
|
|||
|
static Rune suptab[128] = {
|
|||
|
/*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
|
|||
|
/*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
|
|||
|
/*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
|
|||
|
0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f,
|
|||
|
/*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077,
|
|||
|
0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f,
|
|||
|
/*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
|||
|
0x48, 0x49, 0x4a, 0x4b, L'L', 0x4d, 0x4e, 0x4f,
|
|||
|
/*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
|||
|
0x58, 0x59, 0x5a, 0x5b, L'\\', 0x5d, 0x5e, 0x5f,
|
|||
|
/*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|||
|
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
|||
|
/*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|||
|
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE,
|
|||
|
};
|
|||
|
|
|||
|
static int tagstarts;
|
|||
|
static char tag[Buflen];
|
|||
|
static int naux;
|
|||
|
static char auxname[Maxaux][Buflen];
|
|||
|
static char auxval[Maxaux][Buflen];
|
|||
|
static char spec[Buflen];
|
|||
|
static uchar *auxstate[Naux]; /* vals for most recent tag */
|
|||
|
static Entry curentry;
|
|||
|
#define cursize (curentry.end-curentry.start)
|
|||
|
|
|||
|
/* Local helpers, defined below. */
static void	dostatus(void);
static char	*getspec(char *f, char *fe);
static char	*gettag(char *f, char *fe);
|
|||
|
|
|||
|
/*
|
|||
|
* cmd is one of:
|
|||
|
* 'p': normal print
|
|||
|
* 'h': just print headwords
|
|||
|
* 'P': print raw
|
|||
|
*/
|
|||
|
void
|
|||
|
oedprintentry(Entry e, int cmd)
|
|||
|
{
|
|||
|
char *p, *pe;
|
|||
|
int t, a, i;
|
|||
|
long r, rprev, rlig;
|
|||
|
Rune *transtab;
|
|||
|
|
|||
|
p = e.start;
|
|||
|
pe = e.end;
|
|||
|
transtab = normtab;
|
|||
|
rprev = NONE;
|
|||
|
changett(0, 0, 0);
|
|||
|
curentry = e;
|
|||
|
if(cmd == 'h')
|
|||
|
outinhibit = 1;
|
|||
|
while(p < pe) {
|
|||
|
if(cmd == 'r') {
|
|||
|
outchar(*p++);
|
|||
|
continue;
|
|||
|
}
|
|||
|
r = transtab[(*p++)&0x7F];
|
|||
|
if(r < NONE) {
|
|||
|
/* Emit the rune, but buffer in case of ligature */
|
|||
|
if(rprev != NONE)
|
|||
|
outrune(rprev);
|
|||
|
rprev = r;
|
|||
|
} else if(r == SPCS) {
|
|||
|
/* Start of special character name */
|
|||
|
p = getspec(p, pe);
|
|||
|
r = lookassoc(spectab, asize(spectab), spec);
|
|||
|
if(r == -1) {
|
|||
|
if(debug)
|
|||
|
err("spec %ld %d %s",
|
|||
|
e.doff, cursize, spec);
|
|||
|
r = 0xfffd;
|
|||
|
}
|
|||
|
if(r >= LIGS && r < LIGE) {
|
|||
|
/* handle possible ligature */
|
|||
|
rlig = liglookup(r, rprev);
|
|||
|
if(rlig != NONE)
|
|||
|
rprev = rlig; /* overwrite rprev */
|
|||
|
else {
|
|||
|
/* could print accent, but let's not */
|
|||
|
if(rprev != NONE) outrune(rprev);
|
|||
|
rprev = NONE;
|
|||
|
}
|
|||
|
} else if(r >= MULTI && r < MULTIE) {
|
|||
|
if(rprev != NONE) {
|
|||
|
outrune(rprev);
|
|||
|
rprev = NONE;
|
|||
|
}
|
|||
|
outrunes(multitab[r-MULTI]);
|
|||
|
} else if(r == PAR) {
|
|||
|
if(rprev != NONE) {
|
|||
|
outrune(rprev);
|
|||
|
rprev = NONE;
|
|||
|
}
|
|||
|
outnl(1);
|
|||
|
} else {
|
|||
|
if(rprev != NONE) outrune(rprev);
|
|||
|
rprev = r;
|
|||
|
}
|
|||
|
} else if(r == TAGS) {
|
|||
|
/* Start of tag name */
|
|||
|
if(rprev != NONE) {
|
|||
|
outrune(rprev);
|
|||
|
rprev = NONE;
|
|||
|
}
|
|||
|
p = gettag(p, pe);
|
|||
|
t = lookassoc(tagtab, asize(tagtab), tag);
|
|||
|
if(t == -1) {
|
|||
|
if(debug)
|
|||
|
err("tag %ld %d %s",
|
|||
|
e.doff, cursize, tag);
|
|||
|
continue;
|
|||
|
}
|
|||
|
for(i = 0; i < Naux; i++)
|
|||
|
auxstate[i] = 0;
|
|||
|
for(i = 0; i < naux; i++) {
|
|||
|
a = lookassoc(auxtab, asize(auxtab), auxname[i]);
|
|||
|
if(a == -1) {
|
|||
|
if(debug)
|
|||
|
err("aux %ld %d %s",
|
|||
|
e.doff, cursize, auxname[i]);
|
|||
|
} else
|
|||
|
auxstate[a] = auxval[i];
|
|||
|
}
|
|||
|
switch(t){
|
|||
|
case E:
|
|||
|
case Ve:
|
|||
|
outnl(0);
|
|||
|
if(tagstarts)
|
|||
|
dostatus();
|
|||
|
break;
|
|||
|
case Ed:
|
|||
|
case Etym:
|
|||
|
outchar(tagstarts? '[' : ']');
|
|||
|
break;
|
|||
|
case Pr:
|
|||
|
outchar(tagstarts? '(' : ')');
|
|||
|
break;
|
|||
|
case In:
|
|||
|
transtab = changett(transtab, subtab, tagstarts);
|
|||
|
break;
|
|||
|
case Hm:
|
|||
|
case Su:
|
|||
|
case Fq:
|
|||
|
transtab = changett(transtab, suptab, tagstarts);
|
|||
|
break;
|
|||
|
case Gk:
|
|||
|
transtab = changett(transtab, grtab, tagstarts);
|
|||
|
break;
|
|||
|
case Ph:
|
|||
|
transtab = changett(transtab, phtab, tagstarts);
|
|||
|
break;
|
|||
|
case Hw:
|
|||
|
if(cmd == 'h') {
|
|||
|
if(!tagstarts)
|
|||
|
outchar(' ');
|
|||
|
outinhibit = !tagstarts;
|
|||
|
}
|
|||
|
break;
|
|||
|
case S0:
|
|||
|
case S1:
|
|||
|
case S2:
|
|||
|
case S3:
|
|||
|
case S4:
|
|||
|
case S5:
|
|||
|
case S6:
|
|||
|
case S7a:
|
|||
|
case S7n:
|
|||
|
case Sn:
|
|||
|
case Sgk:
|
|||
|
if(tagstarts) {
|
|||
|
outnl(2);
|
|||
|
dostatus();
|
|||
|
if(auxstate[Num]) {
|
|||
|
if(t == S3 || t == S5) {
|
|||
|
i = atoi(auxstate[Num]);
|
|||
|
while(i--)
|
|||
|
outchar('*');
|
|||
|
outchars(" ");
|
|||
|
} else if(t == S7a || t == S7n || t == Sn) {
|
|||
|
outchar('(');
|
|||
|
outchars(auxstate[Num]);
|
|||
|
outchars(") ");
|
|||
|
} else if(t == Sgk) {
|
|||
|
i = grtab[auxstate[Num][0]];
|
|||
|
if(i != NONE)
|
|||
|
outrune(i);
|
|||
|
outchars(". ");
|
|||
|
} else {
|
|||
|
outchars(auxstate[Num]);
|
|||
|
outchars(". ");
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
break;
|
|||
|
case Cb:
|
|||
|
case Db:
|
|||
|
case Qp:
|
|||
|
case P:
|
|||
|
if(tagstarts)
|
|||
|
outnl(1);
|
|||
|
break;
|
|||
|
case Table:
|
|||
|
/*
|
|||
|
* Todo: gather columns, justify them, etc.
|
|||
|
* For now, just let colums come out as rows
|
|||
|
*/
|
|||
|
if(!tagstarts)
|
|||
|
outnl(0);
|
|||
|
break;
|
|||
|
case Col:
|
|||
|
if(tagstarts)
|
|||
|
outnl(0);
|
|||
|
break;
|
|||
|
case Dn:
|
|||
|
if(tagstarts)
|
|||
|
outchar('/');
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if(cmd == 'h') {
|
|||
|
outinhibit = 0;
|
|||
|
outnl(0);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/*
|
|||
|
* Return offset into bdict where next oed entry after fromoff starts.
|
|||
|
* Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
|
|||
|
*/
|
|||
|
long
|
|||
|
oednextoff(long fromoff)
|
|||
|
{
|
|||
|
long a, n;
|
|||
|
int c;
|
|||
|
|
|||
|
a = Bseek(bdict, fromoff, 0);
|
|||
|
if(a < 0)
|
|||
|
return -1;
|
|||
|
n = 0;
|
|||
|
for(;;) {
|
|||
|
c = Bgetc(bdict);
|
|||
|
if(c < 0)
|
|||
|
break;
|
|||
|
if(c == '<') {
|
|||
|
c = Bgetc(bdict);
|
|||
|
if(c == 'e') {
|
|||
|
c = Bgetc(bdict);
|
|||
|
if(c == '>' || c == ' ')
|
|||
|
n = 3;
|
|||
|
} else if(c == 'v' && Bgetc(bdict) == 'e') {
|
|||
|
c = Bgetc(bdict);
|
|||
|
if(c == '>' || c == ' ')
|
|||
|
n = 4;
|
|||
|
}
|
|||
|
if(n)
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
return (Boffset(bdict)-n);
|
|||
|
}
|
|||
|
|
|||
|
/*
 * Text printed by oedprintkey(): the key to the phonetic symbols
 * used in pronunciations.
 *
 * NOTE(review): a few transcriptions use plain 'i' or 'ɜ' where the
 * key's own vowel list suggests 'ɪ' or 'ɛ' -- (θin), (θiŋk),
 * (ˈfiŋgə(r)), (səkˈsɜs).  Left as found; verify against the
 * printed key before changing.
 */
static char *prkey =
"KEY TO THE PRONUNCIATION\n"
"\n"
"I. CONSONANTS\n"
"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
"\n"
"g as in go (gəʊ)\n"
"h ... ho! (həʊ)\n"
"r ... run (rʌn), terrier (ˈtɛriə(r))\n"
"(r)... her (hɜː(r))\n"
"s ... see (siː), success (səkˈsɜs)\n"
"w ... wear (wɛə(r))\n"
"hw ... when (hwɛn)\n"
"j ... yes (jɛs)\n"
"θ ... thin (θin), bath (bɑːθ)\n"
"ð ... then (ðɛn), bathe (beɪð)\n"
"ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
"tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
"ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
"dʒ ... judge (dʒʌdʒ)\n"
"ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n"
"ŋg ... finger (ˈfiŋgə(r))\n"
"\n"
"Foreign\n"
"ʎ as in It. seraglio (serˈraʎo)\n"
"ɲ ... Fr. cognac (kɔɲak)\n"
"x ... Ger. ach (ax), Sc. loch (lɒx)\n"
"ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
"ɣ ... North Ger. sagen (ˈzaːɣən)\n"
"c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
"ɥ ... Fr. cuisine (kɥizin)\n"
"\n"
"II. VOWELS AND DIPHTHONGS\n"
"\n"
"Short\n"
"ɪ as in pit (pɪt), -ness (-nɪs)\n"
"ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
"æ ... pat (pæt)\n"
"ʌ ... putt (pʌt)\n"
"ɒ ... pot (pɒt)\n"
"ʊ ... put (pʊt)\n"
"ə ... another (əˈnʌðə(r))\n"
"(ə)... beaten (ˈbiːt(ə)n)\n"
"i ... Fr. si (si)\n"
"e ... Fr. bébé (bebe)\n"
"a ... Fr. mari (mari)\n"
"ɑ ... Fr. bâtiment (bɑtimã)\n"
"ɔ ... Fr. homme (ɔm)\n"
"o ... Fr. eau (o)\n"
"ø ... Fr. peu (pø)\n"
"œ ... Fr. boeuf (bœf), coeur (kœr)\n"
"u ... Fr. douce (dus)\n"
"ʏ ... Ger. Müller (ˈmʏlər)\n"
"y ... Fr. du (dy)\n"
"\n"
"Long\n"
"iː as in bean (biːn)\n"
"ɑː ... barn (bɑːn)\n"
"ɔː ... born (bɔːn)\n"
"uː ... boon (buːn)\n"
"ɜː ... burn (bɜːn)\n"
"eː ... Ger. Schnee (ʃneː)\n"
"ɛː ... Ger. Fähre (ˈfɛːrə)\n"
"aː ... Ger. Tag (taːk)\n"
"oː ... Ger. Sohn (zoːn)\n"
"øː ... Ger. Goethe (gøːtə)\n"
"yː ... Ger. grün (gryːn)\n"
"\n"
"Nasal\n"
"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
"ã ... Fr. franc (frã)\n"
"ɔ˜ ... Fr. bon (bɔ˜n)\n"
"œ˜ ... Fr. un (œ˜)\n"
"\n"
"Diphthongs, etc.\n"
"eɪ as in bay (beɪ)\n"
"aɪ ... buy (baɪ)\n"
"ɔɪ ... boy (bɔɪ)\n"
"əʊ ... no (nəʊ)\n"
"aʊ ... now (naʊ)\n"
"ɪə ... peer (pɪə(r))\n"
"ɛə ... pair (pɛə(r))\n"
"ʊə ... tour (tʊə(r))\n"
"ɔə ... boar (bɔə(r))\n"
"\n"
"III. STRESS\n"
"\n"
"Main stress: ˈ preceding stressed syllable\n"
"Secondary stress: ˌ preceding stressed syllable\n"
"\n"
"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
|
|||
|
|
|||
|
void
|
|||
|
oedprintkey(void)
|
|||
|
{
|
|||
|
Bprint(bout, "%s", prkey);
|
|||
|
}
|
|||
|
|
|||
|
/*
|
|||
|
* f points just after a '&', fe points at end of entry.
|
|||
|
* Accumulate the special name, starting after the &
|
|||
|
* and continuing until the next '.', in spec[].
|
|||
|
* Return pointer to char after '.'.
|
|||
|
*/
|
|||
|
static char *
|
|||
|
getspec(char *f, char *fe)
|
|||
|
{
|
|||
|
char *t;
|
|||
|
int c, i;
|
|||
|
|
|||
|
t = spec;
|
|||
|
i = sizeof spec;
|
|||
|
while(--i > 0) {
|
|||
|
c = *f++;
|
|||
|
if(c == '.' || f == fe)
|
|||
|
break;
|
|||
|
*t++ = c;
|
|||
|
}
|
|||
|
*t = 0;
|
|||
|
return f;
|
|||
|
}
|
|||
|
|
|||
|
/*
|
|||
|
* f points just after '<'; fe points at end of entry.
|
|||
|
* Expect next characters from bin to match:
|
|||
|
* [/][^ >]+( [^>=]+=[^ >]+)*>
|
|||
|
* tag auxname auxval
|
|||
|
* Accumulate the tag and its auxilliary information in
|
|||
|
* tag[], auxname[][] and auxval[][].
|
|||
|
* Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
|
|||
|
* Set naux to the number of aux pairs found.
|
|||
|
* Return pointer to after final '>'.
|
|||
|
*/
|
|||
|
static char *
|
|||
|
gettag(char *f, char *fe)
|
|||
|
{
|
|||
|
char *t;
|
|||
|
int c, i;
|
|||
|
|
|||
|
t = tag;
|
|||
|
c = *f++;
|
|||
|
if(c == '/')
|
|||
|
tagstarts = 0;
|
|||
|
else {
|
|||
|
tagstarts = 1;
|
|||
|
*t++ = c;
|
|||
|
}
|
|||
|
i = Buflen;
|
|||
|
naux = 0;
|
|||
|
while(--i > 0) {
|
|||
|
c = *f++;
|
|||
|
if(c == '>' || f == fe)
|
|||
|
break;
|
|||
|
if(c == ' ') {
|
|||
|
*t = 0;
|
|||
|
t = auxname[naux];
|
|||
|
i = Buflen;
|
|||
|
if(naux < Maxaux-1)
|
|||
|
naux++;
|
|||
|
} else if(naux && c == '=') {
|
|||
|
*t = 0;
|
|||
|
t = auxval[naux-1];
|
|||
|
i = Buflen;
|
|||
|
} else
|
|||
|
*t++ = c;
|
|||
|
}
|
|||
|
*t = 0;
|
|||
|
return f;
|
|||
|
}
|
|||
|
|
|||
|
static void
|
|||
|
dostatus(void)
|
|||
|
{
|
|||
|
char *s;
|
|||
|
|
|||
|
s = auxstate[St];
|
|||
|
if(s) {
|
|||
|
if(strcmp(s, "obs") == 0)
|
|||
|
outrune(0x2020);
|
|||
|
else if(strcmp(s, "ali") == 0)
|
|||
|
outrune(0x2016);
|
|||
|
else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
|
|||
|
outrune(0xb6);
|
|||
|
else if(strcmp(s, "xref") == 0)
|
|||
|
{/* nothing */}
|
|||
|
else if(debug)
|
|||
|
err("status %ld %d %s", curentry.doff, cursize, s);
|
|||
|
}
|
|||
|
}
|