mirror of
git://git.9front.org/plan9front/plan9front
synced 2025-01-12 11:10:06 +00:00
ktrans: pinyin, cleanup and documentation updates
* Added pinyin alternative Chinese input dictionary * Remove Cyrillic and Greek input, use kbmap instead * Ensure ktrans dictionaries are copied to iso * Cleanup ktrans(1) * Document dictionary format in ktrans(6) * Fix ktrans example in riow(1)
This commit is contained in:
parent
e5c7fe6305
commit
19b38409fc
10 changed files with 28439 additions and 222 deletions
|
@ -1,81 +0,0 @@
|
|||
YO Ё
|
||||
Yo Ё
|
||||
A А
|
||||
B Б
|
||||
V В
|
||||
G Г
|
||||
D Д
|
||||
Ye Е
|
||||
YE Е
|
||||
E Е
|
||||
Zh Ж
|
||||
ZH Ж
|
||||
Z З
|
||||
I И
|
||||
J Й
|
||||
K К
|
||||
L Л
|
||||
M М
|
||||
N Н
|
||||
O О
|
||||
P П
|
||||
R Р
|
||||
S С
|
||||
T Т
|
||||
U У
|
||||
F Ф
|
||||
Kh Х
|
||||
KH Х
|
||||
X Х
|
||||
Ts Ц
|
||||
TS Ц
|
||||
Ch Ч
|
||||
CH Ч
|
||||
Sh Ш
|
||||
SH Ш
|
||||
Shch Щ
|
||||
SHCH Щ
|
||||
'' ъ
|
||||
Y Ы
|
||||
' ь
|
||||
EH Э
|
||||
Eh Э
|
||||
Yu Ю
|
||||
YU Ю
|
||||
Ya Я
|
||||
YA Я
|
||||
a а
|
||||
b б
|
||||
v в
|
||||
g г
|
||||
d д
|
||||
ye е
|
||||
e е
|
||||
zh ж
|
||||
z з
|
||||
i и
|
||||
j й
|
||||
k к
|
||||
l л
|
||||
m м
|
||||
n н
|
||||
o о
|
||||
p п
|
||||
r р
|
||||
s с
|
||||
t т
|
||||
u у
|
||||
f ф
|
||||
kh х
|
||||
x х
|
||||
ts ц
|
||||
ch ч
|
||||
sh ш
|
||||
shch щ
|
||||
'' ъ
|
||||
y ы
|
||||
' ь
|
||||
eh э
|
||||
yu ю
|
||||
ya я
|
||||
yo ё
|
|
@ -1,82 +0,0 @@
|
|||
A Α
|
||||
'A Ά
|
||||
B Β
|
||||
G Γ
|
||||
D Δ
|
||||
E Ε
|
||||
'E Έ
|
||||
Z Ζ
|
||||
E! Η
|
||||
'E! Έ
|
||||
TH Θ
|
||||
Th Θ
|
||||
I Ι
|
||||
'I Ί
|
||||
K Κ
|
||||
L Λ
|
||||
M Μ
|
||||
N Ν
|
||||
KS Ξ
|
||||
Ks Ξ
|
||||
O Ο
|
||||
'O Ό
|
||||
P Π
|
||||
R Ρ
|
||||
S Σ
|
||||
T Τ
|
||||
U Υ
|
||||
'U Ύ
|
||||
F Φ
|
||||
CH Χ
|
||||
Ch Χ
|
||||
PS Ψ
|
||||
Ps Ψ
|
||||
O! Ω
|
||||
W Ω
|
||||
'O! Ώ
|
||||
'W Ώ
|
||||
a α
|
||||
'a ά
|
||||
b β
|
||||
v β
|
||||
g γ
|
||||
d δ
|
||||
e ε
|
||||
'e έ
|
||||
z ζ
|
||||
e! η
|
||||
'e! ή
|
||||
ii η
|
||||
'ii ή
|
||||
h η
|
||||
'h ή
|
||||
th θ
|
||||
i ι
|
||||
'i ί
|
||||
k κ
|
||||
l λ
|
||||
m μ
|
||||
n ν
|
||||
ks ξ
|
||||
x ξ
|
||||
o ο
|
||||
'o ό
|
||||
p π
|
||||
r ρ
|
||||
s ς
|
||||
s. ς
|
||||
s ς
|
||||
s\n ς
|
||||
s σ
|
||||
t τ
|
||||
u υ
|
||||
'u ΰ
|
||||
y υ
|
||||
'y ΰ
|
||||
f φ
|
||||
ch χ
|
||||
ps ψ
|
||||
o! ω
|
||||
w ω
|
||||
'o! ώ
|
||||
'w ώ
|
28
lib/ktrans/judou.map
Normal file
28
lib/ktrans/judou.map
Normal file
|
@ -0,0 +1,28 @@
|
|||
|
||||
, ,
|
||||
. 。
|
||||
< 《
|
||||
> 》
|
||||
/ /
|
||||
? ?
|
||||
; ;
|
||||
: :
|
||||
\ 、
|
||||
| ・
|
||||
` `
|
||||
~ 〜
|
||||
! !
|
||||
@ @
|
||||
# #
|
||||
$ ¥
|
||||
& &
|
||||
* *
|
||||
( (
|
||||
) )
|
||||
- -
|
||||
+ +
|
||||
= =
|
||||
[ 「
|
||||
] 」
|
||||
{ 『
|
||||
} 』
|
28294
lib/ktrans/pinyin.dict
Normal file
28294
lib/ktrans/pinyin.dict
Normal file
File diff suppressed because it is too large
Load diff
|
@ -25,6 +25,8 @@ lib d775
|
|||
font d775
|
||||
bit d775
|
||||
+
|
||||
ktrans d755
|
||||
+
|
||||
legal d775
|
||||
*
|
||||
map d775
|
||||
|
|
|
@ -42,12 +42,6 @@ Japanese Katakana.
|
|||
.B ctl-c
|
||||
Chinese.
|
||||
.TP
|
||||
.B ctl-r
|
||||
Russian.
|
||||
.TP
|
||||
.B ctl-o
|
||||
Greek.
|
||||
.TP
|
||||
.B ctl-s
|
||||
Korean.
|
||||
.TP
|
||||
|
@ -75,8 +69,7 @@ be explicitly matched by cycling through a list of options.
|
|||
automatically maintains a buffer of the current series of
|
||||
key strokes being considered for an explicit match, and resets
|
||||
that buffer on logical "word" breaks depending on the language.
|
||||
However manual hints of when to reset this buffer will likely
|
||||
still be required.
|
||||
However, in some cases the automatic hinting will be insufficient.
|
||||
.PP
|
||||
Input is always passed along, when a match is found
|
||||
.I Ktrans
|
||||
|
@ -113,7 +106,7 @@ Clear Kanji buffer (ctl-l)
|
|||
Switch to Hiragana (ctl-n)
|
||||
.TP
|
||||
.B Shift + Hiragana / Katakana
|
||||
Switch to Katakana (ctl-v)
|
||||
Switch to Katakana (ctl-k)
|
||||
.TP
|
||||
.B Hankaku / Zenkaku
|
||||
Switch to Hiragana (ctl-n)
|
||||
|
@ -124,7 +117,23 @@ Switch to passthrough (ctl-t)
|
|||
.B Shift + Space
|
||||
Convert to Kanji (ctl-\e).
|
||||
This is a fallback for keyboards without a physical Henkan key.
|
||||
.SH JAPANESE
|
||||
.SH DICTIONARIES
|
||||
All implicit and explicit matching dictionaries are provided as plain
|
||||
text files within
|
||||
.BR /lib/ktrans .
|
||||
Their formats are specified in
|
||||
.IR ktrans (6).
|
||||
Users may create new dictionaries or modify existing ones by binding over
|
||||
the system defaults.
|
||||
.PP
|
||||
For backwards compatibility the
|
||||
.B jisho
|
||||
and
|
||||
.B zidian
|
||||
environment variables may also be set to pick explicit lookup dictionaries
|
||||
for Japanese and Chinese respectively.
|
||||
.SH LANGUAGES
|
||||
.SS JAPANESE
|
||||
The Hiragana and Katakana modes implicitly turn Hepburn representations
|
||||
into their Kana counterparts. Explicit conversions combine sequences
|
||||
of Hiragana into Kanji.
|
||||
|
@ -145,34 +154,19 @@ as part of the lookup sequence itself. So to write
|
|||
私の猫
|
||||
.ft
|
||||
the user types "watashiNO[^\e]neko[^\e]". Note that in both cases
|
||||
we have successfully communicated to krans when to reset the explicit
|
||||
we have successfully communicated to ktrans when to reset the explicit
|
||||
match buffer without needing to explicitly give a ctl-l character.
|
||||
.SH CHINESE
|
||||
The Wubizixing input method is used. No implicit conversion is done,
|
||||
explicit conversion interprets Latin characters as their Wubi counterparts
|
||||
to do lookup of Hanzi.
|
||||
.SH RUSSIAN
|
||||
Implicit layer converts latin to Cyrillic; the transliteration is mostly
|
||||
phonetic, with
|
||||
.B '
|
||||
for
|
||||
.IR myagkij-znak
|
||||
(ь),
|
||||
.B ''
|
||||
for
|
||||
.I tverdyj-znak
|
||||
(ъ)
|
||||
.I yo
|
||||
for ё,
|
||||
.B j
|
||||
for
|
||||
.IR i-kratkaya
|
||||
(й).
|
||||
.SH VIETNAMESE
|
||||
.SS CHINESE
|
||||
Implicit conversion converts punctuation. Explicit matches
|
||||
use a dictionary to convert a series of Latin characters
|
||||
into Hanzi. By default a Wubizixing input dictionary
|
||||
is used. Additionally a Pinyin input dictionary
|
||||
is provided.
|
||||
.SS VIETNAMESE
|
||||
Implicit conversion is modeled after Telex, supporting
|
||||
standard diacritic suffixes.
|
||||
.SH KOREAN
|
||||
Mapping is done by emulating a Dubeolsik layout, with each latin
|
||||
.SS KOREAN
|
||||
Mapping is done by emulating a Dubeolsik layout, with each Latin
|
||||
character mapping to a single Jamo. Sequences of up to three Jamo
|
||||
are automatically converted to Hangul syllables.
|
||||
.SH EXAMPLES
|
||||
|
@ -195,14 +189,15 @@ respectively.
|
|||
.SH SOURCE
|
||||
.B /sys/src/cmd/ktrans
|
||||
.SH SEE ALSO
|
||||
.IR rio (4)
|
||||
.IR ktrans (6),
|
||||
.IR rio (4),
|
||||
.IR kbdfs (8)
|
||||
.SH BUGS
|
||||
.PP
|
||||
There is no hint from rio when the user moves the cursor, as such
|
||||
moving it is unlikely to result in what the user expects.
|
||||
.PP
|
||||
Plan9 lacks support for rendering combinational Unicode sequences,
|
||||
Plan 9 lacks support for rendering combining Unicode sequences,
|
||||
limiting the use of some code ranges.
|
||||
.SH HISTORY
|
||||
Ktrans was originally written by Kenji Okamoto in August of 2000 for
|
||||
|
|
|
@ -36,7 +36,7 @@ Example of running
|
|||
.I riow
|
||||
with other programs handling input:
|
||||
.EX
|
||||
</dev/kbdtap ktrans |
|
||||
</dev/kbdtap ktrans -G |
|
||||
reform/shortcuts |
|
||||
riow >/dev/kbdtap |[3] bar
|
||||
.EE
|
||||
|
|
56
sys/man/6/ktrans
Normal file
56
sys/man/6/ktrans
Normal file
|
@ -0,0 +1,56 @@
|
|||
.TH KTRANS 6
|
||||
.SH NAME
|
||||
ktrans \- format of ktrans dictionaries
|
||||
.SH DESCRIPTION
|
||||
.I Ktrans
|
||||
uses two plain text file formats for lookup dictionaries, one each for
|
||||
implicit and explicit matches. Both are stored within
|
||||
.BR /lib/ktrans .
|
||||
.SS MAP
|
||||
The
|
||||
.B .map
|
||||
files provide the implicit matching information. The file is a
|
||||
sequence of lines, each of which is a single character sequence mapping.
|
||||
Each mapping is one or more input keys followed by one or more output keys, separated
|
||||
by a single tab character.
|
||||
.PP
|
||||
The following
|
||||
.B .map
|
||||
files correspond to the following conversions:
|
||||
.TP
|
||||
hira.map
|
||||
Japanese Hiragana
|
||||
.TP
|
||||
kata.map
|
||||
Japanese Katakana
|
||||
.TP
|
||||
judou.map
|
||||
Chinese punctuation
|
||||
.TP
|
||||
hangul.map
|
||||
Korean Hangul
|
||||
.TP
|
||||
telex.map
|
||||
Vietnamese Telex
|
||||
.SS DICT
|
||||
The
|
||||
.B .dict
|
||||
files provide the explicit matching information. The file is
|
||||
a sequence of lines, each of which consists of an input sequence,
|
||||
followed by a tab, followed by the list of all candidates. Each candidate
|
||||
is separated by a single space character.
|
||||
.PP
|
||||
The following
|
||||
.B .dict
|
||||
files correspond to the following conversions:
|
||||
.TP
|
||||
kanji.dict
|
||||
Japanese Kanji
|
||||
.TP
|
||||
wubi.dict
|
||||
Chinese Wubizixing (default)
|
||||
.TP
|
||||
pinyin.dict
|
||||
Chinese Pinyin
|
||||
.SH "SEE ALSO"
|
||||
.IR ktrans (1)
|
|
@ -34,7 +34,7 @@ hmapalloc(int nbuckets, int size)
|
|||
nsz = Tagsize + size;
|
||||
store = mallocz(sizeof(*h) + (nbuckets * nsz), 1);
|
||||
if(store == nil)
|
||||
return nil;
|
||||
sysfatal("hmapalloc: out of memory");
|
||||
|
||||
h = store;
|
||||
h->nbs = nbuckets;
|
||||
|
@ -82,6 +82,8 @@ hmaprepl(Hmap **store, char *key, void *new, void *old, int freekeys)
|
|||
|
||||
h->cap *= 2;
|
||||
*store = realloc(*store, sizeof(*h) + h->cap*h->nsz);
|
||||
if(*store == nil)
|
||||
sysfatal("hmaprepl: out of memory");
|
||||
h = *store;
|
||||
h->nodes = (uchar*)*store + sizeof(*h);
|
||||
memset(h->nodes + h->len*h->nsz, 0, h->nsz);
|
||||
|
|
|
@ -198,8 +198,6 @@ enum{
|
|||
LangEN = '', // ^t
|
||||
LangJP = '', // ^n
|
||||
LangJPK = '', // ^k
|
||||
LangRU = '', // ^r
|
||||
LangEL = '', // ^o
|
||||
LangKO = '', // ^s
|
||||
LangZH = '', // ^c
|
||||
LangVN = '', // ^v
|
||||
|
@ -209,20 +207,16 @@ int deflang;
|
|||
|
||||
Hmap *natural;
|
||||
Hmap *hira, *kata, *jisho;
|
||||
Hmap *cyril;
|
||||
Hmap *greek;
|
||||
Hmap *hangul;
|
||||
Hmap *hanzi, *zidian;
|
||||
Hmap *judou, *zidian;
|
||||
Hmap *telex;
|
||||
|
||||
Hmap **langtab[] = {
|
||||
[LangEN] &natural,
|
||||
[LangJP] &hira,
|
||||
[LangJPK] &kata,
|
||||
[LangRU] &cyril,
|
||||
[LangEL] &greek,
|
||||
[LangKO] &hangul,
|
||||
[LangZH] &hanzi,
|
||||
[LangZH] &judou,
|
||||
[LangVN] &telex,
|
||||
};
|
||||
|
||||
|
@ -230,8 +224,6 @@ char *langcodetab[] = {
|
|||
[LangEN] "en",
|
||||
[LangJP] "jp",
|
||||
[LangJPK] "jpk",
|
||||
[LangRU] "ru",
|
||||
[LangEL] "el",
|
||||
[LangKO] "ko",
|
||||
[LangZH] "zh",
|
||||
[LangVN] "vn",
|
||||
|
@ -527,7 +519,7 @@ dictthread(void*)
|
|||
mode = Okuri;
|
||||
*p = tolower(*p);
|
||||
okuri.p = pushutf(okuri.b, strend(&okuri), p, 1);
|
||||
goto Line;
|
||||
goto Line;
|
||||
}
|
||||
|
||||
switch(mode){
|
||||
|
@ -652,7 +644,7 @@ keythread(void*)
|
|||
switch(lang){
|
||||
case LangZH:
|
||||
emitutf(dictch, p, 1);
|
||||
continue;
|
||||
break;
|
||||
case LangJP:
|
||||
emitutf(dictch, p, 1);
|
||||
if(isupper(*p))
|
||||
|
@ -792,12 +784,24 @@ usage(void)
|
|||
threadexits("usage");
|
||||
}
|
||||
|
||||
struct {
|
||||
char *s;
|
||||
Hmap **m;
|
||||
} inittab[] = {
|
||||
"judou", &judou,
|
||||
"hira", &hira,
|
||||
"kata", &kata,
|
||||
"hangul", &hangul,
|
||||
"telex", &telex,
|
||||
};
|
||||
|
||||
mainstacksize = 8192*2;
|
||||
|
||||
void
|
||||
threadmain(int argc, char *argv[])
|
||||
{
|
||||
int nogui;
|
||||
int nogui, i;
|
||||
char buf[128];
|
||||
char *jishoname, *zidianname;
|
||||
|
||||
deflang = LangEN;
|
||||
|
@ -849,13 +853,12 @@ threadmain(int argc, char *argv[])
|
|||
zidianname = "/lib/ktrans/wubi.dict";
|
||||
zidian = opendict(nil, zidianname);
|
||||
|
||||
natural = hanzi = nil;
|
||||
hira = openmap("/lib/ktrans/hira.map");
|
||||
kata = openmap("/lib/ktrans/kata.map");
|
||||
greek = openmap("/lib/ktrans/greek.map");
|
||||
cyril = openmap("/lib/ktrans/cyril.map");
|
||||
hangul = openmap("/lib/ktrans/hangul.map");
|
||||
telex = openmap("/lib/ktrans/telex.map");
|
||||
natural = nil;
|
||||
for(i = 0; i < nelem(inittab); i++){
|
||||
snprint(buf, sizeof buf, "/lib/ktrans/%s.map", inittab[i].s);
|
||||
if((*inittab[i].m = openmap(buf)) == nil)
|
||||
sysfatal("failed to open map: %r");
|
||||
}
|
||||
|
||||
dictch = chancreate(Msgsize, 0);
|
||||
input = chancreate(Msgsize, 0);
|
||||
|
|
Loading…
Reference in a new issue