ktrans: 新しい降り

Rewrite of ktrans to act as an overlay to kbdfs.
Move map files outside of the binary in to /lib/ktrans
and get our dictionaries out of /lib while we're at it.

Use \n as an alias for ctrl-\ for first lookup, use ^w
to clear okurigana input. This also in general changes
the logic so that we never 'swallow' a character and always echo
them out, using backspaces to clean up after ourselves.
This commit is contained in:
Jacob Moody 2022-08-03 11:14:44 +00:00
parent 30794bba75
commit 0541a434cb
13 changed files with 3908 additions and 3440 deletions

81
lib/ktrans/cyril.map Normal file
View file

@ -0,0 +1,81 @@
YO Ё
Yo Ё
A А
B Б
V В
G Г
D Д
Ye Е
YE Е
E Е
Zh Ж
ZH Ж
Z З
I И
J Й
K К
L Л
M М
N Н
O О
P П
R Р
S С
T Т
U У
F Ф
Kh Х
KH Х
X Х
Ts Ц
TS Ц
Ch Ч
CH Ч
Sh Ш
SH Ш
Shch Щ
SHCH Щ
'' ъ
Y Ы
' ь
EH Э
Eh Э
Yu Ю
YU Ю
Ya Я
YA Я
a а
b б
v в
g г
d д
ye е
e е
zh ж
z з
i и
j й
k к
l л
m м
n н
o о
p п
r р
s с
t т
u у
f ф
kh х
x х
ts ц
ch ч
sh ш
shch щ
'' ъ
y ы
' ь
eh э
yu ю
ya я
yo ё

82
lib/ktrans/greek.map Normal file
View file

@ -0,0 +1,82 @@
A Α
'A Ά
B Β
G Γ
D Δ
E Ε
'E Έ
Z Ζ
E! Η
'E! Ή
TH Θ
Th Θ
I Ι
'I Ί
K Κ
L Λ
M Μ
N Ν
KS Ξ
Ks Ξ
O Ο
'O Ό
P Π
R Ρ
S Σ
T Τ
U Υ
'U Ύ
F Φ
CH Χ
Ch Χ
PS Ψ
Ps Ψ
O! Ω
W Ω
'O! Ώ
'W Ώ
a α
'a ά
b β
v β
g γ
d δ
e ε
'e έ
z ζ
e! η
'e! ή
ii η
'ii ή
h η
'h ή
th θ
i ι
'i ί
k κ
l λ
m μ
n ν
ks ξ
x ξ
o ο
'o ό
p π
r ρ
s ς
s. ς
s ς
s\n ς
s σ
t τ
u υ
'u ύ
y υ
'y ύ
f φ
ch χ
ps ψ
o! ω
w ω
'o! ώ
'w ώ

2396
lib/ktrans/hangul.map Normal file

File diff suppressed because it is too large Load diff

170
lib/ktrans/hira.map Normal file
View file

@ -0,0 +1,170 @@
a あ
- ー
i い
u う
e え
o お
ka か
ga が
ki き
kya きゃ
kyu きゅ
kyo きょ
gi ぎ
gya ぎゃ
gyu ぎゅ
gyo ぎょ
ku く
gu ぐ
ke け
ge げ
ko こ
go ご
sa さ
za ざ
si し
shi し
sha しゃ
shu しゅ
she しぇ
sho しょ
syo しょ
zi ぢ
ja じゃ
ju じゅ
je じぇ
jo じょ
su す
zu ず
se せ
ze ぜ
so そ
zo ぞ
ta た
da だ
ti ち
chi ち
cha ちゃ
chu ちゅ
tyu ちゅ
cho ちょ
ji じ
tu つ
tsu つ
du づ
te て
de で
to と
do ど
na な
ni に
nya にゃ
nyu にゅ
nyo にょ
nu ぬ
ne ね
no の
ha は
ba ば
va ば
pa ぱ
hi ひ
hya ひゃ
hyu ひゅ
hyo ひょ
bi び
bya びゃ
byu びゅ
byo びょ
vi び
pi ぴ
pya ぴゃ
pyu ぴゅ
pyo ぴょ
hu ふ
fu ふ
bu ぶ
vu ぶ
pu ぷ
he へ
be べ
ve べ
pe ぺ
ho ほ
bo ぼ
vo ぼ
po ぽ
ma ま
mi み
mya みゃ
myu みゅ
myo みょ
mu む
me め
mo も
ya や
yu ゆ
yo よ
ra ら
ri り
rya りゃ
ryu りゅ
ryo りょ
ru る
re れ
ro ろ
wa わ
wi ゐ
we ゑ
wo を
n ん
xn ん
xa ぁ
xi ぃ
xu ぅ
xe ぇ
xo ぉ
kka っか
kki っき
kkya っきゃ
kkyu っきゅ
kkyo っきょ
kku っく
kke っけ
kko っこ
ssa っさ
ssi っし
sshi っし
ssha っしゃ
sshu っしゅ
sshe っしぇ
ssho っしょ
ssyo っしょ
ssu っす
sse っせ
sso っそ
tta った
dda っだ
tti っち
cchi っち
ccha っちゃ
cchu っちゅ
ttyu っちゅ
ccho っちょ
ttu っつ
ttsu っつ
ddu っづ
tte って
dde っで
tto っと
ddo っど
ppa っぱ
ppi っぴ
ppya っぴゃ
ppyu っぴゅ
ppyo っぴょ
ppu っぷ
ppe っぺ
ppo っぽ
. 。
, 、

142
lib/ktrans/kata.map Normal file
View file

@ -0,0 +1,142 @@
a ア
- ー
i イ
u ウ
e エ
o オ
ka カ
ga ガ
ki キ
kya キャ
kyu キュ
kyo キョ
gi ギ
gya ギャ
gyu ギュ
gyo ギョ
ku ク
gu グ
ke ケ
ge ゲ
ko コ
go ゴ
sa サ
za ザ
si シ
shi シ
ji ジ
sha シャ
she シェ
shu シュ
je ジェ
sho ショ
syo ショ
ja ジャ
ju ジュ
je ジェ
jo ジョ
su ス
zu ズ
se セ
ze ゼ
so ソ
zo ゾ
ta タ
da ダ
ti ティ
chi チ
zi ヂ
cha チャ
chu チュ
tyu チュ
che チェ
cho チョ
tu ツ
tsu ツ
du ヅ
te テ
de デ
to ト
do ド
na ナ
ni ニ
nya ニャ
nyu ニュ
nyo ニョ
nu ヌ
ne ネ
no ノ
ha ハ
ba バ
pa パ
hi ヒ
hya ヒャ
hyu ヒュ
hyo ヒョ
bi ビ
bya ビャ
byu ビュ
byo ビョ
pi ピ
pya ピャ
pyu ピュ
pyo ピョ
hu フ
fu フ
bu ブ
pu プ
he ヘ
be ベ
pe ペ
ho ホ
bo ボ
po ポ
ma マ
mi ミ
mya ミャ
myu ミュ
myo ミョ
mu ム
me メ
mo モ
ya ヤ
yu ユ
yo ヨ
ra ラ
ri リ
rya リャ
ryu リュ
ryo リョ
ru ル
re レ
ro ロ
wa ワ
wi ヰ
we ヱ
wo ヲ
n ン
xn ン
v ヴ
xa ァ
xi ィ
xe ェ
xo ォ
cc ッ
dd ッ
kk ッ
pp ッ
tt ッ
tch ッ
ss ッ
xn ン
di ディ
fa ファ
fi フィ
fe フェ
fo フォ
va ヴァ
vi ヴィ
ve ヴェ
vo ヴォ
. 。
, 、

View file

@ -1,121 +0,0 @@
.TH KTRANS 1
.SH NAME
ktrans \- language transliterator
.SH SYNOPSIS
.B ktrans
.SH DESCRIPTION
The
.I ktrans
program works with
.IR kbdfs (8)
to transliterate typed letter sequences into characters for languages
that do not use the Latin character set, and pipes the result to
.BR /dev/cons .
The language is selected by typing a control character:
.TP
.B ctl-t
return to default English mode (no transliteration).
.TP
.B ctl-n
Japanese hiragana: interpret lower-case letters as a Hepburn
representation of hiragana. In this mode, typing ctl-\\ looks up the
last `word' in a kana-kanji dictionary and replaces it.
Subsequent ctl-\\ characters cycle through the possibilities. A word
is the longest immediately preceding unbroken string of hiragana
characters.
.TP
.B ctl-k
Japanese katakana.
.TP
.B ctl-l
If you want to put the hiragana without modification.
.TP
.B ctl-x
Reload the in-memory kana-kanji conversion dictionary (kanji jisho).
This is so you can update the kanji jisho on-the-fly. By default, the
kanji jisho is read once at the beginning, to make a hash table, which
will be arranged so that the last selected candidate will be the first
candidate for later searches.
.TP
.B ctl-r
Russian: interpret letters as Cyrillic; the transliteration is mostly
phonetic, with
.B '
for
.IR myagkij-znak
(ь),
.B ''
for
.I tverdyj-znak
(ъ)
.I yo
for ё,
.B j
for
.IR i-kratkaya
(й).
.TP
.B ctl-o
Greek.
.TP
.B ctl-s
Korean.
.PP
To use
.I ktrans
you have to run it before a rio session. You can put it on your
$home/lib/profile like:
.EX
...
ktrans
rio -i riostart
...
.EE
or run it with a sub-rio on a window like:
.EX
% @{ktrans; rio}
.EE
.PP
The default location of the kanji jisho is
.LR /lib/kanji.jisho .
You can change that by means of the
.L $jisho
environment variable, so you can keep a customized version of the
dictionary that fits your personal needs.
.PP
.SH SOURCE
.B /sys/src/cmd/ktrans
.SH SEE ALSO
.IR rio (1)
.IR kbdfs (8)
.br
.IR /sys/src/cmd/ktrans/README.kenji
.br
.IR /sys/src/cmd/ktrans/READMEJ.kenji
.SH EXAMPLES
If you want to make the Japanese text as below:
.ft Jp
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
健康の維持にも役だっていますが、 なかなかたのしいものです。
.ft
your keyboard typing stream should be:
[^l]watashiHA[^\\]mainichi[^\\]35[^l]fun[^\\]ijou[^\\]aruIte,
[^\\]saraNI[^\\]10[^l]fun[^\\]denshaNI[^\\]noTte[^\\]gakkouNI
[^\\]kayoImasu.[^\\]kenkouNO[^\\]ijiNImo[^\\]yakuDAtteimasuga,
[^\\]nakanakatanoshiImonodesu.
where [^\\] and [^l] indicate 'ctl-\\' and 'ctl-l',
respectively. See README.kenji for the details of this Japanese input
method.
.SH BUGS
.PP
There is no way to generate the control characters literally. At the
beginning of a new line, you have to begin with ctl-l for successful
kana-kanji conversion.
.SH HISTORY
Ktrans was originally written by Kenji Okamoto in August of 2000 for
the 2nd edition of Plan 9. It was imported in to 9front in July of
2022, with patches by several contributors.

142
sys/man/4/ktrans Normal file
View file

@ -0,0 +1,142 @@
.TH KTRANS 4
.SH NAME
ktrans \- language transliterator
.SH SYNOPSIS
.B ktrans
[
.B -K
]
[
.B -k
.I kbd
]
[
.B -l
.I lang
]
[
.B -m
.I mnt
]
.nf
.IB /mnt/ktrans/kbd
.IB /mnt/ktrans/kbdin
.IB /mnt/ktrans/lang
.fi
.SH DESCRIPTION
.I ktrans
is a fileserver that provides a transliteration overlay to
.IR kbdfs (8).
When run,
.I ktrans
mounts itself to
.B /mnt/ktrans
and binds its own
.B kbd
and
.B kbdin
files over those present in
.BR /dev .
.PP
By default,
.I ktrans
also forks and reads input from the existing
.BR /dev/kbd .
The
.B -k
flag changes which file is read. The
.B -K
flag disables this process altogether, limiting
input to only the
.B kbdin
file.
.SH CONVERSION
Conversion is done in two steps: an implicit layer
that maps sequences of latin characters directly to runes, and
an explicit multi-rune conversion used for compound mappings.
.I Ktrans
does implicit conversion by passing through characters
as they are input. Then when a sequence of input is matched,
backspaces are emitted to clear the input sequence and the matched
rune sequence is emitted. The last 'word' of input may be then
additionally transliterated by typing ctl-\\. A newline character also
performs this lookup, but additional newline characters will not
cycle through alternatives.
.SH CONTROL
The language is selected by typing a control character:
.TP
.B ctl-t
Passthrough mode
.TP
.B ctl-n
Japanese mode. Implicit layer converts hepburn sequences to hiragana. Explicit
layer converts sequences of hiragana with optional trailing particle or okurigana.
.TP
.B ctl-k
Implicit only Japanese Katakana layer.
.TP
.B ctl-c
Chinese Wubi mode. No implicit conversion is done. Explicit layer
converts sequences of latin characters to hanzi.
.TP
.B ctl-l
Clear the explicit layer's current input sequence.
.TP
.B ctl-r
Russian mode. Implicit layer converts latin to Cyrillic; the transliteration is mostly
phonetic, with
.B '
for
.IR myagkij-znak
(ь),
.B ''
for
.I tverdyj-znak
(ъ),
.I yo
for ё,
.B j
for
.IR i-kratkaya
(й).
.TP
.B ctl-o
Greek mode.
.TP
.B ctl-s
Korean mode. Implicit layer converts latin to Korean Hangul.
.SH SOURCE
.B /sys/src/cmd/ktrans
.SH SEE ALSO
.IR rio (1)
.IR kbdfs (8)
.br
.IR /sys/src/cmd/ktrans/README.kenji
.br
.IR /sys/src/cmd/ktrans/READMEJ.kenji
.SH EXAMPLES
To type the following Japanese text:
.ft Jp
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
健康の維持にも役だっていますが、 なかなかたのしいものです。
.ft
your keyboard typing stream should be:
watashiHA[^\\]mainichi[^\\]35[^l]fun[^\\]ijou[^\\]aruIte,[^\\]
saraNI[^\\]10[^l]fun[^\\]denshaNI[^\\]noTte[^\\]gakkouNI[^\\]
kayoImasu.[\\n]kenkouNO[^\\]ijiNImo[^\\]yakuDAtteimasuga,[^\\]
nakanakatanoshiImonodesu.[\\n]
where [^\\] and [^l] indicate 'ctl-\\' and 'ctl-l',
respectively, and [\\n] indicates a newline, which performs the
same lookup as the first ctl-\\. See README.kenji for the details
of this Japanese input method.
.SH BUGS
.PP
There is no way to generate the control characters literally.
.SH HISTORY
Ktrans was originally written by Kenji Okamoto in August of 2000 for
the 2nd edition of Plan 9. It was imported in to 9front in July of
2022, with patches by several contributors.

401
sys/src/cmd/ktrans/fs.c Normal file
View file

@ -0,0 +1,401 @@
#include <u.h>
#include <libc.h>
#include <fcall.h>
#include <thread.h>
#include <9p.h>
#include "hash.h"
#include "ktrans.h"
/*
 * Channel of parsed kbd messages produced by kbdproc; it is used as
 * the input channel of sessions attached with the "global" aname.
 * Created by launchfs only when a kbd file was given.
 */
static Channel *globalkbd;
/* Name reported as uid, gid and muid of every file; set by launchfs. */
static char *user;
/*
 * parsekbd splits the n bytes at buf into kbd messages and sends each
 * one on out.  A message is a one byte code ('c', 'k' or 'K') followed
 * by a non-empty payload that ends at a NUL or at the end of buf.
 *
 * Returns nil on success, and also when out has been closed.
 * Otherwise returns a static string describing the first bad message;
 * messages preceding it have already been sent.
 */
char*
parsekbd(Channel *out, char *buf, int n)
{
	char *p, *e, *end;
	Msg msg;
	int len;

	end = buf + n;
	for(p = buf; p < end;){
		msg.code = *p++;
		switch(msg.code){
		case 'c': case 'k': case 'K':
			break;
		default:
			return "malformed kbd message";
		}
		/*
		 * Callers pass raw read/write data which need not be
		 * NUL terminated, so only look for the terminator
		 * inside the bytes we were given.
		 */
		e = memchr(p, '\0', end - p);
		if(e == nil)
			e = end;
		len = e - p;
		if(len >= sizeof msg.buf){
			/* too long for a Msg: keep what fits, whole runes only */
			len = sizeof msg.buf - 1;
			while(len > 0 && (p[len] & 0xC0) == 0x80)
				len--;
		}
		if(len == 0)
			return "short command";
		memmove(msg.buf, p, len);
		msg.buf[len] = '\0';
		/* step over the whole payload and its terminator, if any */
		p = e;
		if(p < end)
			p++;
		if(send(out, &msg) == -1)
			return nil;
	}
	return nil;
}
/*
 * kbdproc is the body of the proc that feeds the global keyboard:
 * it reads kbd messages from the file named by a and passes them
 * through parsekbd to globalkbd.
 *
 * If the file cannot be opened, or reading it hits end of file or an
 * error, globalkbd is closed and the proc returns.
 */
void
kbdproc(void *a)
{
	char *s;
	int fd, n;
	char buf[128+1];

	s = a;
	fd = open(s, OREAD);
	if(fd < 0){
		fprint(2, "could not open file %s: %r\n", s);
		chanclose(globalkbd);
		return;
	}
	for(;;){
		/* one byte is held back so the data can be terminated */
		n = read(fd, buf, sizeof buf - 1);
		if(n <= 0)
			break;
		/* too short to hold a code, a payload and a terminator */
		if(n < 3)
			continue;
		buf[n] = '\0';
		parsekbd(globalkbd, buf, n);
	}
	/* retrying a dead file would only spin; shut down like a failed open */
	if(n < 0)
		fprint(2, "could not read file %s: %r\n", s);
	close(fd);
	chanclose(globalkbd);
}
/*
 * spawntrans allocates a translation session, creates its channels
 * and starts keyproc on it in a new proc.  When global is non-zero
 * the session takes its input from the shared globalkbd channel
 * instead of a private one.
 */
Trans*
spawntrans(int global)
{
	Trans *tr;

	tr = mallocz(sizeof *tr, 1);
	tr->input = global ? globalkbd : chancreate(sizeof(Msg), 0);
	tr->output = chancreate(sizeof(Msg), 0);
	tr->dict = chancreate(sizeof(Msg), 0);
	tr->done = chancreate(1, 0);
	tr->lang = chancreate(sizeof(char*), 0);

	proccreate(keyproc, tr, mainstacksize);
	return tr;
}
/*
 * closetrans shuts down a session made by spawntrans: it closes the
 * message channels, waits for two completion notices on t->done and
 * then releases the channels and the Trans itself.
 */
void
closetrans(Trans *t)
{
	chanclose(t->input);
	chanclose(t->output);
	chanclose(t->dict);

	/* wait for threads to exit */
	recv(t->done, nil);
	recv(t->done, nil);

	chanfree(t->done);
	/*
	 * globalkbd is shared with kbdproc, which keeps sending on it
	 * for as long as it runs, so it must not be freed from under it.
	 */
	if(t->input != globalkbd)
		chanfree(t->input);
	chanfree(t->output);
	chanfree(t->dict);
	chanfree(t->lang);
	free(t);
}
/*
 * Qid paths of the files served.  The values are also the indices
 * of the matching entries in dirtab.
 */
enum{
Qroot,
Qkbd,
Qkbdin,
Qlang,
};
/*
 * Template directory entries, indexed by qid path.  dirgen copies an
 * entry and fills in the name and owner strings; fswalk1 looks names
 * up here.
 */
Dir dirtab[] = {
{.qid={Qroot, 0, QTDIR}, .mode=0555, .name="/"},
{.qid={Qkbd, 0, QTFILE}, .mode=0600, .name="kbd"},
{.qid={Qkbdin, 0, QTFILE}, .mode=0200, .name="kbdin"},
{.qid={Qlang, 0, QTFILE}, .mode=0600, .name="lang"},
};
/*
 * dirgen fills *dir with the entry that follows index n in dirtab,
 * so n = 0 yields the first file below the root and n = -1 the root
 * itself.  The name, uid, gid and muid strings are fresh copies.
 * Returns 0 on success and -1 once the table is exhausted.
 */
static int
dirgen(int n, Dir *dir, void*)
{
	int idx;

	idx = n + 1;
	if(idx >= nelem(dirtab))
		return -1;

	*dir = dirtab[idx];
	dir->name = estrdup9p(dirtab[idx].name);
	dir->uid = estrdup9p(user);
	dir->gid = estrdup9p(user);
	dir->muid = estrdup9p(user);
	return 0;
}
/*
 * Per-attach state hung off Fid.aux.  fsattach creates it, fsclone
 * shares it with cloned fids by taking a reference, and fidclunk
 * tears it down when the last reference is dropped.
 */
typedef struct Aux Aux;
struct Aux {
Ref;
Reqqueue *q;	/* queue on which reads of kbd are run (see fsread) */
Trans *t;	/* translation session created by spawntrans */
};
/*
 * fsattach gives every attach a new "keyboard" of its own.
 *
 * The attach name "global" asks for /dev/kbd to be the source of
 * keyboard input; any other name gets a session with a private
 * input channel.
 *
 * A session consists of one translation process and one read queue.
 * Clients commonly sit blocked in a read of the kbd file, so those
 * reads are handed to a queue of their own, leaving us free to
 * service other requests in the meantime.
 */
static void
fsattach(Req *r)
{
	Aux *state;
	Trans *session;
	char *aname;
	int global;

	aname = r->ifcall.aname;
	global = aname != nil && strcmp(aname, "global") == 0;
	session = spawntrans(global ? 1 : 0);

	state = mallocz(sizeof *state, 1);
	state->t = session;
	state->q = reqqueuecreate();
	incref(state);

	r->fid->aux = state;
	r->fid->qid = dirtab[0].qid;
	r->ofcall.qid = dirtab[0].qid;
	respond(r, nil);
}
/* fsopen accepts every open request without further checks. */
static void
fsopen(Req *r)
{
respond(r, nil);
}
/*
 * fskbd answers one read of the kbd file.  It runs on the session's
 * request queue because it blocks until the translator has output.
 *
 * A message whose code is not 'c' is passed on as a single
 * "<code><text>\0" record.  A 'c' message is expanded into one
 * "c<rune>\0" record per rune of its text.  Runes that do not fit
 * in the reply buffer are dropped.
 */
static void
fskbd(Req *r)
{
	Aux *aux;
	Msg m;
	char *p;
	char buf[1+128], *bp, *ep;
	Rune rn;
	int len;

	aux = r->fid->aux;
	if(recv(aux->t->output, &m) == -1){
		respond(r, "closing");
		return;
	}

	ep = buf + sizeof buf;
	if(m.code != 'c'){
		/* seprint returns a pointer to the NUL, which is part of the record */
		bp = seprint(buf, ep, "%c%s", m.code, m.buf);
		len = (bp - buf) + 1;
	}else{
		bp = buf;
		/* only start a record when 'c', a full rune and a NUL still fit */
		for(p = m.buf; ep - bp >= 1 + UTFmax + 1;){
			p += chartorune(&rn, p);
			if(rn == Runeerror || rn == '\0')
				break;
			/* step past the NUL so that the records stay separated */
			bp = seprint(bp, ep, "c%C", rn) + 1;
		}
		len = bp - buf;
		if(len == 0){
			/* a zero length reply would look like end of file */
			buf[0] = '\0';
			len = 1;
		}
	}

	r->ifcall.offset = 0;
	readbuf(r, buf, len);
	respond(r, nil);
}
/*
 * fsread dispatches a read by file:
 *	root	directory listing generated by dirgen
 *	kbd	queued on the session's request queue and served by fskbd
 *	lang	sends a 'q' message to the translator, then replies with
 *		the string received on its lang channel plus a newline
 * Reads of any other file fail with "bad op".
 */
static void
fsread(Req *r)
{
	Aux *aux;
	Msg query;
	char *name;

	aux = r->fid->aux;
	switch(r->fid->qid.path){
	case Qroot:
		dirread9p(r, dirgen, nil);
		respond(r, nil);
		return;
	case Qkbd:
		reqqueuepush(aux->q, r, fskbd);
		return;
	case Qlang:
		break;
	default:
		respond(r, "bad op");
		return;
	}

	/* only Qlang gets here */
	query.code = 'q';
	query.buf[0] = '\0';
	if(send(aux->t->input, &query) == -1
	|| recv(aux->t->lang, &name) == -1){
		respond(r, "closing");
		return;
	}
	snprint(query.buf, sizeof query.buf, "%s\n", name);
	readstr(r, query.buf);
	respond(r, nil);
}
/*
 * fswrite dispatches a write by file:
 *	kbdin	the data is parsed as kbd messages and fed to the
 *		session's input channel
 *	lang	the data, up to the first newline, names a language;
 *		the code parselang returns for it is sent to the
 *		translator as a 'c' message
 * Writes to any other file are acknowledged without effect.
 */
static void
fswrite(Req *r)
{
	Aux *aux;
	int n, lang;
	char *err, *p;
	Msg m;

	aux = r->fid->aux;
	n = r->ifcall.count;
	switch(r->fid->qid.path){
	case Qkbdin:
		/* a message is at least a code, one payload byte and a NUL */
		if(n < 3){
			respond(r, "short write");
			return;
		}
		err = parsekbd(aux->t->input, r->ifcall.data, n);
		if(err != nil){
			respond(r, err);
			return;
		}
		break;
	case Qlang:
		if(n >= sizeof m.buf){
			respond(r, "large write");
			return;
		}
		memmove(m.buf, r->ifcall.data, n);
		m.buf[n] = '\0';
		p = strchr(m.buf, '\n');
		if(p != nil)
			*p = '\0';
		lang = parselang(m.buf);
		if(lang < 0){
			respond(r, "unknown lang");
			return;
		}
		m.buf[0] = lang;
		m.buf[1] = '\0';
		m.code = 'c';
		/* do not report success if the translator is gone */
		if(send(aux->t->input, &m) == -1){
			respond(r, "closing");
			return;
		}
		break;
	}
	r->ofcall.count = n;
	respond(r, nil);
}
/*
 * fsstat fills r->d from the dirtab entry of the fid's file.  dirgen
 * yields the entry after the index it is given, hence the "- 1".
 */
static void
fsstat(Req *r)
{
	char *err;

	err = nil;
	if(dirgen(r->fid->qid.path - 1, &r->d, nil) == -1)
		err = "invalid fid";
	respond(r, err);
}
/*
 * fswalk1 walks fid one element, to the dirtab entry called name.
 * Only walks that start at the root are supported.  On success both
 * *qid and fid->qid are set and nil is returned; otherwise an error
 * string is returned.
 */
static char*
fswalk1(Fid *fid, char *name, Qid *qid)
{
	Dir *d;

	if(fid->qid.path != Qroot)
		return "walk from non root";

	for(d = dirtab; d < dirtab + nelem(dirtab); d++){
		if(strcmp(name, d->name) != 0)
			continue;
		*qid = d->qid;
		fid->qid = *qid;
		return nil;
	}
	return "file does not exist";
}
/*
 * fsclone lets newfid share oldfid's session state, taking one more
 * reference to it.  Always returns nil.
 */
static char*
fsclone(Fid *oldfid, Fid *newfid)
{
	Aux *shared;

	shared = oldfid->aux;
	newfid->aux = shared;
	incref(shared);
	return nil;
}
/*
 * fidclunk is the destroyfid hook: it drops the fid's reference to
 * its session state and, when that was the last one, shuts the
 * session down and releases everything fsattach allocated.
 */
static void
fidclunk(Fid *fid)
{
	Aux *aux;

	aux = fid->aux;
	/* a fid that never completed an attach has no state to release */
	if(aux == nil)
		return;
	fid->aux = nil;
	if(decref(aux) != 0)
		return;
	closetrans(aux->t);
	reqqueuefree(aux->q);
	free(aux);
}
/* 9P request handlers; posted and served by launchfs via threadpostsrv. */
static Srv fs = {
.attach=fsattach,
.open=fsopen,
.read=fsread,
.write=fswrite,
.stat=fsstat,
.walk1=fswalk1,
.clone=fsclone,
.destroyfid=fidclunk,
};
/*
 * launchfs posts the file server as srv and mounts it on mnt.
 *
 * With a kbd file given, a proc is started to read it into the
 * global keyboard channel, the mount is made with the "global"
 * attach name, and the served kbd and kbdin files are bound over
 * /dev/kbd and /dev/kbdin.  With kbd nil only the plain mount is
 * made.  Any failure is fatal.
 */
void
launchfs(char *srv, char *mnt, char *kbd)
{
	int fd;
	char path[128];
	char *aname;

	user = getenv("user");
	if(user == nil)
		user = "glenda";

	if(kbd != nil){
		globalkbd = chancreate(sizeof(Msg), 0);
		proccreate(kbdproc, kbd, mainstacksize);
	}

	fd = threadpostsrv(&fs, srv);
	if(fd < 0)
		sysfatal("postsrv %r");

	aname = kbd != nil ? "global" : "";
	if(mount(fd, -1, mnt, MREPL, aname) < 0)
		sysfatal("mount %r");
	if(kbd == nil)
		return;

	/* overlay the keyboard files of the existing /dev */
	snprint(path, sizeof path, "%s/kbd", mnt);
	if(bind(path, "/dev/kbd", MREPL) < 0)
		sysfatal("bind %r");
	snprint(path, sizeof path, "%s/kbdin", mnt);
	if(bind(path, "/dev/kbdin", MREPL) < 0)
		sysfatal("bind %r");
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -6,5 +6,6 @@ HFILES=ktrans.h
OFILES=\
hash.$O\
main.$O\
fs.$O\
</sys/src/cmd/mkone