mirror of
git://git.9front.org/plan9front/plan9front
synced 2025-01-12 11:10:06 +00:00
ktrans: 你好
This consolidates jisho and map lookups to use the same structure and removes the old jisho code.
This commit is contained in:
parent
ccbabf1c16
commit
c147614656
8 changed files with 86361 additions and 3178 deletions
83100
lib/hanzi.zidian
Normal file
83100
lib/hanzi.zidian
Normal file
File diff suppressed because it is too large
Load diff
210
sys/src/cmd/ktrans/hash.c
Normal file
210
sys/src/cmd/ktrans/hash.c
Normal file
|
@ -0,0 +1,210 @@
|
|||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include "hash.h"
|
||||
|
||||
/*
 * Header ("tag") stored at the start of every node in the map's node
 * array; the caller's value bytes follow it immediately.
 */
typedef struct Hnode Hnode;
|
||||
struct Hnode {
|
||||
/* zero while the slot is empty; hmapset raises it, hmapdel and hmapreset clear it */
int filled;
|
||||
/* index into the node array of the next node on this chain; 0 ends the chain */
int next;
|
||||
/* key string the node was stored under; lookups compare it with strcmp */
void *key;
|
||||
};
|
||||
|
||||
/* size of the Hnode header, i.e. the offset of the value inside a node */
enum{
|
||||
Tagsize = sizeof(Hnode),
|
||||
};
|
||||
|
||||
uvlong
|
||||
shash(char *s)
|
||||
{
|
||||
uvlong hash;
|
||||
|
||||
hash = 7;
|
||||
for(; *s; s++)
|
||||
hash = hash*31 + *s;
|
||||
return hash;
|
||||
}
|
||||
|
||||
Hmap*
|
||||
hmapalloc(int nbuckets, int size)
|
||||
{
|
||||
void *store;
|
||||
Hmap *h;
|
||||
int nsz;
|
||||
|
||||
nsz = Tagsize + size;
|
||||
store = mallocz(sizeof(*h) + (nbuckets * nsz), 1);
|
||||
if(store == nil)
|
||||
return nil;
|
||||
|
||||
h = store;
|
||||
h->nbs = nbuckets;
|
||||
h->nsz = nsz;
|
||||
h->len = h->cap = nbuckets;
|
||||
|
||||
h->nodes = store;
|
||||
h->nodes += sizeof(*h);
|
||||
return store;
|
||||
}
|
||||
|
||||
int
|
||||
hmapset(Hmap **store, char *key, void *new, void *old)
|
||||
{
|
||||
Hnode *n;
|
||||
uchar *v;
|
||||
uchar *oldv;
|
||||
Hmap *h;
|
||||
int next;
|
||||
vlong diff;
|
||||
|
||||
h = *store;
|
||||
oldv = nil;
|
||||
v = h->nodes + (shash(key)%h->nbs) * h->nsz;
|
||||
for(;;){
|
||||
n = (Hnode*)v;
|
||||
next = n->next;
|
||||
|
||||
if(n->filled == 0)
|
||||
goto replace;
|
||||
if(strcmp(n->key, key) == 0){
|
||||
oldv = v + Tagsize;
|
||||
goto replace;
|
||||
}
|
||||
if(next == 0)
|
||||
break;
|
||||
v = h->nodes + next*h->nsz;
|
||||
}
|
||||
|
||||
if(h->cap == h->len){
|
||||
/* figure out way back from a relocation */
|
||||
diff = v - h->nodes;
|
||||
|
||||
h->cap *= 2;
|
||||
*store = realloc(*store, sizeof(*h) + h->cap*h->nsz);
|
||||
h = *store;
|
||||
h->nodes = (uchar*)*store + sizeof(*h);
|
||||
memset(h->nodes + h->len*h->nsz, 0, h->nsz);
|
||||
|
||||
v = h->nodes + diff;
|
||||
n = (Hnode*)v;
|
||||
}
|
||||
n->next = h->len;
|
||||
h->len++;
|
||||
assert(h->len <= h->cap);
|
||||
v = h->nodes + n->next*h->nsz;
|
||||
n = (Hnode*)v;
|
||||
|
||||
replace:
|
||||
memmove(v + Tagsize, new, h->nsz - Tagsize);
|
||||
n->filled++;
|
||||
n->key = key;
|
||||
n->next = next;
|
||||
if(old != nil && oldv != nil){
|
||||
memmove(old, oldv, h->nsz - Tagsize);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void*
|
||||
_hmapget(Hmap *h, char *key)
|
||||
{
|
||||
Hnode *n;
|
||||
uchar *v;
|
||||
|
||||
v = h->nodes + (shash(key)%h->nbs)*h->nsz;
|
||||
for(;;){
|
||||
n = (Hnode*)v;
|
||||
if(n->filled != 0 && strcmp(n->key, key) == 0)
|
||||
return v;
|
||||
if(n->next == 0)
|
||||
break;
|
||||
v = h->nodes + n->next*h->nsz;
|
||||
}
|
||||
return nil;
|
||||
}
|
||||
|
||||
int
|
||||
hmapget(Hmap *h, char *key, void *dst)
|
||||
{
|
||||
uchar *v;
|
||||
|
||||
v = _hmapget(h, key);
|
||||
if(v == nil)
|
||||
return -1;
|
||||
if(dst != nil)
|
||||
memmove(dst, v + Tagsize, h->nsz - Tagsize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hmapdel(Hmap *h, char *key, void *dst, int freekey)
|
||||
{
|
||||
uchar *v;
|
||||
Hnode *n;
|
||||
|
||||
v = _hmapget(h, key);
|
||||
if(v == nil)
|
||||
return -1;
|
||||
|
||||
n = (Hnode*)v;
|
||||
n->filled = 0;
|
||||
if(freekey)
|
||||
free(n->key);
|
||||
if(dst != nil)
|
||||
memmove(dst, v + Tagsize, h->nsz - Tagsize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char*
|
||||
hmapkey(Hmap *h, char *key)
|
||||
{
|
||||
uchar *v;
|
||||
Hnode *n;
|
||||
|
||||
v = _hmapget(h, key);
|
||||
if(v == nil)
|
||||
return nil;
|
||||
|
||||
n = (Hnode*)v;
|
||||
return n->key;
|
||||
}
|
||||
|
||||
/*
 * Build a new map with the given number of buckets (0 means "as many
 * as old currently has nodes"), move every live entry of old into it
 * and free old.  Keys are carried over by pointer, not copied.
 *
 * Returns the new map, or nil if it could not be built; in that case
 * old is left untouched and still owned by the caller.
 */
Hmap*
hmaprehash(Hmap *old, int buckets)
{
	int i;
	uchar *v;
	Hnode *n;
	Hmap *new;

	if(buckets == 0)
		buckets = old->len;

	new = hmapalloc(buckets, old->nsz - Tagsize);
	if(new == nil)
		return nil;
	for(i=0 ; i < old->len; i++){
		v = old->nodes + i*old->nsz;
		n = (Hnode*)v;
		/* empty slots carry no usable key; skip them */
		if(n->filled == 0)
			continue;
		if(hmapset(&new, n->key, v + Tagsize, nil) < 0){
			free(new);
			return nil;
		}
	}
	free(old);
	return new;
}
|
||||
|
||||
void
|
||||
hmapreset(Hmap *h, int freekeys)
|
||||
{
|
||||
Hnode *n;
|
||||
uchar *v;
|
||||
int i;
|
||||
|
||||
for(i=0; i < h->len; i++){
|
||||
v = h->nodes + i*h->nsz;
|
||||
n = (Hnode*)v;
|
||||
if(n->filled == 0)
|
||||
continue;
|
||||
if(freekeys)
|
||||
free(n->key);
|
||||
n->filled = 0;
|
||||
}
|
||||
h->len = 0;
|
||||
}
|
23
sys/src/cmd/ktrans/hash.h
Normal file
23
sys/src/cmd/ktrans/hash.h
Normal file
|
@ -0,0 +1,23 @@
|
|||
/*
 * A slot that holds either a pointer or an int.
 * NOTE(review): nothing in the accompanying hash.c refers to Hkey;
 * confirm whether it is still needed.
 */
typedef union Hkey Hkey;
|
||||
union Hkey {
|
||||
void *p;
|
||||
|
||||
int v;
|
||||
};
|
||||
|
||||
/*
 * Map header.  hmapalloc places the node array in the same allocation,
 * directly after this structure.
 */
typedef struct Hmap Hmap;
|
||||
struct Hmap {
|
||||
/* number of bucket head slots; keys are hashed modulo this */
int nbs;
|
||||
/* bytes per node: the node header plus the caller's value size */
int nsz;
|
||||
|
||||
/* nodes handed out so far, bucket heads included */
int len;
|
||||
/* nodes the allocation has room for; hmapset doubles it when full */
int cap;
|
||||
/* start of the node array */
uchar *nodes;
|
||||
};
|
||||
|
||||
/* allocate a map of nbuckets buckets holding size-byte values */
Hmap* hmapalloc(int nbuckets, int size);
|
||||
/* copy the value for key to dst (if non-nil); 0 on hit, -1 on miss */
int hmapget(Hmap *h, char *key, void *dst);
|
||||
/* store new under key; may move the map, hence Hmap** */
int hmapset(Hmap **h, char *key, void *new, void *old);
|
||||
/* mark key's node empty, optionally freeing the stored key; 0 on hit, -1 on miss */
int hmapdel(Hmap *h, char *key, void *dst, int freekey);
|
||||
/* NOTE(review): no definition of hmapfree appears in the accompanying hash.c - confirm */
void hmapfree(Hmap *h, int freekeys);
|
||||
/* return the key pointer stored for key, or nil */
char* hmapkey(Hmap *h, char *key);
|
||||
/* empty the map in place, optionally freeing the stored keys */
void hmapreset(Hmap *h, int freekeys);
|
|
@ -1,211 +0,0 @@
|
|||
/*
|
||||
* open jisho file, and set the size of this jisho etc
|
||||
*
|
||||
* Kenji Okamoto August 4, 2000
|
||||
* Osaka Prefecture Univ.
|
||||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "jisho.h"
|
||||
|
||||
Dictionary *openQDIC(char *);
|
||||
void freeQDIC(Dictionary*);
|
||||
KouhoList *getKouhoHash(Dictionary*, char *);
|
||||
KouhoList *getKouhoFile(DicList*, char *);
|
||||
void selectKouho(KouhoList **, KouhoList*);
|
||||
int hashVal(char *);
|
||||
void addHash(Hash **, DicList*);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Open QuickDIC (hashed personal dictionary)
|
||||
* open skk styled ktrans dictionary file, and make its hash table
|
||||
* based on individual header kana strings
|
||||
*
|
||||
* KouhoList
|
||||
* |---------|
|
||||
* Hash |---->kouho---->kouhotop
|
||||
* |-------| |
|
||||
* dic---->dhash---->dicindex---->kanahead
|
||||
* |--------| |--------|
|
||||
* Dictionary DicList
|
||||
*
|
||||
*/
|
||||
Dictionary *
|
||||
openQDIC(char *dicname)
|
||||
{
|
||||
Biobuf *f;
|
||||
void *Bbuf;
|
||||
Dictionary *dic;
|
||||
DicList *dicitem; /* for a future extension */
|
||||
char buf[1024], *startstr, *endstr;
|
||||
int i;
|
||||
|
||||
SET(dicitem); /* yes, I know I'm wrong, but... */
|
||||
|
||||
dic = (Dictionary*)malloc(sizeof(Dictionary));
|
||||
/* make room for pointer array (size=HASHSIZE) of hash table */
|
||||
for(i=0; i< HASHSIZE; i++) dic->dhash[i] = 0;
|
||||
dic->dlist = 0; /* for a future extension (more than one dics ^_^ */
|
||||
|
||||
if ((f = Bopen(dicname, OREAD)) == 0)
|
||||
return dic;
|
||||
|
||||
/* make hash table by the dic's header word */
|
||||
|
||||
while(Bbuf = Brdline(f, '\n')) {
|
||||
strncpy(buf, (char *)Bbuf, Blinelen(f));
|
||||
|
||||
if (buf[0] == ';') /* comment line */
|
||||
continue;
|
||||
else {
|
||||
/* get header word from jisho */
|
||||
startstr = buf;
|
||||
if(!(endstr = utfutf(startstr, "\t"))) break;
|
||||
*endstr = '\0';
|
||||
/* dicitem includes each header word from the jisho */
|
||||
|
||||
dicitem = (DicList*)malloc(sizeof(DicList)+(endstr-startstr+1));
|
||||
dicitem->nextitem = 0; /* for a future extension */
|
||||
strcpy(dicitem->kanahead, startstr);
|
||||
|
||||
dicitem->kouho = getKouhoFile(dicitem, endstr); /* read kouho from jisho */
|
||||
addHash(dic->dhash, dicitem);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
dic->dlist = dicitem;
|
||||
Bterm(f);
|
||||
return dic;
|
||||
}
|
||||
|
||||
/*
|
||||
* free dynamically allocated memory
|
||||
*/
|
||||
void
|
||||
freeQDIC(Dictionary *dic)
|
||||
{
|
||||
Hash *hash1, *hash2;
|
||||
DicList *dlist, *dlist2;
|
||||
int l;
|
||||
|
||||
for (dlist = dic->dlist;
|
||||
dlist != 0;
|
||||
dlist2 = dlist, dlist = dlist->nextitem, free((void *)dlist2));
|
||||
for (l = 0; l < HASHSIZE; l++) {
|
||||
for (hash1 = dic->dhash[l]; hash1; hash1 = hash2) {
|
||||
if (hash1->next !=0) {
|
||||
hash2 = hash1->next;
|
||||
free((void *)hash1);
|
||||
}else
|
||||
break;
|
||||
}
|
||||
}
|
||||
free((void *)dic);
|
||||
}
|
||||
|
||||
int
|
||||
hashVal(char *s)
|
||||
{
|
||||
uint h;
|
||||
|
||||
h = 0x811c9dc5;
|
||||
while(*s != 0)
|
||||
h = (h^(uchar)*s++) * 0x1000193;
|
||||
return h % HASHSIZE;
|
||||
}
|
||||
|
||||
void
|
||||
addHash(Hash **hash, DicList *ditem)
|
||||
{
|
||||
Hash *h;
|
||||
int v;
|
||||
|
||||
v = hashVal(ditem->kanahead);
|
||||
h = (Hash*)malloc(sizeof(Hash));
|
||||
h->dicindex = ditem;
|
||||
h->length = strlen(ditem->kanahead);
|
||||
h->next = hash[v];
|
||||
hash[v] = h;
|
||||
}
|
||||
|
||||
/*
|
||||
* read Kouho list from the jisho file defined by Biobuf descriptor f
|
||||
*
|
||||
* revised for Plan 9 by K.Okamoto
|
||||
*/
|
||||
KouhoList *
|
||||
getKouhoFile(DicList *dicitem, char * endstr)
|
||||
{
|
||||
char *kouhostart, *kouhoend;
|
||||
KouhoList *kouhoitem, *currntkouhoitem=0, *prevkouhoitem;
|
||||
|
||||
prevkouhoitem = 0;
|
||||
kouhostart = endstr + 1;
|
||||
while((kouhoend = utfutf(kouhostart, " ")) ||
|
||||
(kouhoend = utfutf(kouhostart, "\n"))) {
|
||||
*kouhoend = '\0';
|
||||
|
||||
kouhoitem = (KouhoList*)malloc(sizeof(KouhoList)+(kouhoend-kouhostart+1));
|
||||
kouhoitem->nextkouho = 0;
|
||||
kouhoitem->prevkouho = prevkouhoitem;
|
||||
kouhoitem->dicitem = dicitem;
|
||||
strcpy(kouhoitem->kouhotop, kouhostart);
|
||||
if (prevkouhoitem)
|
||||
prevkouhoitem->nextkouho = kouhoitem;
|
||||
else
|
||||
currntkouhoitem = kouhoitem;
|
||||
prevkouhoitem = kouhoitem;
|
||||
kouhostart = kouhoend + 1;
|
||||
}
|
||||
return currntkouhoitem;
|
||||
}
|
||||
|
||||
/*
|
||||
* get matched kouho from the hash table of header word of the dict
|
||||
* if found, returns pointer to the first candidate in the hash table.
|
||||
* if not found, returns 0.
|
||||
*
|
||||
* from getCand() in skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||
*/
|
||||
KouhoList *
|
||||
getKouhoHash(Dictionary *dic, char *s)
|
||||
{
|
||||
int l, v;
|
||||
Hash *h;
|
||||
|
||||
l = strlen(s);
|
||||
v = hashVal(s);
|
||||
for (h = dic->dhash[v]; h != 0; h = h->next) {
|
||||
if (h->length != l ||
|
||||
strcmp(h->dicindex->kanahead, s)) continue;
|
||||
return h->dicindex->kouho; /* return matched kouho */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* from skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||
* just modified to read easier for current purpose
|
||||
*/
|
||||
void
|
||||
selectKouho(KouhoList **first, KouhoList *current)
|
||||
{
|
||||
/* take off currentkouho from the kouholist table */
|
||||
if (current->prevkouho) {
|
||||
current->prevkouho->nextkouho = current->nextkouho;
|
||||
if (current->nextkouho)
|
||||
current->nextkouho->prevkouho = current->prevkouho;
|
||||
current->prevkouho = 0;
|
||||
}
|
||||
/* take place of firstkouho by currentkouho */
|
||||
if (*first != current) {
|
||||
(*first)->prevkouho = current;
|
||||
current->nextkouho = *first;
|
||||
*first = current;
|
||||
}
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Kenji Okamoto August 4, 2000
|
||||
* Osaka Prefecture Univ.
|
||||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
#define HASHSIZE 257
|
||||
|
||||
/*
|
||||
* Structure for Dictionary's header word (in Hiragana)
|
||||
*/
|
||||
typedef struct DicList DicList;
|
||||
struct DicList {
|
||||
/* head of this word's candidate list; selectKouho moves the chosen candidate here */
struct KouhoList *kouho;
|
||||
struct DicList *nextitem; /* for a future extension */
|
||||
/* header word, NUL-terminated; openQDIC allocates the struct with room for the whole string */
char kanahead[1];
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure for Kouho of each index word in the dictionary
|
||||
*/
|
||||
typedef struct KouhoList KouhoList;
|
||||
struct KouhoList {
|
||||
/* next candidate in the doubly linked list, 0 at the tail */
struct KouhoList *nextkouho;
|
||||
/* previous candidate, 0 at the head */
struct KouhoList *prevkouho;
|
||||
/* dictionary entry this candidate belongs to */
struct DicList *dicitem;
|
||||
/* candidate text, NUL-terminated; the struct is allocated with room for the whole string */
char kouhotop[1]; /* top of the kouhos */
|
||||
} ;
|
||||
|
||||
/* one node of a hash bucket chain; addHash pushes new nodes at the front */
typedef struct Hash Hash;
|
||||
struct Hash {
|
||||
DicList *dicindex; /* pointer to a KouhoList and kanahead etc */
|
||||
/* strlen of dicindex->kanahead, compared before the string itself in getKouhoHash */
short length;
|
||||
/* next node in the same bucket, 0 at the end */
struct Hash *next;
|
||||
};
|
||||
|
||||
typedef struct Dictionary Dictionary;
|
||||
struct Dictionary {
|
||||
DicList *dlist; /* for a future extension, having more than one dictionaries */
|
||||
/* bucket heads, indexed by hashVal() of the header word */
Hash *dhash[HASHSIZE];
|
||||
};
|
File diff suppressed because it is too large
Load diff
|
@ -6,16 +6,26 @@
|
|||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
/*
|
||||
* A glossary on some of the Japanese vocabulary used:
|
||||
* kana: syllabic lettering, either hiragana(ひらがな) or katakana(カタカナ)
|
||||
* kanji(漢字): borrowed characters, 楽 in 楽しい
|
||||
* Okurigana(送り仮名): kana tail to kanji, しい in 楽しい
|
||||
* Joshi(助詞): particle, は in 私は
|
||||
* Jisho(辞書): dictionary
|
||||
* kouho(候補): candidate
|
||||
*/
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "hash.h"
|
||||
#include "ktrans.h"
|
||||
#include "jisho.h"
|
||||
|
||||
#define LSIZE 256
|
||||
|
||||
Rune lbuf[LSIZE]; /* hiragana buffer for key input written by send() */
|
||||
Map *table = hira; /* default language conversion table */
|
||||
Hmap *table;
|
||||
uchar okurigana[LSIZE]; /* buffer for okurigana */
|
||||
char okuri = 0; /* buffer/flag for capital input char */
|
||||
int in, out;
|
||||
|
@ -23,16 +33,10 @@ int llen, olen, joshi = 0;
|
|||
int natural = 1; /* not Japanese but English mode */
|
||||
|
||||
int changelang(int);
|
||||
int dotrans(Dictionary*);
|
||||
int dotrans(Hmap*);
|
||||
int nrune(char *);
|
||||
void send(uchar *, int);
|
||||
Map *match(uchar *p, int *nc, Map *table);
|
||||
|
||||
extern Dictionary *openQDIC(char *);
|
||||
extern KouhoList *getKouhoHash(Dictionary*, char *);
|
||||
extern KouhoList *getKouhoFile(DicList*, char *);
|
||||
extern void freeQDIC(Dictionary*);
|
||||
extern void selectKouho(KouhoList **, KouhoList*);
|
||||
Hmap* opendict(Hmap *, char *);
|
||||
|
||||
void
|
||||
kbdopen(void)
|
||||
|
@ -89,6 +93,50 @@ kbdopen(void)
|
|||
exits(nil);
|
||||
}
|
||||
|
||||
/*
 * Placeholder value that initmap stores under every proper prefix of a
 * romaji string, to tell the reader that more input may follow.
 * Presumably the fields are roma, kana, leadstomore in that order
 * (the Map declaration is not visible here) - TODO confirm.
 */
Map signalmore = {
|
||||
"_", nil, 1,
|
||||
};
|
||||
|
||||
Hmap*
|
||||
initmap(Map *m, int n)
|
||||
{
|
||||
int i, j;
|
||||
char buf[16];
|
||||
char *s;
|
||||
Map prev;
|
||||
Hmap *h;
|
||||
|
||||
h = hmapalloc(n, sizeof(Map));
|
||||
for(i = 0; i < n; i++){
|
||||
if(m[i].roma == nil || m[i].roma[0] == '\0')
|
||||
continue;
|
||||
|
||||
//We mark all partial strings so we know when
|
||||
//we have partial match when ingesting.
|
||||
j = 2;
|
||||
for(s = m[i].roma; *s && j <= sizeof buf; s++){
|
||||
snprint(buf, j, "%s", m[i].roma);
|
||||
prev = m[i];
|
||||
if(hmapget(h, buf, &prev) == 0){
|
||||
if(prev.leadstomore == 1 && s[1] == '\0'){
|
||||
//confict; partial & valid input
|
||||
prev = m[i];
|
||||
prev.leadstomore = 1;
|
||||
free(hmapkey(h, buf));
|
||||
}
|
||||
}
|
||||
|
||||
if(s[1] == '\0'){
|
||||
hmapset(&h, strdup(buf), &prev, nil);
|
||||
} else {
|
||||
hmapset(&h, strdup(buf), &signalmore, nil);
|
||||
}
|
||||
j++;
|
||||
}
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
void
|
||||
usage(void)
|
||||
{
|
||||
|
@ -101,11 +149,11 @@ main(int argc, char *argv[])
|
|||
{
|
||||
|
||||
uchar *bp, *ep, buf[128];
|
||||
Map *mp;
|
||||
int nchar, wantmore;
|
||||
Map lkup, last;
|
||||
int wantmore;
|
||||
int n, c;
|
||||
char *dictname;
|
||||
Dictionary *jisho;
|
||||
char *jishoname, *zidianname;
|
||||
Hmap *jisho, *zidian;
|
||||
|
||||
ARGBEGIN{
|
||||
default: usage();
|
||||
|
@ -113,9 +161,20 @@ main(int argc, char *argv[])
|
|||
if(argc != 0)
|
||||
usage();
|
||||
|
||||
if((dictname = getenv("jisho")) == nil)
|
||||
dictname = "/lib/kanji.jisho";
|
||||
jisho = openQDIC(dictname);
|
||||
if((jishoname = getenv("jisho")) == nil)
|
||||
jishoname = "/lib/kanji.jisho";
|
||||
jisho = opendict(nil, jishoname);
|
||||
|
||||
if((zidianname = getenv("zidian")) == nil)
|
||||
zidianname = "/lib/hanzi.zidian";
|
||||
zidian = opendict(nil, zidianname);
|
||||
|
||||
hira = table = initmap(mhira, nelem(mhira));
|
||||
kata = initmap(mkata, nelem(mkata));
|
||||
greek = initmap(mgreek, nelem(mgreek));
|
||||
cyril = initmap(mcyril, nelem(mcyril));
|
||||
hangul = initmap(mhangul, nelem(mhangul));
|
||||
last = (Map){nil, nil, -1};
|
||||
|
||||
kbdopen();
|
||||
if(fork())
|
||||
|
@ -147,8 +206,8 @@ main(int argc, char *argv[])
|
|||
wantmore = 0;
|
||||
|
||||
if (*bp=='') { /* ^x read ktrans-jisho once more */
|
||||
freeQDIC(jisho);
|
||||
jisho = openQDIC(dictname);
|
||||
jisho = opendict(jisho, jishoname);
|
||||
zidian = opendict(zidian, zidianname);
|
||||
llen = 0;
|
||||
olen = okuri = joshi = 0;
|
||||
wantmore=0;
|
||||
|
@ -156,7 +215,10 @@ main(int argc, char *argv[])
|
|||
continue;
|
||||
}
|
||||
if (*bp=='') { /* ^\ (start translation command) */
|
||||
c = dotrans(jisho);
|
||||
if (table == hanzi)
|
||||
c = dotrans(zidian);
|
||||
else
|
||||
c = dotrans(jisho);
|
||||
if (c)
|
||||
*bp = c; /* pointer to translated rune */
|
||||
else
|
||||
|
@ -167,11 +229,13 @@ main(int argc, char *argv[])
|
|||
bp++;
|
||||
llen = 0;
|
||||
olen = okuri = joshi = 0;
|
||||
last.kana = nil;
|
||||
continue;
|
||||
}
|
||||
if (changelang(*bp)) { /* change language mode OK */
|
||||
bp++;
|
||||
olen = okuri = joshi = 0;
|
||||
last.kana = nil;
|
||||
continue;
|
||||
}
|
||||
if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */
|
||||
|
@ -179,6 +243,7 @@ main(int argc, char *argv[])
|
|||
int rlen = chartorune(&r, (char *)bp);
|
||||
send(bp, rlen); /* write bp to /dev/cons */
|
||||
bp += rlen;
|
||||
last.kana = nil;
|
||||
continue;
|
||||
}
|
||||
if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A'
|
||||
|
@ -192,27 +257,33 @@ main(int argc, char *argv[])
|
|||
joshi = 1;
|
||||
olen = 0;
|
||||
}
|
||||
mp = match(bp, &nchar, table);
|
||||
if (mp == 0) {
|
||||
if (nchar>0) { /* match, longer possible */
|
||||
wantmore++;
|
||||
break;
|
||||
}
|
||||
send(bp++, 1); /* alphabet in kana mode */
|
||||
} else {
|
||||
send((uchar*)mp->kana, strlen(mp->kana));
|
||||
bp += nchar;
|
||||
if(hmapget(table, (char*)bp, &lkup) < 0){
|
||||
if(last.kana != nil){
|
||||
send((uchar*)last.kana, strlen(last.kana));
|
||||
bp += strlen(last.roma);
|
||||
} else
|
||||
send(bp++, 1);
|
||||
last.kana = nil;
|
||||
break;
|
||||
}
|
||||
/* concatenations; only advance a single character */
|
||||
if(lkup.kana != nil && strstr("ッっ", lkup.kana))
|
||||
lkup.roma = "_";
|
||||
/* partial match */
|
||||
if(lkup.kana == nil || lkup.leadstomore == 1){
|
||||
if(lkup.kana != nil)
|
||||
last = lkup;
|
||||
|
||||
wantmore = 1;
|
||||
break;
|
||||
}
|
||||
last.kana = nil;
|
||||
send((uchar*)lkup.kana, strlen(lkup.kana));
|
||||
bp += strlen(lkup.roma);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Smaller of two ints. */
int
min(int a, int b)
{
	if(b <= a)
		return b;
	return a;
}
|
||||
|
||||
/*
|
||||
* send UTF string (p) with length (n) to stdout
|
||||
* and write rune (r) in global lbuf[] buffer
|
||||
|
@ -232,7 +303,9 @@ send(uchar *p, int n)
|
|||
llen -= 64;
|
||||
}
|
||||
|
||||
if (table!=hira || natural)
|
||||
if(table != hira && table != hanzi)
|
||||
return;
|
||||
if(natural && table != hanzi)
|
||||
return;
|
||||
|
||||
ep = p+n;
|
||||
|
@ -253,49 +326,13 @@ send(uchar *p, int n)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Romaji to Hiragana/Katakana conversion
|
||||
* romaji should be input as lowercase letters
|
||||
* returns the matched address in table, hira, kata, etc.
|
||||
* nc: number of character (return value)
|
||||
*/
|
||||
Map *
|
||||
match(uchar *p, int *nc, Map *table)
|
||||
{
|
||||
register Map *longp = 0, *kp;
|
||||
static char last;
|
||||
int longest = 0;
|
||||
|
||||
*nc = -1;
|
||||
for (kp=table; kp->roma; kp++) {
|
||||
if (*p == *kp->roma) {
|
||||
int lr = strlen(kp->roma);
|
||||
int len = min(lr, strlen((char *)p));
|
||||
if (strncmp(kp->roma, (char *)p, len)==0) {
|
||||
if (len<lr) {
|
||||
*nc = 1;
|
||||
return 0;
|
||||
}
|
||||
if (len>longest) {
|
||||
longest = len;
|
||||
longp = kp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (longp) {
|
||||
last = longp->roma[longest-1];
|
||||
*nc = longp->advance;
|
||||
}
|
||||
return longp;
|
||||
}
|
||||
|
||||
int
|
||||
changelang(int c)
|
||||
{
|
||||
switch(c){
|
||||
case '': /* ^t (English mode) */
|
||||
natural = 1;
|
||||
table = hira;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
@ -334,23 +371,80 @@ changelang(int c)
|
|||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^c (Chinese mode) */
|
||||
natural = 1;
|
||||
table = hanzi;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Load the dictionary file name into h, or into a freshly allocated
 * map when h is nil, and return the map.
 *
 * Each usable line has the form "key<tab>cand cand ...".  Empty lines,
 * lines starting with ';' and lines without a tab or without anything
 * after it are skipped.  At most nelem(kouho)-1 candidates are kept
 * per key, so the stored array always ends in nil.  The line buffer
 * itself becomes the map key and the candidates point into it.
 *
 * If the file cannot be opened the map passed in is returned
 * unchanged (a new empty map when h was nil), so the caller never
 * loses a dictionary it already had.  Returns nil only when no map
 * could be allocated.
 */
Hmap*
opendict(Hmap *h, char *name)
{
	Biobuf *b;
	char *p, *tab, *sp, *rest, *oldkey;
	char *kouho[16];
	int i;

	b = Bopen(name, OREAD);
	if(b == nil){
		if(h == nil)
			h = hmapalloc(8192, sizeof(kouho));
		return h;
	}

	if(h == nil){
		h = hmapalloc(8192, sizeof(kouho));
		if(h == nil){
			Bterm(b);
			return nil;
		}
	}else
		hmapreset(h, 1);

	while((p = Brdstr(b, '\n', 1)) != nil){
		tab = nil;
		if(p[0] != '\0' && p[0] != ';')
			tab = utfrune(p, '\t');
		if(tab == nil || tab[1] == '\0'){
			free(p);
			continue;
		}
		*tab = '\0';
		rest = tab+1;

		/* split the candidates in place, keeping the last slot nil */
		memset(kouho, 0, sizeof kouho);
		i = 0;
		while(i < nelem(kouho)-1 && (sp = utfrune(rest, ' ')) != nil){
			*sp = '\0';
			kouho[i++] = rest;
			rest = sp+1;
		}
		if(i < nelem(kouho)-1)
			kouho[i] = rest;

		/*
		 * The key is the base pointer of the line.  hmapset does
		 * not free a key it replaces, so a repeated entry's old
		 * line has to be released here.
		 */
		oldkey = hmapkey(h, p);
		if(hmapset(&h, p, kouho, nil) < 0){
			free(p);
			continue;
		}
		if(oldkey != nil)
			free(oldkey);
	}
	Bterm(b);
	return h;
}
|
||||
|
||||
/*
|
||||
* write translated kanji runes to stdout and return last character
|
||||
* if it's not ctl-\. if the last is ctl-\, proceed with
|
||||
* translation of the next kouho
|
||||
*/
|
||||
int
|
||||
dotrans(Dictionary *dic)
|
||||
dotrans(Hmap *dic)
|
||||
{
|
||||
Rune *res, r[1];
|
||||
char v[1024], *p, tbuf[64], hirabuf[64];
|
||||
int j, lastlen, nokouho = 0;
|
||||
char ch;
|
||||
KouhoList *fstkouho, *currentkouho;
|
||||
int i;
|
||||
char *kouho[16];
|
||||
|
||||
if (llen==0)
|
||||
return 0; /* don't use kanji transform function */
|
||||
|
@ -375,15 +469,13 @@ dotrans(Dictionary *dic)
|
|||
if (okuri && joshi != 1) /* verb mode */
|
||||
hirabuf[strlen(hirabuf) - 1] = '\0';
|
||||
|
||||
if(!(fstkouho = getKouhoHash(dic, v))) { /* not found */
|
||||
if(hmapget(dic, v, kouho) < 0){
|
||||
llen = olen = okuri = joshi = 0;
|
||||
okurigana[0] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
currentkouho = fstkouho;
|
||||
for(;;) {
|
||||
p = currentkouho->kouhotop; /* p to the head of kanji kouho array */
|
||||
for(i = 0; i < nelem(kouho) && kouho[i] != nil; i++) {
|
||||
p = kouho[i];
|
||||
lastlen = nrune(tbuf); /* number of rune chars */
|
||||
|
||||
if (okuri && joshi != 1) /* verb mode */
|
||||
|
@ -407,10 +499,9 @@ dotrans(Dictionary *dic)
|
|||
exits(nil);
|
||||
|
||||
if (ch == '') { /* if next input is ^\, once again */
|
||||
if(currentkouho->nextkouho != 0) { /* have next kouho */
|
||||
if(i+1 < nelem(kouho) && kouho[i+1] != nil) { /* have next kouho */
|
||||
nokouho = 0;
|
||||
strcpy(tbuf, p);
|
||||
currentkouho = currentkouho->nextkouho;
|
||||
|
||||
if (okuri && joshi != 1) /* verb mode */
|
||||
for (j=0; j<nrune(tbuf); j++)
|
||||
|
@ -442,8 +533,12 @@ dotrans(Dictionary *dic)
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if(!nokouho) /* learn the previous use of the kouho */
|
||||
selectKouho(&(fstkouho->dicitem->kouho), currentkouho);
|
||||
if(!nokouho && i != 0){ /* learn the previous use of the kouho */
|
||||
p = kouho[0];
|
||||
kouho[0] = kouho[i];
|
||||
kouho[i] = p;
|
||||
hmapset(&dic, hmapkey(dic, v), kouho, nil);
|
||||
}
|
||||
|
||||
olen = okuri = joshi = 0;
|
||||
okurigana[0] = 0;
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
BIN=/$objtype/bin
|
||||
TARG=ktrans
|
||||
HFILES=jisho.h ktrans.h
|
||||
HFILES=ktrans.h
|
||||
OFILES=\
|
||||
hash.$O\
|
||||
main.$O\
|
||||
jisho.$O
|
||||
|
||||
</sys/src/cmd/mkone
|
||||
|
|
Loading…
Reference in a new issue