tcs: add nfc and nfd output formats

This commit is contained in:
Jacob Moody 2023-03-27 03:45:32 +00:00
parent e0d114547c
commit d2753b4d5f
4 changed files with 52 additions and 0 deletions

View file

@ -144,6 +144,12 @@ IBM PC: CP 437
.TP
.B atari
Atari-ST character set
.TP
.B nfd
Unicode Normalization Form D
.TP
.B nfc
Unicode Normalization Form C
.SH EXAMPLES
.TP
.B tcs -f 8859-1

View file

@ -23,6 +23,8 @@ int fixsurrogate(Rune *rp, Rune r2);
void utf_in(int, long *, struct convert *);
void utf_out(Rune *, int, long *);
/* UTF output converters that normalize to Unicode Normalization Form C / Form D */
void utfnfc_out(Rune *, int, long *);
void utfnfd_out(Rune *, int, long *);
void isoutf_in(int, long *, struct convert *);
void isoutf_out(Rune *, int, long *);

View file

@ -613,6 +613,10 @@ struct convert convert[] =
{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
{ "nfc", "Unicode Normalization Form C", From|Func, 0, (Fnptr)utf_in },
{ "nfc", "Unicode Normalization Form C", Func, 0, (Fnptr)utfnfc_out },
{ "nfd", "Unicode Normalization Form D", From|Func, 0, (Fnptr)utf_in },
{ "nfd", "Unicode Normalization Form D", Func, 0, (Fnptr)utfnfd_out },
{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },

View file

@ -68,6 +68,46 @@ utf_out(Rune *base, int n, long *)
write(1, obuf, p-obuf);
}
/*
 * Normalize a buffer of runes with fn and write the result with utf_out.
 *
 *	base, n	the runes to convert and how many there are
 *	fn	normalizer, called as fn(dst, src, max); utfnfc_out passes
 *		runecomp and utfnfd_out passes runedecomp
 *
 * Only the prefix that fullrunenorm reports as complete is normalized in
 * this call.  The remaining tail is kept in a static buffer and prepended
 * to the input of the next call, so state carries over between calls.
 *
 * NOTE(review): nothing in this function emits the held-back tail after
 * the final call; confirm whether end of input is handled elsewhere.
 */
void
utfnorm_out(Rune *base, int n, int (*fn)(Rune*,Rune*,int))
{
	static Rune rbuf[32];
	static int nremain = 0;
	/* sized on the assumption that callers pass at most N runes -- TODO confirm */
	Rune src[N + 1 + nelem(rbuf)];
	Rune dst[N + 1 + nelem(rbuf)];
	Rune *p, *p2, *e;
	int i, nout;

	e = base+n;

	/*
	 * Start src with the runes held back by the previous call.
	 * n is deliberately left alone: it counts the runes remaining in
	 * base, and growing it would let fullrunenorm and the copy loop
	 * below run past e.
	 */
	for(i = 0; i < nremain; i++)
		src[i] = rbuf[i];
	nremain = 0;

	/* append every prefix of base that fullrunenorm accepts */
	for(p2 = p = base; n > 0;){
		p2 = fullrunenorm(p, n);
		if(p == p2)
			break;
		n -= p2-p;
		for(;p < p2; p++)
			src[i++] = *p;
	}

	/*
	 * A pending tail longer than rbuf cannot be carried over;
	 * normalize it now instead of writing past the end of rbuf.
	 */
	if(e - p2 > (int)nelem(rbuf))
		while(p2 < e)
			src[i++] = *p2++;
	src[i] = 0;

	/*
	 * The limit is passed as a count of Runes, not bytes.
	 * NOTE(review): the returned count is clamped in case fn reports
	 * more runes than dst can hold; confirm fn's truncation semantics.
	 */
	nout = fn(dst, src, nelem(dst));
	if(nout > (int)nelem(dst))
		nout = nelem(dst);
	utf_out(dst, nout, nil);

	/* hold back the incomplete tail for the next call */
	for(; p2 < e; p2++)
		rbuf[nremain++] = *p2;
}
/*
 * Output converter listed for "nfc" (Unicode Normalization Form C) in the
 * convert table.  Forwards base and n to utfnorm_out with runecomp as the
 * normalizer; the unnamed long* argument is not used.
 */
void
utfnfc_out(Rune *base, int n, long *)
{
utfnorm_out(base, n, runecomp);
}
/*
 * Output converter listed for "nfd" (Unicode Normalization Form D) in the
 * convert table.  Forwards base and n to utfnorm_out with runedecomp as
 * the normalizer; the unnamed long* argument is not used.
 */
void
utfnfd_out(Rune *base, int n, long *)
{
utfnorm_out(base, n, runedecomp);
}
void
isoutf_in(int fd, long *, struct convert *out)
{