This patch fixes unicode, unicode-be and unicode-le
input, and unicode output, which have been broken due
to the change of the size of Rune from two bytes to
four bytes.

(Unicode-le and unicode-be output have not been
affected, since they work differently and produce
correct output as long as sizeof(Rune) >= 2.)

In all of the affected functions an extra array of N
elements of type u16int is used to temporarily store
the 16-bit unicode data.  As this increases stack
usage, perhaps a different, slightly more complicated
solution might be preferred, where only a single
`Rune buf[N]' is used for both temporary storage and
the result.

R=rsc
CC=codebot
http://codereview.appspot.com/1574041
This commit is contained in:
Michael Teichgräber 2010-06-28 14:43:28 -07:00 committed by Russ Cox
parent 25989d9eca
commit c006e98419

View file

@ -187,17 +187,19 @@ swab2(char *b, int n)
void void
unicode_in(int fd, long *notused, struct convert *out) unicode_in(int fd, long *notused, struct convert *out)
{ {
u16int ubuf[N];
Rune buf[N]; Rune buf[N];
int n; int i, n;
int swabme; int swabme;
USED(notused); USED(notused);
if(read(fd, (char *)buf, 2) != 2) if(read(fd, (char *)ubuf, 2) != 2)
return; return;
ninput += 2; ninput += 2;
switch(buf[0]) switch(ubuf[0])
{ {
default: default:
buf[0] = ubuf[0];
OUT(out, buf, 1); OUT(out, buf, 1);
case 0xFEFF: case 0xFEFF:
swabme = 0; swabme = 0;
@ -206,10 +208,12 @@ unicode_in(int fd, long *notused, struct convert *out)
swabme = 1; swabme = 1;
break; break;
} }
while((n = read(fd, (char *)buf, 2*N)) > 0){ while((n = read(fd, (char *)ubuf, 2*N)) > 0){
ninput += n; ninput += n;
if(swabme) if(swabme)
swab2((char *)buf, n); swab2((char *)ubuf, n);
for(i=0; i<n/2; i++)
buf[i] = ubuf[i];
if(n&1){ if(n&1){
if(squawk) if(squawk)
EPR "%s: odd byte count in %s\n", argv0, file); EPR "%s: odd byte count in %s\n", argv0, file);
@ -227,13 +231,14 @@ void
unicode_in_be(int fd, long *notused, struct convert *out) unicode_in_be(int fd, long *notused, struct convert *out)
{ {
int i, n; int i, n;
u16int ubuf[N];
Rune buf[N], r; Rune buf[N], r;
uchar *p; uchar *p;
USED(notused); USED(notused);
while((n = read(fd, (char *)buf, 2*N)) > 0){ while((n = read(fd, (char *)ubuf, 2*N)) > 0){
ninput += n; ninput += n;
p = (uchar*)buf; p = (uchar*)ubuf;
for(i=0; i<n/2; i++){ for(i=0; i<n/2; i++){
r = *p++<<8; r = *p++<<8;
r |= *p++; r |= *p++;
@ -257,13 +262,14 @@ void
unicode_in_le(int fd, long *notused, struct convert *out) unicode_in_le(int fd, long *notused, struct convert *out)
{ {
int i, n; int i, n;
u16int ubuf[N];
Rune buf[N], r; Rune buf[N], r;
uchar *p; uchar *p;
USED(notused); USED(notused);
while((n = read(fd, (char *)buf, 2*N)) > 0){ while((n = read(fd, (char *)ubuf, 2*N)) > 0){
ninput += n; ninput += n;
p = (uchar*)buf; p = (uchar*)ubuf;
for(i=0; i<n/2; i++){ for(i=0; i<n/2; i++){
r = *p++; r = *p++;
r |= *p++<<8; r |= *p++<<8;
@ -287,17 +293,21 @@ void
unicode_out(Rune *base, int n, long *notused) unicode_out(Rune *base, int n, long *notused)
{ {
static int first = 1; static int first = 1;
u16int buf[N];
int i;
USED(notused); USED(notused);
nrunes += n; nrunes += n;
if(first){ if(first){
unsigned short x = 0xFEFF; u16int x = 0xFEFF;
noutput += 2; noutput += 2;
write(1, (char *)&x, 2); write(1, (char *)&x, 2);
first = 0; first = 0;
} }
noutput += 2*n; noutput += 2*n;
write(1, (char *)base, 2*n); for(i=0; i<n; i++)
buf[i] = base[i];
write(1, (char *)buf, 2*n);
} }
void void