ed: handle Unicode beyond the BMP correctly in list mode.

List mode was constrained to the BMP. This change introduces
the following new list mode convention, using Go string literal syntax:

Non-printing ASCII characters display as \xhh.
Non-ASCII characters in the BMP display as \uhhhh.
Characters beyond the BMP display as \Uhhhhhhhh.
This commit is contained in:
sean 2020-05-21 16:10:30 +01:00 committed by Russ Cox
parent 3850e6e177
commit 95220bf887
2 changed files with 42 additions and 11 deletions

View file

@ -441,10 +441,18 @@ a backspace as
.LR \eb ,
backslashes as
.LR \e\e ,
and non-printing characters as
and non-printing ASCII characters as
a backslash, an
.LR x ,
and four hexadecimal digits.
and two hexadecimal digits.
Non-ASCII characters in the Basic Multilingual Plane
are printed as a backslash, a small
.LR u ,
and four hexadecimal digits; and characters above the
Basic Multilingual Plane are printed as a backslash,
a big
.LR U ,
and eight hexadecimal digits.
Long lines are folded,
with the second and subsequent sub-lines indented one tab stop.
If the last character in the line is a blank,

View file

@ -21,6 +21,12 @@ enum
EOF = -1
};
/*
 * Sizing constants for list-mode output.  LINELEN is the dimension of
 * the line[] output buffer; putchr uses LINELEN-BELL both as the column
 * past which a listed line is folded and as the offset into line[] at
 * which the buffer is written out.
 *
 * NOTE(review): the \Uhhhhhhhh escape built in putchr is 10 characters
 * long, which is more than BELL, and a fold emits further characters
 * ahead of it in the same call.  Confirm that a single putchr call
 * cannot run past the end of line[]; BELL may need to grow.
 */
enum
{
LINELEN = 70, /* max number of glyphs in a display line; also sizeof line[] */
BELL = 6 /* A char could require up to BELL glyphs to display */
};
void (*oldhup)(int);
void (*oldquit)(int);
int* addr1;
@ -40,7 +46,7 @@ int ichanged;
int io;
Biobuf iobuf;
int lastc;
char line[70];
char line[LINELEN];
Rune* linebp;
Rune linebuf[LBSIZE];
int listf;
@ -1543,7 +1549,7 @@ putchr(int ac)
*lp++ = 'n';
}
} else {
if(col > (72-6-2)) {
if(col > (LINELEN-BELL)) {
col = 8;
*lp++ = '\\';
*lp++ = '\n';
@ -1558,15 +1564,32 @@ putchr(int ac)
if(c == '\t')
c = 't';
col++;
} else
if(c<' ' || c>='\177') {
} else if (c<' ' || c=='\177') {
*lp++ = '\\';
*lp++ = 'x';
*lp++ = hex[c>>12];
*lp++ = hex[c>>8&0xF];
*lp++ = hex[c>>4&0xF];
c = hex[c&0xF];
*lp++ = hex[(c>>4)&0xF];
c = hex[c&0xF];
col += 3;
} else if (c>'\177' && c<=0xFFFF) {
*lp++ = '\\';
*lp++ = 'u';
*lp++ = hex[(c>>12)&0xF];
*lp++ = hex[(c>>8)&0xF];
*lp++ = hex[(c>>4)&0xF];
c = hex[c&0xF];
col += 5;
} else if (c>0xFFFF) {
*lp++ = '\\';
*lp++ = 'U';
*lp++ = hex[(c>>28)&0xF];
*lp++ = hex[(c>>24)&0xF];
*lp++ = hex[(c>>20)&0xF];
*lp++ = hex[(c>>16)&0xF];
*lp++ = hex[(c>>12)&0xF];
*lp++ = hex[(c>>8)&0xF];
*lp++ = hex[(c>>4)&0xF];
c = hex[c&0xF];
col += 9;
}
}
}
@ -1574,7 +1597,7 @@ putchr(int ac)
rune = c;
lp += runetochar(lp, &rune);
if(c == '\n' || lp >= &line[sizeof(line)-5]) {
if(c == '\n' || lp >= &line[LINELEN-BELL]) {
linp = line;
write(oflag? 2: 1, line, lp-line);
return;