awk: split record into runes for empty FS (#292)

awk was splitting records into bytes instead of runes when FS is empty.
For example, this printed only the first byte of the UTF-8 encoding
of é:

	echo é | awk 'BEGIN{FS=""}{print $1}'

The change copies the way the `split` function handles runes: each field
is now built from all the bytes of one rune, as decoded by chartorune.

Originally reported by kris on Twitter:
https://twitter.com/p9luv/status/1180436083433201665
This commit is contained in:
Fazlul Shahriar 2019-10-29 10:04:06 -04:00 committed by Dan Cross
parent 715807d706
commit 1309450668

View file

@@ -29,6 +29,7 @@ THIS SOFTWARE.
#include <errno.h>
#include <stdlib.h>
#include <stdarg.h>
#include <utf.h>
#include "awk.h"
#include "y.tab.h"
@@ -293,15 +294,19 @@ void fldbld(void) /* create fields from current record */
}
*fr = 0;
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
for (i = 0; *r != 0; r++) {
char buf[2];
int nb;
for (i = 0; *r != 0; r += nb) {
Rune rr;
char buf[UTFmax+1];
i++;
if (i > nfields)
growfldtab(i);
if (freeable(fldtab[i]))
xfree(fldtab[i]->sval);
buf[0] = *r;
buf[1] = 0;
nb = chartorune(&rr, r);
memmove(buf, r, nb);
buf[nb] = '\0';
fldtab[i]->sval = tostring(buf);
fldtab[i]->tval = FLD | STR;
}