mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-12 11:10:07 +00:00
650deb7980
Thanks to wkj for doing all the hard work.
592 lines
9.6 KiB
C
592 lines
9.6 KiB
C
#include <u.h>
|
|
#include <limits.h>
|
|
#include <libc.h>
|
|
#include <draw.h>
|
|
#include <html.h>
|
|
#include "impl.h"
|
|
|
|
Rune whitespace[] = { ' ', '\t', '\n', '\r', '\0' };
|
|
Rune notwhitespace[] = { '^', ' ', '\t', '\n', '\r' , '\0'};
|
|
|
|
// All lists start out like List structure.
|
|
// List itself can be used as list of int.
|
|
int
|
|
_listlen(List* l)
|
|
{
|
|
int n = 0;
|
|
|
|
while(l != nil) {
|
|
l = l->next;
|
|
n++;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
// Cons
|
|
List*
|
|
_newlist(int val, List* rest)
|
|
{
|
|
List* ans;
|
|
|
|
ans = (List*)emalloc(sizeof(List));
|
|
ans->val = val;
|
|
ans->next = rest;
|
|
return ans;
|
|
}
|
|
|
|
// Reverse a list in place
|
|
List*
|
|
_revlist(List* l)
|
|
{
|
|
List* newl;
|
|
List* nextl;
|
|
|
|
newl = nil;
|
|
while(l != nil) {
|
|
nextl = l->next;
|
|
l->next = newl;
|
|
newl = l;
|
|
l = nextl;
|
|
}
|
|
return newl;
|
|
}
|
|
|
|
// The next few routines take a "character class" as argument.
|
|
// e.g., "a-zA-Z", or "^ \t\n"
|
|
// (ranges indicated by - except in first position;
|
|
// ^ is first position means "not in" the following class)
|
|
|
|
// Splitl splits s[0:n] just before first character of class cl.
|
|
// Answers go in (p1, n1) and (p2, n2).
|
|
// If no split, the whole thing goes in the first component.
|
|
// Note: answers contain pointers into original string.
|
|
void
|
|
_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
|
|
{
|
|
Rune* p;
|
|
|
|
p = _Strnclass(s, cl, n);
|
|
*p1 = s;
|
|
if(p == nil) {
|
|
*n1 = n;
|
|
*p2 = nil;
|
|
*n2 = 0;
|
|
}
|
|
else {
|
|
*p2 = p;
|
|
*n1 = p-s;
|
|
*n2 = n-*n1;
|
|
}
|
|
}
|
|
|
|
// Splitr splits s[0:n] just after last character of class cl.
|
|
// Answers go in (p1, n1) and (p2, n2).
|
|
// If no split, the whole thing goes in the last component.
|
|
// Note: answers contain pointers into original string.
|
|
void
|
|
_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
|
|
{
|
|
Rune* p;
|
|
|
|
p = _Strnrclass(s, cl, n);
|
|
if(p == nil) {
|
|
*p1 = nil;
|
|
*n1 = 0;
|
|
*p2 = s;
|
|
*n2 = n;
|
|
}
|
|
else {
|
|
*p1 = s;
|
|
*p2 = p+1;
|
|
*n1 = *p2-s;
|
|
*n2 = n-*n1;
|
|
}
|
|
}
|
|
|
|
// Splitall splits s[0:n] into parts that are separated by characters from class cl.
|
|
// Each part will have nonzero length.
|
|
// At most alen parts are found, and pointers to their starts go into
|
|
// the strarr array, while their lengths go into the lenarr array.
|
|
// The return value is the number of parts found.
|
|
int
|
|
_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
|
|
{
|
|
int i;
|
|
Rune* p;
|
|
Rune* q;
|
|
Rune* slast;
|
|
|
|
if(s == nil || n == 0)
|
|
return 0;
|
|
i = 0;
|
|
p = s;
|
|
slast = s+n;
|
|
while(p < slast && i < alen) {
|
|
while(p < slast && _inclass(*p, cl))
|
|
p++;
|
|
if(p == slast)
|
|
break;
|
|
q = _Strnclass(p, cl, slast-p);
|
|
if(q == nil)
|
|
q = slast;
|
|
assert(q > p && q <= slast);
|
|
strarr[i] = p;
|
|
lenarr[i] = q-p;
|
|
i++;
|
|
p = q;
|
|
}
|
|
return i;
|
|
}
|
|
|
|
// Find part of s that excludes leading and trailing whitespace,
|
|
// and return that part in *pans (and its length in *panslen).
|
|
void
|
|
_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
|
|
{
|
|
Rune* p;
|
|
Rune* q;
|
|
|
|
p = nil;
|
|
if(n > 0) {
|
|
p = _Strnclass(s, notwhitespace, n);
|
|
if(p != nil) {
|
|
q = _Strnrclass(s, notwhitespace, n);
|
|
assert(q != nil);
|
|
n = q+1-p;
|
|
}
|
|
}
|
|
*pans = p;
|
|
*panslen = n;
|
|
}
|
|
|
|
// _Strclass returns a pointer to the first element of s that is
|
|
// a member of class cl, nil if none.
|
|
Rune*
|
|
_Strclass(Rune* s, Rune* cl)
|
|
{
|
|
Rune* p;
|
|
|
|
for(p = s; *p != 0; p++)
|
|
if(_inclass(*p, cl))
|
|
return p;
|
|
return nil;
|
|
}
|
|
|
|
// _Strnclass returns a pointer to the first element of s[0:n] that is
|
|
// a member of class cl, nil if none.
|
|
Rune*
|
|
_Strnclass(Rune* s, Rune* cl, int n)
|
|
{
|
|
Rune* p;
|
|
|
|
for(p = s; n-- && *p != 0; p++)
|
|
if(_inclass(*p, cl))
|
|
return p;
|
|
return nil;
|
|
}
|
|
|
|
// _Strrclass returns a pointer to the last element of s that is
|
|
// a member of class cl, nil if none
|
|
Rune*
|
|
_Strrclass(Rune* s, Rune* cl)
|
|
{
|
|
Rune* p;
|
|
|
|
if(s == nil || *s == 0)
|
|
return nil;
|
|
p = s + runestrlen(s) - 1;
|
|
while(p >= s) {
|
|
if(_inclass(*p, cl))
|
|
return p;
|
|
p--;
|
|
};
|
|
return nil;
|
|
}
|
|
|
|
// _Strnrclass returns a pointer to the last element of s[0:n] that is
|
|
// a member of class cl, nil if none
|
|
Rune*
|
|
_Strnrclass(Rune* s, Rune* cl, int n)
|
|
{
|
|
Rune* p;
|
|
|
|
if(s == nil || *s == 0 || n == 0)
|
|
return nil;
|
|
p = s + n - 1;
|
|
while(p >= s) {
|
|
if(_inclass(*p, cl))
|
|
return p;
|
|
p--;
|
|
};
|
|
return nil;
|
|
}
|
|
|
|
// Is c in the class cl?
|
|
int
|
|
_inclass(Rune c, Rune* cl)
|
|
{
|
|
int n;
|
|
int ans;
|
|
int negate;
|
|
int i;
|
|
|
|
n = _Strlen(cl);
|
|
if(n == 0)
|
|
return 0;
|
|
ans = 0;
|
|
negate = 0;
|
|
if(cl[0] == '^') {
|
|
negate = 1;
|
|
cl++;
|
|
n--;
|
|
}
|
|
for(i = 0; i < n; i++) {
|
|
if(cl[i] == '-' && i > 0 && i < n - 1) {
|
|
if(c >= cl[i - 1] && c <= cl[i + 1]) {
|
|
ans = 1;
|
|
break;
|
|
}
|
|
i++;
|
|
}
|
|
else if(c == cl[i]) {
|
|
ans = 1;
|
|
break;
|
|
}
|
|
}
|
|
if(negate)
|
|
ans = !ans;
|
|
return ans;
|
|
}
|
|
|
|
// Is pre a prefix of s?
|
|
int
|
|
_prefix(Rune* pre, Rune* s)
|
|
{
|
|
int ns;
|
|
int n;
|
|
int k;
|
|
|
|
ns = _Strlen(s);
|
|
n = _Strlen(pre);
|
|
if(ns < n)
|
|
return 0;
|
|
for(k = 0; k < n; k++) {
|
|
if(pre[k] != s[k])
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
// Number of runes in (null-terminated) s
|
|
int
|
|
_Strlen(Rune* s)
|
|
{
|
|
if(s == nil)
|
|
return 0;
|
|
return runestrlen(s);
|
|
}
|
|
|
|
// -1, 0, 1 as s1 is lexicographically less, equal greater than s2
|
|
int
|
|
_Strcmp(Rune *s1, Rune *s2)
|
|
{
|
|
if(s1 == nil)
|
|
return (s2 == nil || *s2 == 0) ? 0 : -1;
|
|
if(s2 == nil)
|
|
return (*s1 == 0) ? 0 : 1;
|
|
return runestrcmp(s1, s2);
|
|
}
|
|
|
|
// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
|
|
// Also, do a case-insensitive match, assuming s2
|
|
// has no chars in [A-Z], only their lowercase versions.
|
|
// (This routine is used for in-place keyword lookup, where s2 is in a keyword
|
|
// list and s1 is some substring, possibly mixed-case, in a buffer.)
|
|
int
|
|
_Strncmpci(Rune *s1, int n1, Rune *s2)
|
|
{
|
|
Rune c1, c2;
|
|
|
|
for(;;) {
|
|
if(n1-- == 0) {
|
|
if(*s2 == 0)
|
|
return 0;
|
|
return -1;
|
|
}
|
|
c1 = *s1++;
|
|
c2 = *s2++;
|
|
if(c1 >= 'A' && c1 <= 'Z')
|
|
c1 = c1 - 'A' + 'a';
|
|
if(c1 != c2) {
|
|
if(c1 > c2)
|
|
return 1;
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// emalloc and copy
|
|
Rune*
|
|
_Strdup(Rune* s)
|
|
{
|
|
if(s == nil)
|
|
return nil;
|
|
return _Strndup(s, runestrlen(s));
|
|
}
|
|
|
|
// emalloc and copy n chars of s (assume s is at least that long),
|
|
// and add 0 terminator.
|
|
// Return nil if n==0.
|
|
Rune*
|
|
_Strndup(Rune* s, int n)
|
|
{
|
|
Rune* ans;
|
|
|
|
if(n <= 0)
|
|
return nil;
|
|
ans = _newstr(n);
|
|
memmove(ans, s, n*sizeof(Rune));
|
|
ans[n] = 0;
|
|
return ans;
|
|
}
|
|
// emalloc enough room for n Runes, plus 1 null terminator.
|
|
// (Not initialized to anything.)
|
|
Rune*
|
|
_newstr(int n)
|
|
{
|
|
return (Rune*)emalloc((n+1)*sizeof(Rune));
|
|
}
|
|
|
|
// emalloc and copy s+t
|
|
Rune*
|
|
_Strdup2(Rune* s, Rune* t)
|
|
{
|
|
int ns, nt;
|
|
Rune* ans;
|
|
Rune* p;
|
|
|
|
ns = _Strlen(s);
|
|
nt = _Strlen(t);
|
|
if(ns+nt == 0)
|
|
return nil;
|
|
ans = _newstr(ns+nt);
|
|
p = _Stradd(ans, s, ns);
|
|
p = _Stradd(p, t, nt);
|
|
*p = 0;
|
|
return ans;
|
|
}
|
|
|
|
// Return emalloc'd substring s[start:stop],
|
|
Rune*
|
|
_Strsubstr(Rune* s, int start, int stop)
|
|
{
|
|
Rune* t;
|
|
|
|
if(start == stop)
|
|
return nil;
|
|
t = _Strndup(s+start, stop-start);
|
|
return t;
|
|
}
|
|
|
|
// Copy n chars to s1 from s2, and return s1+n
|
|
Rune*
|
|
_Stradd(Rune* s1, Rune* s2, int n)
|
|
{
|
|
if(n == 0)
|
|
return s1;
|
|
memmove(s1, s2, n*sizeof(Rune));
|
|
return s1+n;
|
|
}
|
|
|
|
// Like strtol, but converting from Rune* string
|
|
|
|
//#define LONG_MAX 2147483647L
|
|
//#define LONG_MIN -2147483648L
|
|
|
|
long
|
|
_Strtol(Rune* nptr, Rune** endptr, int base)
|
|
{
|
|
Rune* p;
|
|
long n, nn;
|
|
int c, ovfl, v, neg, ndig;
|
|
|
|
p = nptr;
|
|
neg = 0;
|
|
n = 0;
|
|
ndig = 0;
|
|
ovfl = 0;
|
|
|
|
/*
|
|
* White space
|
|
*/
|
|
for(;;p++){
|
|
switch(*p){
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
case '\f':
|
|
case '\r':
|
|
case '\v':
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Sign
|
|
*/
|
|
if(*p=='-' || *p=='+')
|
|
if(*p++ == '-')
|
|
neg = 1;
|
|
|
|
/*
|
|
* Base
|
|
*/
|
|
if(base==0){
|
|
if(*p != '0')
|
|
base = 10;
|
|
else{
|
|
base = 8;
|
|
if(p[1]=='x' || p[1]=='X'){
|
|
p += 2;
|
|
base = 16;
|
|
}
|
|
}
|
|
}else if(base==16 && *p=='0'){
|
|
if(p[1]=='x' || p[1]=='X')
|
|
p += 2;
|
|
}else if(base<0 || 36<base)
|
|
goto Return;
|
|
|
|
/*
|
|
* Non-empty sequence of digits
|
|
*/
|
|
for(;; p++,ndig++){
|
|
c = *p;
|
|
v = base;
|
|
if('0'<=c && c<='9')
|
|
v = c - '0';
|
|
else if('a'<=c && c<='z')
|
|
v = c - 'a' + 10;
|
|
else if('A'<=c && c<='Z')
|
|
v = c - 'A' + 10;
|
|
if(v >= base)
|
|
break;
|
|
nn = n*base + v;
|
|
if(nn < n)
|
|
ovfl = 1;
|
|
n = nn;
|
|
}
|
|
|
|
Return:
|
|
if(ndig == 0)
|
|
p = nptr;
|
|
if(endptr)
|
|
*endptr = p;
|
|
if(ovfl){
|
|
if(neg)
|
|
return LONG_MIN;
|
|
return LONG_MAX;
|
|
}
|
|
if(neg)
|
|
return -n;
|
|
return n;
|
|
}
|
|
|
|
// Convert buf[0:n], bytes whose character set is chset,
|
|
// into a emalloc'd null-terminated Unicode string.
|
|
Rune*
|
|
toStr(uchar* buf, int n, int chset)
|
|
{
|
|
int i;
|
|
int m;
|
|
Rune ch;
|
|
Rune* ans;
|
|
|
|
switch(chset) {
|
|
case US_Ascii:
|
|
case ISO_8859_1:
|
|
ans = (Rune*)emalloc((n+1)*sizeof(Rune));
|
|
for(i = 0; i < n; i++)
|
|
ans[i] = buf[i];
|
|
ans[n] = 0;
|
|
break;
|
|
|
|
case UTF_8:
|
|
m = 0;
|
|
for(i = 0; i < n; ) {
|
|
i += chartorune(&ch, (char*)(buf+i));
|
|
m++;
|
|
}
|
|
ans = (Rune*)emalloc((m+1)*sizeof(Rune));
|
|
m = 0;
|
|
for(i = 0; i < n; ) {
|
|
i += chartorune(&ch, (char*)(buf+i));
|
|
ans[m++] = ch;
|
|
}
|
|
ans[m] = 0;
|
|
break;
|
|
|
|
default:
|
|
ans = nil;
|
|
assert(0);
|
|
}
|
|
return ans;
|
|
}
|
|
|
|
// Convert buf[0:n], Unicode characters,
|
|
// into an emalloc'd null-terminated string in character set chset.
|
|
// Use 0x80 for unconvertable characters.
|
|
uchar*
|
|
fromStr(Rune* buf, int n, int chset)
|
|
{
|
|
uchar* ans;
|
|
int i, lim, m;
|
|
Rune ch;
|
|
uchar* p;
|
|
uchar s[UTFmax];
|
|
|
|
ans = nil;
|
|
switch(chset) {
|
|
case US_Ascii:
|
|
case ISO_8859_1:
|
|
ans = (uchar*)emalloc(n+1);
|
|
lim = (chset==US_Ascii)? 127 : 255;
|
|
for(i = 0; i < n; i++) {
|
|
ch = buf[i];
|
|
if(ch > lim)
|
|
ch = 0x80;
|
|
ans[i] = ch;
|
|
}
|
|
ans[n] = 0;
|
|
break;
|
|
|
|
case UTF_8:
|
|
m = 0;
|
|
for(i = 0; i < n; i++) {
|
|
m += runetochar((char*)s, &buf[i]);
|
|
}
|
|
ans = (uchar*)emalloc(m+1);
|
|
p = ans;
|
|
for(i = 0; i < n; i++)
|
|
p += runetochar((char*)p, &buf[i]);
|
|
*p = 0;
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
return ans;
|
|
|
|
}
|
|
|
|
// Convert n to emalloc'd String.
|
|
Rune*
|
|
_ltoStr(int n)
|
|
{
|
|
int m;
|
|
uchar buf[20];
|
|
|
|
m = snprint((char*)buf, sizeof(buf), "%d", n);
|
|
return toStr(buf, m, US_Ascii);
|
|
}
|