mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-24 11:41:58 +00:00
libregexp: revert regexp fix
This commit is contained in:
parent
2deda14e42
commit
6d08a0f548
5 changed files with 113 additions and 117 deletions
|
@ -23,89 +23,90 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
|
|||
}
|
||||
|
||||
/*
|
||||
* Add ip to the list [lp, elp], but only if it is not there already.
|
||||
* These work lists are stored and processed in increasing
|
||||
* order of sp[0], so if the ip is there already, the one that's
|
||||
* there already is a more left match and takes priority.
|
||||
* Note optimization in _renewthread:
|
||||
* *lp must be pending when _renewthread called; if *l has been looked
|
||||
* at already, the optimization is a bug.
|
||||
*/
|
||||
static Relist*
|
||||
_renewthread1(Relist *lp, /* Relist to add to */
|
||||
Relist *elp, /* limit pointer for Relist */
|
||||
extern Relist*
|
||||
_renewthread(Relist *lp, /* _relist to add to */
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
Resublist *sep) /* pointers to subexpressions */
|
||||
{
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++)
|
||||
if(p->inst == ip)
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(sep->m[0].s.sp < p->se.m[0].s.sp){
|
||||
if(ms > 1)
|
||||
p->se = *sep;
|
||||
else
|
||||
p->se.m[0] = sep->m[0];
|
||||
}
|
||||
return 0;
|
||||
|
||||
if(p == elp) /* refuse to overflow buffer */
|
||||
return elp;
|
||||
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
if(ms > 1)
|
||||
p->se = *sep;
|
||||
else
|
||||
p->se.m[0] = sep->m[0];
|
||||
(p+1)->inst = 0;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
extern int
|
||||
_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
|
||||
{
|
||||
Relist *ap;
|
||||
|
||||
ap = _renewthread1(lp, elp, ip, ms, sep);
|
||||
if(ap == 0)
|
||||
return 0;
|
||||
if(ap == elp)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Added ip to list at ap.
|
||||
* Expand any ORs right now, so that entire
|
||||
* work list ends up being sorted by increasing m[0].sp.
|
||||
*/
|
||||
for(; ap->inst; ap++){
|
||||
if(ap->inst->type == OR){
|
||||
if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
|
||||
return -1;
|
||||
if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* same as renewthread, but called with
|
||||
* initial empty start pointer.
|
||||
*/
|
||||
extern int
|
||||
extern Relist*
|
||||
_renewemptythread(Relist *lp, /* _relist to add to */
|
||||
Relist *elp,
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
char *sp) /* pointers to subexpressions */
|
||||
{
|
||||
Resublist sep;
|
||||
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(sp < p->se.m[0].s.sp) {
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
if(ms > 1)
|
||||
memset(&sep, 0, sizeof sep);
|
||||
sep.m[0].s.sp = sp;
|
||||
sep.m[0].e.ep = 0;
|
||||
return _renewthread(lp, elp, ip, ms, &sep);
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.sp = sp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
extern int
|
||||
extern Relist*
|
||||
_rrenewemptythread(Relist *lp, /* _relist to add to */
|
||||
Relist *elp,
|
||||
Reinst *ip, /* instruction to add */
|
||||
int ms,
|
||||
Rune *rsp) /* pointers to subexpressions */
|
||||
{
|
||||
return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
|
||||
Relist *p;
|
||||
|
||||
for(p=lp; p->inst; p++){
|
||||
if(p->inst == ip){
|
||||
if(rsp < p->se.m[0].s.rsp) {
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
p->inst = ip;
|
||||
if(ms > 1)
|
||||
memset(&p->se, 0, sizeof(p->se));
|
||||
p->se.m[0].s.rsp = rsp;
|
||||
(++p)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
|
|
@ -232,7 +232,7 @@ optimize(Reprog *pp)
|
|||
int size;
|
||||
Reprog *npp;
|
||||
Reclass *cl;
|
||||
int diff, proglen;
|
||||
int diff;
|
||||
|
||||
/*
|
||||
* get rid of NOOP chains
|
||||
|
@ -249,13 +249,10 @@ optimize(Reprog *pp)
|
|||
* necessary. Reallocate to the actual space used
|
||||
* and then relocate the code.
|
||||
*/
|
||||
proglen = freep - pp->firstinst;
|
||||
size = sizeof(Reprog) + proglen*sizeof(Reinst);
|
||||
size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
|
||||
npp = realloc(pp, size);
|
||||
if(npp==0 || npp==pp){
|
||||
pp->proglen = proglen;
|
||||
if(npp==0 || npp==pp)
|
||||
return pp;
|
||||
}
|
||||
diff = (char *)npp - (char *)pp;
|
||||
freep = (Reinst *)((char *)freep + diff);
|
||||
for(inst=npp->firstinst; inst<freep; inst++){
|
||||
|
@ -276,7 +273,6 @@ optimize(Reprog *pp)
|
|||
*(char**)(void*)&inst->u2.left += diff;
|
||||
}
|
||||
*(char**)(void*)&npp->startinst += diff;
|
||||
npp->proglen = proglen;
|
||||
return npp;
|
||||
}
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ struct Reljunk
|
|||
Rune* reol;
|
||||
};
|
||||
|
||||
extern int _renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
|
||||
extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*);
|
||||
extern void _renewmatch(Resub*, int, Resublist*);
|
||||
extern int _renewemptythread(Relist*, Relist*, Reinst*, int, char*);
|
||||
extern int _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
|
||||
extern Relist* _renewemptythread(Relist*, Reinst*, int, char*);
|
||||
extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*);
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "regexp9.h"
|
||||
#include "regcomp.h"
|
||||
|
||||
|
||||
/*
|
||||
* return 0 if no match
|
||||
* >0 if a match
|
||||
|
@ -12,14 +13,16 @@ regexec1(Reprog *progp, /* program to run */
|
|||
char *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j)
|
||||
Reljunk *j
|
||||
)
|
||||
{
|
||||
int flag=0;
|
||||
Reinst *inst;
|
||||
Relist *tlp;
|
||||
char *s;
|
||||
int i, checkstart, n;
|
||||
int i, checkstart;
|
||||
Rune r, *rp, *ep;
|
||||
int n;
|
||||
Relist* tl; /* This list, next list */
|
||||
Relist* nl;
|
||||
Relist* tle; /* ends of this and next list */
|
||||
|
@ -45,7 +48,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = utfrune(s, j->startchar);
|
||||
if(p == 0 || (j->eol && p >= j->eol))
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
|
@ -53,7 +56,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
if(s == bol)
|
||||
break;
|
||||
p = utfrune(s, '\n');
|
||||
if(p == 0 || (j->eol && p+1 >= j->eol))
|
||||
if(p == 0 || s == j->eol)
|
||||
return match;
|
||||
s = p+1;
|
||||
break;
|
||||
|
@ -74,16 +77,17 @@ regexec1(Reprog *progp, /* program to run */
|
|||
|
||||
/* Add first instruction to current list */
|
||||
if(match == 0)
|
||||
_renewemptythread(tl, tle, progp->startinst, ms, s);
|
||||
_renewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){
|
||||
for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
|
||||
for(inst = tlp->inst; ; inst = inst->u2.next){
|
||||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->u1.r == r)
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(inst->u1.r == r){
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
case LBRA:
|
||||
tlp->se.m[inst->u1.subid].s.sp = s;
|
||||
|
@ -93,11 +97,11 @@ regexec1(Reprog *progp, /* program to run */
|
|||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
|
@ -112,7 +116,7 @@ regexec1(Reprog *progp, /* program to run */
|
|||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
@ -123,12 +127,15 @@ regexec1(Reprog *progp, /* program to run */
|
|||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* expanded during renewthread; just a place holder */
|
||||
break;
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
case END: /* Match! */
|
||||
match = 1;
|
||||
tlp->se.m[0].e.ep = s;
|
||||
|
@ -158,18 +165,19 @@ regexec2(Reprog *progp, /* program to run */
|
|||
int rv;
|
||||
Relist *relist0, *relist1;
|
||||
|
||||
relist0 = malloc((progp->proglen+1)*sizeof(Relist));
|
||||
/* mark space */
|
||||
relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist0 == nil)
|
||||
return -1;
|
||||
relist1 = malloc((progp->proglen+1)*sizeof(Relist));
|
||||
relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
|
||||
if(relist1 == nil){
|
||||
free(relist1);
|
||||
return -1;
|
||||
}
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + progp->proglen;
|
||||
j->reliste[1] = relist1 + progp->proglen;
|
||||
j->reliste[0] = relist0 + BIGLISTSIZE - 2;
|
||||
j->reliste[1] = relist1 + BIGLISTSIZE - 2;
|
||||
|
||||
rv = regexec1(progp, bol, mp, ms, j);
|
||||
free(relist0);
|
||||
|
@ -210,8 +218,8 @@ regexec(Reprog *progp, /* program to run */
|
|||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 1;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = regexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
|
|
|
@ -9,9 +9,9 @@
|
|||
*/
|
||||
static int
|
||||
rregexec1(Reprog *progp, /* program to run */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Rune *bol, /* string to run machine on */
|
||||
Resub *mp, /* subexpression elements */
|
||||
int ms, /* number of elements at mp */
|
||||
Reljunk *j)
|
||||
{
|
||||
int flag=0;
|
||||
|
@ -28,7 +28,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
Rune *p;
|
||||
|
||||
match = 0;
|
||||
checkstart = j->starttype;
|
||||
checkstart = j->startchar;
|
||||
if(mp)
|
||||
for(i=0; i<ms; i++) {
|
||||
mp[i].s.rsp = 0;
|
||||
|
@ -46,7 +46,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
switch(j->starttype) {
|
||||
case RUNE:
|
||||
p = runestrchr(s, j->startchar);
|
||||
if(p == 0 || (j->reol && p >= j->reol))
|
||||
if(p == 0 || p == j->reol)
|
||||
return match;
|
||||
s = p;
|
||||
break;
|
||||
|
@ -54,7 +54,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
if(s == bol)
|
||||
break;
|
||||
p = runestrchr(s, '\n');
|
||||
if(p == 0 || (j->reol && p+1 >= j->reol))
|
||||
if(p == 0 || s == j->reol)
|
||||
return match;
|
||||
s = p+1;
|
||||
break;
|
||||
|
@ -71,16 +71,15 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
nl->inst = 0;
|
||||
|
||||
/* Add first instruction to current list */
|
||||
if(match == 0)
|
||||
_rrenewemptythread(tl, tle, progp->startinst, ms, s);
|
||||
_rrenewemptythread(tl, progp->startinst, ms, s);
|
||||
|
||||
/* Execute machine until current list is empty */
|
||||
for(tlp=tl; tlp->inst; tlp++){
|
||||
for(inst = tlp->inst; ; inst = inst->u2.next){
|
||||
for(inst=tlp->inst; ; inst = inst->u2.next){
|
||||
switch(inst->type){
|
||||
case RUNE: /* regular character */
|
||||
if(inst->u1.r == r)
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case LBRA:
|
||||
|
@ -91,11 +90,11 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
continue;
|
||||
case ANY:
|
||||
if(r != '\n')
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case ANYNL:
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case BOL:
|
||||
|
@ -110,7 +109,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
ep = inst->u1.cp->end;
|
||||
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
|
||||
if(r >= rp[0] && r <= rp[1]){
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
|
@ -121,12 +120,15 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
if(r >= rp[0] && r <= rp[1])
|
||||
break;
|
||||
if(rp == ep)
|
||||
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
|
||||
if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
|
||||
return -1;
|
||||
break;
|
||||
case OR:
|
||||
/* expanded during renewthread; just a place holder */
|
||||
break;
|
||||
/* evaluate right choice later */
|
||||
if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
|
||||
return -1;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
continue;
|
||||
case END: /* Match! */
|
||||
match = 1;
|
||||
tlp->se.m[0].e.rep = s;
|
||||
|
@ -139,7 +141,7 @@ rregexec1(Reprog *progp, /* program to run */
|
|||
}
|
||||
if(s == j->reol)
|
||||
break;
|
||||
checkstart = j->starttype && nl->inst==0;
|
||||
checkstart = j->startchar && nl->inst==0;
|
||||
s++;
|
||||
}while(r);
|
||||
return match;
|
||||
|
@ -153,26 +155,15 @@ rregexec2(Reprog *progp, /* program to run */
|
|||
Reljunk *j
|
||||
)
|
||||
{
|
||||
int rv;
|
||||
Relist *relist0, *relist1;
|
||||
Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
|
||||
|
||||
relist0 = malloc((progp->proglen+1)*sizeof(Relist));
|
||||
if(relist0 == nil)
|
||||
return -1;
|
||||
relist1 = malloc((progp->proglen+1)*sizeof(Relist));
|
||||
if(relist1 == nil){
|
||||
free(relist1);
|
||||
return -1;
|
||||
}
|
||||
/* mark space */
|
||||
j->relist[0] = relist0;
|
||||
j->relist[1] = relist1;
|
||||
j->reliste[0] = relist0 + progp->proglen;
|
||||
j->reliste[1] = relist1 + progp->proglen;
|
||||
j->reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j->reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = rregexec1(progp, bol, mp, ms, j);
|
||||
free(relist0);
|
||||
free(relist1);
|
||||
return rv;
|
||||
return rregexec1(progp, bol, mp, ms, j);
|
||||
}
|
||||
|
||||
extern int
|
||||
|
@ -208,8 +199,8 @@ rregexec(Reprog *progp, /* program to run */
|
|||
/* mark space */
|
||||
j.relist[0] = relist0;
|
||||
j.relist[1] = relist1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 1;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 1;
|
||||
j.reliste[0] = relist0 + nelem(relist0) - 2;
|
||||
j.reliste[1] = relist1 + nelem(relist1) - 2;
|
||||
|
||||
rv = rregexec1(progp, bol, mp, ms, &j);
|
||||
if(rv >= 0)
|
||||
|
|
Loading…
Reference in a new issue