mirror of
https://github.com/9fans/plan9port.git
synced 2025-01-27 11:52:03 +00:00
acme: regexp fix (see libregexp change)
This commit is contained in:
parent
a7511dd43d
commit
6f16e7fc1b
1 changed files with 59 additions and 51 deletions
|
@ -521,26 +521,56 @@ classmatch(int classno, int c, int negate)
|
|||
}
|
||||
|
||||
/*
|
||||
* Note optimization in addinst:
|
||||
* *l must be pending when addinst called; if *l has been looked
|
||||
* at already, the optimization is a bug.
|
||||
* Add inst to the list [l, l+NLIST), but only if it is not there already.
|
||||
* These work lists are stored and processed in increasing
|
||||
* order of sep->p[0], so if the inst is there already, the one
|
||||
* there already is a more left match and takes priority.
|
||||
* (This works for backward searches too, because there
|
||||
* is no explicit comparison.)
|
||||
*/
|
||||
int
|
||||
addinst(Ilist *l, Inst *inst, Rangeset *sep)
|
||||
Ilist*
|
||||
addinst1(Ilist *l, Inst *inst, Rangeset *sep)
|
||||
{
|
||||
Ilist *p;
|
||||
|
||||
for(p = l; p->inst; p++){
|
||||
if(p->inst==inst){
|
||||
if((sep)->r[0].q0 < p->se.r[0].q0)
|
||||
p->se= *sep; /* this would be bug */
|
||||
return 0; /* It's already there */
|
||||
for(p = l; p->inst; p++)
|
||||
if(p->inst==inst)
|
||||
return 0;
|
||||
|
||||
if(p == l+NLIST)
|
||||
return l+NLIST;
|
||||
|
||||
p->inst = inst;
|
||||
p->se = *sep;
|
||||
(p+1)->inst = 0;
|
||||
return p;
|
||||
}
|
||||
|
||||
int
|
||||
addinst(Ilist *l, Inst *inst, Rangeset *sep)
|
||||
{
|
||||
Ilist *ap;
|
||||
|
||||
ap = addinst1(l, inst, sep);
|
||||
if(ap == 0)
|
||||
return 0;
|
||||
if(ap == l+NLIST)
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Added inst to list at ap.
|
||||
* Expand any ORs right now, so that entire
|
||||
* work list ends up being sorted by increasing sep->p[0].
|
||||
*/
|
||||
for(; ap->inst; ap++){
|
||||
if(ap->inst->type == OR){
|
||||
if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST)
|
||||
return -1;
|
||||
if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
p->inst = inst;
|
||||
p->se= *sep;
|
||||
(p+1)->inst = nil;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -557,7 +587,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
Inst *inst;
|
||||
Ilist *tlp;
|
||||
uint p;
|
||||
int nnl, ntl;
|
||||
int nc, c;
|
||||
int wrapped;
|
||||
int startchar;
|
||||
|
@ -566,7 +595,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
p = startp;
|
||||
startchar = 0;
|
||||
wrapped = 0;
|
||||
nnl = 0;
|
||||
if(startinst->type<OPERATOR)
|
||||
startchar = startinst->type;
|
||||
list[0][0].inst = list[1][0].inst = nil;
|
||||
|
@ -576,6 +604,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
else
|
||||
nc = runestrlen(r);
|
||||
/* Execute machine once for each character */
|
||||
nl = list[0];
|
||||
for(;;p++){
|
||||
doloop:
|
||||
if(p>=eof || p>=nc){
|
||||
|
@ -594,7 +623,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
}
|
||||
c = 0;
|
||||
}else{
|
||||
if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0)
|
||||
if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0)
|
||||
break;
|
||||
if(t != nil)
|
||||
c = textreadc(t, p);
|
||||
|
@ -602,18 +631,15 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
c = r[p];
|
||||
}
|
||||
/* fast check for first char */
|
||||
if(startchar && nnl==0 && c!=startchar)
|
||||
if(startchar && nl->inst==0 && c!=startchar)
|
||||
continue;
|
||||
tl = list[flag];
|
||||
nl = list[flag^=1];
|
||||
nl->inst = nil;
|
||||
ntl = nnl;
|
||||
nnl = 0;
|
||||
if(sel.r[0].q0<0 && (!wrapped || p<startp || startp==eof)){
|
||||
/* Add first instruction to this list */
|
||||
sempty.r[0].q0 = p;
|
||||
if(addinst(tl, startinst, &sempty))
|
||||
if(++ntl >= NLIST){
|
||||
if(addinst(tl, startinst, &sempty) < 0){
|
||||
Overflow:
|
||||
warning(nil, "regexp list overflow\n");
|
||||
sel.r[0].q0 = -1;
|
||||
|
@ -627,8 +653,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
default: /* regular character */
|
||||
if(inst->type==c){
|
||||
Addinst:
|
||||
if(addinst(nl, inst->u1.next, &tlp->se))
|
||||
if(++nnl >= NLIST)
|
||||
if(addinst(nl, inst->u1.next, &tlp->se) < 0)
|
||||
goto Overflow;
|
||||
}
|
||||
break;
|
||||
|
@ -666,13 +691,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Rangeset *rp)
|
|||
goto Addinst;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(addinst(tl, inst->u.right, &tlp->se))
|
||||
if(++ntl >= NLIST)
|
||||
goto Overflow;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
inst = inst->u1.left;
|
||||
goto Switchstmt;
|
||||
/* already expanded; just a placeholder */
|
||||
break;
|
||||
case END: /* Match! */
|
||||
tlp->se.r[0].q1 = p;
|
||||
newmatch(&tlp->se);
|
||||
|
@ -700,13 +720,11 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
|||
Inst *inst;
|
||||
Ilist *tlp;
|
||||
int p;
|
||||
int nnl, ntl;
|
||||
int c;
|
||||
int wrapped;
|
||||
int startchar;
|
||||
|
||||
flag = 0;
|
||||
nnl = 0;
|
||||
wrapped = 0;
|
||||
p = startp;
|
||||
startchar = 0;
|
||||
|
@ -715,6 +733,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
|||
list[0][0].inst = list[1][0].inst = nil;
|
||||
sel.r[0].q0= -1;
|
||||
/* Execute machine once for each character, including terminal NUL */
|
||||
nl = list[0];
|
||||
for(;;--p){
|
||||
doloop:
|
||||
if(p <= 0){
|
||||
|
@ -734,24 +753,20 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
|||
}
|
||||
c = 0;
|
||||
}else{
|
||||
if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0)
|
||||
if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0)
|
||||
break;
|
||||
c = textreadc(t, p-1);
|
||||
}
|
||||
/* fast check for first char */
|
||||
if(startchar && nnl==0 && c!=startchar)
|
||||
if(startchar && nl->inst==0 && c!=startchar)
|
||||
continue;
|
||||
tl = list[flag];
|
||||
nl = list[flag^=1];
|
||||
nl->inst = nil;
|
||||
ntl = nnl;
|
||||
nnl = 0;
|
||||
if(sel.r[0].q0<0 && (!wrapped || p>startp)){
|
||||
/* Add first instruction to this list */
|
||||
/* the minus is so the optimizations in addinst work */
|
||||
sempty.r[0].q0 = -p;
|
||||
if(addinst(tl, bstartinst, &sempty))
|
||||
if(++ntl >= NLIST){
|
||||
sempty.r[0].q0 = p;
|
||||
if(addinst(tl, bstartinst, &sempty) < 0){
|
||||
Overflow:
|
||||
warning(nil, "regexp list overflow\n");
|
||||
sel.r[0].q0 = -1;
|
||||
|
@ -765,8 +780,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
|||
default: /* regular character */
|
||||
if(inst->type == c){
|
||||
Addinst:
|
||||
if(addinst(nl, inst->u1.next, &tlp->se))
|
||||
if(++nnl >= NLIST)
|
||||
if(addinst(nl, inst->u1.next, &tlp->se) < 0)
|
||||
goto Overflow;
|
||||
}
|
||||
break;
|
||||
|
@ -804,15 +818,9 @@ rxbexecute(Text *t, uint startp, Rangeset *rp)
|
|||
goto Addinst;
|
||||
break;
|
||||
case OR:
|
||||
/* evaluate right choice later */
|
||||
if(addinst(tl, inst->u.right, &tlp->se))
|
||||
if(++ntl >= NLIST)
|
||||
goto Overflow;
|
||||
/* efficiency: advance and re-evaluate */
|
||||
inst = inst->u1.left;
|
||||
goto Switchstmt;
|
||||
/* already expanded; just a placeholder */
|
||||
break;
|
||||
case END: /* Match! */
|
||||
tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */
|
||||
tlp->se.r[0].q1 = p;
|
||||
bnewmatch(&tlp->se);
|
||||
break;
|
||||
|
|
Loading…
Reference in a new issue