kernel: fix EDF scheduler double ready() and more robust double-ready detection

Move the "double-ready" check into the queueproc() function,
doing it while holding the runq lock, so that
all transitions to the Ready state are serialized.

We do not just check for double-ready but for any
"illegal" transitions:

ready() on Dead, Moribund, New, Ready, Running and Waitrelease
is not allowed.

ready() on Queueing*, Wakeme, Broken, Stopped and Rendezvous
is only valid when done from another process.

For rescheduling, we have to go to Scheding state
before calling ready(). (rebalance(), schedinit()...)

The EDF scheduler had a bug where it could call ready() multiple
times, as it was staying in Waitrelease state after releasing
the edflock. Now it transitions through Scheding, avoiding
the issue.
This commit is contained in:
cinap_lenrek 2024-01-05 02:12:42 +00:00
parent 39321d74d8
commit eaffa1ef55
4 changed files with 136 additions and 150 deletions

View file

@ -209,7 +209,6 @@ static void
releaseintr(Ureg *u, Timer *t)
{
Proc *p;
Schedq *rq;
if(panicking || active.exiting)
return;
@ -224,21 +223,17 @@ releaseintr(Ureg *u, Timer *t)
return;
case Ready:
/* remove proc from current runq */
rq = &runq[p->priority];
if(dequeueproc(rq, p) != p){
if(dequeueproc(&runq[p->priority], p) != p){
DPRINT("releaseintr: can't find proc or lock race\n");
release(p); /* It'll start best effort */
edfunlock();
return;
}
p->state = Waitrelease;
/* fall through */
case Waitrelease:
p->state = Scheding;
release(p);
edfunlock();
if(p->state == Wakeme){
iprint("releaseintr: wakeme\n");
}
ready(p);
if(up){
up->delaysched++;
@ -412,13 +407,13 @@ edfadmit(Proc *p)
DPRINT("%lud edfadmit other %lud[%s], release at %lud\n",
now, p->pid, statename[p->state], e->t);
if(e->tt == nil){
e->tf = releaseintr;
e->ta = p;
tns = e->t - now;
if(tns < 20)
tns = 20;
e->tns = 1000LL * tns;
e->tmode = Trelative;
e->tf = releaseintr;
e->ta = p;
timeradd(e);
}
}
@ -476,8 +471,8 @@ edfyield(void)
if(n < 20)
n = 20;
up->tns = 1000LL * n;
up->tf = releaseintr;
up->tmode = Trelative;
up->tf = releaseintr;
up->ta = up;
up->trend = &up->sleep;
timeradd(up);
@ -488,6 +483,8 @@ edfyield(void)
nexterror();
}
sleep(&up->sleep, yfn, nil);
up->trend = nil;
timerdel(up);
poperror();
}
@ -495,17 +492,10 @@ int
edfready(Proc *p)
{
Edf *e;
Schedq *rq;
Proc *l, *pp;
void (*pt)(Proc*, int, vlong);
long n;
if((e = edflock(p)) == nil)
return 0;
if(p->state == Wakeme && p->r){
iprint("edfready: wakeme\n");
}
if(e->d - now <= 0){
/* past deadline, arrange for next release */
if((e->flags & Sporadic) == 0){
@ -550,7 +540,7 @@ edfready(Proc *p)
now, p->pid, statename[p->state], e->t);
p->state = Waitrelease;
edfunlock();
return 1; /* Make runnable later */
return -1; /* Make runnable later */
}
DPRINT("%lud edfready %lud %s release now\n", now, p->pid, statename[p->state]);
/* release now */
@ -558,31 +548,6 @@ edfready(Proc *p)
}
edfunlock();
DPRINT("^");
rq = &runq[PriEdf];
/* insert in queue in earliest deadline order */
lock(runq);
l = nil;
for(pp = rq->head; pp; pp = pp->rnext){
if(pp->edf->d > e->d)
break;
l = pp;
}
p->rnext = pp;
if (l == nil)
rq->head = p;
else
l->rnext = p;
if(pp == nil)
rq->tail = p;
rq->n++;
nrdy++;
runvec |= 1 << PriEdf;
p->priority = PriEdf;
p->readytime = m->ticks;
p->state = Ready;
unlock(runq);
if(p->trace && (pt = proctrace))
pt(p, SReady, 0);
return 1;
}

View file

@ -136,7 +136,7 @@ timerdel(Timer *dt)
/* rare, but tf can still be active on another cpu */
while(dt->tactive == mp && dt->tt == nil)
if(up->nlocks == 0 && islo())
if(up->state == Running && up->nlocks == 0 && islo())
sched();
}

View file

@ -769,7 +769,6 @@ struct Proc
ulong cpu; /* cpu average */
ulong lastupdate;
uchar yield; /* non-zero if the process just did a sleep(0) */
ulong readytime; /* time process came ready */
int preempted; /* true if this process hasn't finished the interrupt
* that last preempted it
*/

View file

@ -12,8 +12,8 @@
int schedgain = 30; /* units in seconds */
int nrdy;
void updatecpu(Proc*);
int reprioritize(Proc*);
static void updatecpu(Proc*);
static int reprioritize(Proc*);
ulong delayedscheds; /* statistics */
ulong skipscheds;
@ -78,31 +78,28 @@ schedinit(void) /* never returns */
updatecpu(up);
break;
case Running:
up->state = Scheding;
ready(up);
break;
case Moribund:
mmurelease(up);
up->state = Dead;
edfstop(up);
if(up->edf != nil){
free(up->edf);
up->edf = nil;
}
lock(&procalloc);
up->state = Dead;
up->mach = nil;
up->qnext = procalloc.free;
procalloc.free = up;
/* proc is free now, make sure unlock() wont touch it */
up = procalloc.Lock.p = nil;
unlock(&procalloc);
sched();
goto out;
}
coherence();
up->mach = nil;
up = nil;
}
out:
sched();
panic("schedinit");
}
int
@ -164,15 +161,12 @@ procswitch(void)
void
sched(void)
{
Proc *p;
if(m->ilockdepth)
panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
panic("cpu%d: ilockdepth %d, last lock %#p at %#p",
m->machno,
m->ilockdepth,
up != nil ? up->lastilock: nil,
(up != nil && up->lastilock != nil) ? up->lastilock->pc: 0,
getcallerpc(&p+2));
(up != nil && up->lastilock != nil) ? up->lastilock->pc: 0);
if(up != nil) {
/*
* Delay the sched until the process gives up the locks
@ -204,18 +198,15 @@ sched(void)
spllo();
return;
}
p = runproc();
if(p->edf == nil){
updatecpu(p);
p->priority = reprioritize(p);
}
if(p != m->readied)
up = runproc();
if(up->edf == nil)
up->priority = reprioritize(up);
if(up != m->readied)
m->schedticks = m->ticks + HZ/10;
m->readied = nil;
up = p;
up->state = Running;
up->mach = MACHP(m->machno);
m->proc = up;
up->mach = up->mp = MACHP(m->machno);
up->state = Running;
mmuswitch(up);
gotolabel(&up->sched);
}
@ -310,7 +301,7 @@ preempted(int clockintr)
* to maintain accurate cpu usage statistics. It can be called
* at any time to bring the stats for a given proc up-to-date.
*/
void
static void
updatecpu(Proc *p)
{
ulong t, ocpu, n, D;
@ -348,11 +339,12 @@ updatecpu(Proc *p)
* of 3 means you're just right. Having a higher priority (up to p->basepri)
* means you're not using as much as you could.
*/
int
static int
reprioritize(Proc *p)
{
int fairshare, n, load, ratio;
updatecpu(p);
load = MACHP(0)->load;
if(load == 0)
return p->basepri;
@ -378,26 +370,100 @@ reprioritize(Proc *p)
/*
* add a process to a scheduling queue
*/
void
static int
queueproc(Schedq *rq, Proc *p)
{
int pri;
int pri = rq - runq;
pri = rq - runq;
lock(runq);
switch(p->state){
case New:
case Queueing:
case QueueingR:
case QueueingW:
case Wakeme:
case Broken:
case Stopped:
case Rendezvous:
if(p != up)
break;
/* wet floor */
case Dead:
case Moribund:
case Ready:
case Running:
case Waitrelease:
unlock(runq);
return -1;
}
p->state = Ready;
p->priority = pri;
p->rnext = nil;
if(rq->tail != nil)
rq->tail->rnext = p;
else
rq->head = p;
rq->tail = p;
if(pri == PriEdf){
Proc *pp, *l;
/* insert in queue in earliest deadline order */
l = nil;
for(pp = rq->head; pp != nil; pp = pp->rnext){
if(pp->edf->d > p->edf->d)
break;
l = pp;
}
p->rnext = pp;
if(l == nil)
rq->head = p;
else
l->rnext = p;
if(pp == nil)
rq->tail = p;
} else {
p->rnext = nil;
if(rq->tail != nil)
rq->tail->rnext = p;
else
rq->head = p;
rq->tail = p;
}
rq->n++;
nrdy++;
runvec |= 1<<pri;
unlock(runq);
return 0;
}
/*
* ready(p) picks a new priority for a process and sticks it in the
* runq for that priority.
*/
void
ready(Proc *p)
{
int s, pri;
s = splhi();
switch(edfready(p)){
default:
splx(s);
return;
case 0:
pri = reprioritize(p);
break;
case 1:
pri = PriEdf;
break;
}
if(queueproc(&runq[pri], p) < 0){
iprint("ready %s %lud %s pc %p\n",
p->text, p->pid, statename[p->state], getcallerpc(&p));
} else {
void (*pt)(Proc*, int, vlong);
pt = proctrace;
if(pt != nil)
pt(p, SReady, 0);
}
splx(s);
}
/*
* try to remove a process from a scheduling queue (called splhi)
*/
@ -437,62 +503,15 @@ dequeueproc(Schedq *rq, Proc *tp)
runvec &= ~(1<<(rq-runq));
rq->n--;
nrdy--;
if(p->state != Ready)
print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
if(p->state != Ready){
iprint("dequeueproc %s %lud %s pc %p\n",
p->text, p->pid, statename[p->state], getcallerpc(&rq));
p = nil;
}
unlock(runq);
return p;
}
/*
* ready(p) picks a new priority for a process and sticks it in the
* runq for that priority.
*/
void
ready(Proc *p)
{
int s, pri;
Schedq *rq;
void (*pt)(Proc*, int, vlong);
switch(p->state){
case Running:
if(p == up)
break;
/* wet floor */
case Dead:
case Moribund:
case Scheding:
print("ready %s %s %lud pc %p\n", statename[p->state],
p->text, p->pid, getcallerpc(&p));
return;
case Ready:
print("double ready %s %lud pc %p\n",
p->text, p->pid, getcallerpc(&p));
return;
}
s = splhi();
if(edfready(p)){
splx(s);
return;
}
if(up != p && (p->wired == nil || p->wired == MACHP(m->machno)))
m->readied = p; /* group scheduling */
updatecpu(p);
pri = reprioritize(p);
p->priority = pri;
rq = &runq[pri];
p->state = Ready;
queueproc(rq, p);
pt = proctrace;
if(pt != nil)
pt(p, SReady, 0);
splx(s);
}
/*
* yield the processor and drop our priority
*/
@ -516,7 +535,7 @@ ulong balancetime;
static void
rebalance(void)
{
int pri, npri, x;
int pri, npri;
Schedq *rq;
Proc *p;
ulong t;
@ -526,6 +545,8 @@ rebalance(void)
return;
balancetime = t;
assert(!islo());
for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
another:
p = rq->head;
@ -533,15 +554,16 @@ another:
continue;
if(pri == p->basepri)
continue;
updatecpu(p);
npri = reprioritize(p);
if(npri != pri){
x = splhi();
p = dequeueproc(rq, p);
if(p != nil)
queueproc(&runq[npri], p);
splx(x);
goto another;
if(p != nil){
p->state = Scheding;
if(queueproc(&runq[npri], p) < 0)
iprint("rebalance: queueproc %lud %s %s\n",
p->pid, p->text, statename[p->state]);
goto another;
}
}
}
}
@ -606,10 +628,6 @@ found:
p = dequeueproc(rq, p);
if(p == nil)
goto loop;
p->state = Scheding;
p->mp = MACHP(m->machno);
if(edflock(p)){
edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */
edfunlock();
@ -831,12 +849,6 @@ interrupted(void)
error(Eintr);
}
static int
tfn(void *arg)
{
return up->trend == nil || up->tfn(arg);
}
void
twakeup(Ureg*, Timer *t)
{
@ -851,6 +863,12 @@ twakeup(Ureg*, Timer *t)
}
}
static int
tfn(void *arg)
{
return up->trend == nil || up->tfn(arg);
}
void
tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
{
@ -860,8 +878,8 @@ tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
timerdel(up);
}
up->tns = MS2NS(ms);
up->tf = twakeup;
up->tmode = Trelative;
up->tf = twakeup;
up->ta = up;
up->trend = r;
up->tfn = fn;
@ -1342,6 +1360,10 @@ pexit(char *exitstr, int freemem)
qunlock(&up->seglock);
edfstop(up);
if(up->edf != nil){
free(up->edf);
up->edf = nil;
}
up->state = Moribund;
sched();
panic("pexit");