From eaffa1ef55825c34e138246fe5db5bbf996a8dbb Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Fri, 5 Jan 2024 02:12:42 +0000 Subject: [PATCH] kernel: fix EDF scheduler double ready() and more robust double-ready detection Move the "double-ready" check into queueproc() function, doing it while holding the runq lock, meaning all transitions to Ready state are serialized. We do not just check for double-ready but for any "illegal" transitions: ready() on Dead, Moribund, New, Ready, Running and Waitrelease is not allowed. ready() on Queueing*, Wakeme, Broken, Stopped and Rendez is only valid when done from another process. For rescheduling, we have to go to Scheding state before calling ready(). (rebalance(), schedinit()...) The EDF scheduler had this bug where it could ready() multiple times as it was staying in Waitrelease state after releasing the edflock. Now it transitions through Scheding avoiding the issue. --- sys/src/9/port/edf.c | 51 ++------ sys/src/9/port/portclock.c | 2 +- sys/src/9/port/portdat.h | 1 - sys/src/9/port/proc.c | 232 ++++++++++++++++++++----------------- 4 files changed, 136 insertions(+), 150 deletions(-) diff --git a/sys/src/9/port/edf.c b/sys/src/9/port/edf.c index 33ed501d2..041924d84 100644 --- a/sys/src/9/port/edf.c +++ b/sys/src/9/port/edf.c @@ -209,7 +209,6 @@ static void releaseintr(Ureg *u, Timer *t) { Proc *p; - Schedq *rq; if(panicking || active.exiting) return; @@ -224,21 +223,17 @@ releaseintr(Ureg *u, Timer *t) return; case Ready: /* remove proc from current runq */ - rq = &runq[p->priority]; - if(dequeueproc(rq, p) != p){ + if(dequeueproc(&runq[p->priority], p) != p){ DPRINT("releaseintr: can't find proc or lock race\n"); release(p); /* It'll start best effort */ edfunlock(); return; } - p->state = Waitrelease; /* fall through */ case Waitrelease: + p->state = Scheding; release(p); edfunlock(); - if(p->state == Wakeme){ - iprint("releaseintr: wakeme\n"); - } ready(p); if(up){ up->delaysched++; @@ -412,13 +407,13 @@ 
edfadmit(Proc *p) DPRINT("%lud edfadmit other %lud[%s], release at %lud\n", now, p->pid, statename[p->state], e->t); if(e->tt == nil){ - e->tf = releaseintr; - e->ta = p; tns = e->t - now; if(tns < 20) tns = 20; e->tns = 1000LL * tns; e->tmode = Trelative; + e->tf = releaseintr; + e->ta = p; timeradd(e); } } @@ -476,8 +471,8 @@ edfyield(void) if(n < 20) n = 20; up->tns = 1000LL * n; - up->tf = releaseintr; up->tmode = Trelative; + up->tf = releaseintr; up->ta = up; up->trend = &up->sleep; timeradd(up); @@ -488,6 +483,8 @@ edfyield(void) nexterror(); } sleep(&up->sleep, yfn, nil); + up->trend = nil; + timerdel(up); poperror(); } @@ -495,17 +492,10 @@ int edfready(Proc *p) { Edf *e; - Schedq *rq; - Proc *l, *pp; - void (*pt)(Proc*, int, vlong); long n; if((e = edflock(p)) == nil) return 0; - - if(p->state == Wakeme && p->r){ - iprint("edfready: wakeme\n"); - } if(e->d - now <= 0){ /* past deadline, arrange for next release */ if((e->flags & Sporadic) == 0){ @@ -550,7 +540,7 @@ edfready(Proc *p) now, p->pid, statename[p->state], e->t); p->state = Waitrelease; edfunlock(); - return 1; /* Make runnable later */ + return -1; /* Make runnable later */ } DPRINT("%lud edfready %lud %s release now\n", now, p->pid, statename[p->state]); /* release now */ @@ -558,31 +548,6 @@ edfready(Proc *p) } edfunlock(); DPRINT("^"); - rq = &runq[PriEdf]; - /* insert in queue in earliest deadline order */ - lock(runq); - l = nil; - for(pp = rq->head; pp; pp = pp->rnext){ - if(pp->edf->d > e->d) - break; - l = pp; - } - p->rnext = pp; - if (l == nil) - rq->head = p; - else - l->rnext = p; - if(pp == nil) - rq->tail = p; - rq->n++; - nrdy++; - runvec |= 1 << PriEdf; - p->priority = PriEdf; - p->readytime = m->ticks; - p->state = Ready; - unlock(runq); - if(p->trace && (pt = proctrace)) - pt(p, SReady, 0); return 1; } diff --git a/sys/src/9/port/portclock.c b/sys/src/9/port/portclock.c index a1bef59a4..e5b41e5d6 100644 --- a/sys/src/9/port/portclock.c +++ b/sys/src/9/port/portclock.c @@ 
-136,7 +136,7 @@ timerdel(Timer *dt) /* rare, but tf can still be active on another cpu */ while(dt->tactive == mp && dt->tt == nil) - if(up->nlocks == 0 && islo()) + if(up->state == Running && up->nlocks == 0 && islo()) sched(); } diff --git a/sys/src/9/port/portdat.h b/sys/src/9/port/portdat.h index ca4387d03..5bd999b3a 100644 --- a/sys/src/9/port/portdat.h +++ b/sys/src/9/port/portdat.h @@ -769,7 +769,6 @@ struct Proc ulong cpu; /* cpu average */ ulong lastupdate; uchar yield; /* non-zero if the process just did a sleep(0) */ - ulong readytime; /* time process came ready */ int preempted; /* true if this process hasn't finished the interrupt * that last preempted it */ diff --git a/sys/src/9/port/proc.c b/sys/src/9/port/proc.c index 2d3e48300..25d9467c8 100644 --- a/sys/src/9/port/proc.c +++ b/sys/src/9/port/proc.c @@ -12,8 +12,8 @@ int schedgain = 30; /* units in seconds */ int nrdy; -void updatecpu(Proc*); -int reprioritize(Proc*); +static void updatecpu(Proc*); +static int reprioritize(Proc*); ulong delayedscheds; /* statistics */ ulong skipscheds; @@ -78,31 +78,28 @@ schedinit(void) /* never returns */ updatecpu(up); break; case Running: + up->state = Scheding; ready(up); break; case Moribund: mmurelease(up); - up->state = Dead; - edfstop(up); - if(up->edf != nil){ - free(up->edf); - up->edf = nil; - } lock(&procalloc); + up->state = Dead; up->mach = nil; up->qnext = procalloc.free; procalloc.free = up; /* proc is free now, make sure unlock() wont touch it */ up = procalloc.Lock.p = nil; unlock(&procalloc); - - sched(); + goto out; } coherence(); up->mach = nil; up = nil; } +out: sched(); + panic("schedinit"); } int @@ -164,15 +161,12 @@ procswitch(void) void sched(void) { - Proc *p; - if(m->ilockdepth) - panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", + panic("cpu%d: ilockdepth %d, last lock %#p at %#p", m->machno, m->ilockdepth, up != nil ? up->lastilock: nil, - (up != nil && up->lastilock != nil) ? 
up->lastilock->pc: 0, - getcallerpc(&p+2)); + (up != nil && up->lastilock != nil) ? up->lastilock->pc: 0); if(up != nil) { /* * Delay the sched until the process gives up the locks @@ -204,18 +198,15 @@ sched(void) spllo(); return; } - p = runproc(); - if(p->edf == nil){ - updatecpu(p); - p->priority = reprioritize(p); - } - if(p != m->readied) + up = runproc(); + if(up->edf == nil) + up->priority = reprioritize(up); + if(up != m->readied) m->schedticks = m->ticks + HZ/10; m->readied = nil; - up = p; - up->state = Running; - up->mach = MACHP(m->machno); m->proc = up; + up->mach = up->mp = MACHP(m->machno); + up->state = Running; mmuswitch(up); gotolabel(&up->sched); } @@ -310,7 +301,7 @@ preempted(int clockintr) * to maintain accurate cpu usage statistics. It can be called * at any time to bring the stats for a given proc up-to-date. */ -void +static void updatecpu(Proc *p) { ulong t, ocpu, n, D; @@ -348,11 +339,12 @@ updatecpu(Proc *p) * of 3 means you're just right. Having a higher priority (up to p->basepri) * means you're not using as much as you could. 
*/ -int +static int reprioritize(Proc *p) { int fairshare, n, load, ratio; + updatecpu(p); load = MACHP(0)->load; if(load == 0) return p->basepri; @@ -378,26 +370,100 @@ reprioritize(Proc *p) /* * add a process to a scheduling queue */ -void +static int queueproc(Schedq *rq, Proc *p) { - int pri; + int pri = rq - runq; - pri = rq - runq; lock(runq); + switch(p->state){ + case New: + case Queueing: + case QueueingR: + case QueueingW: + case Wakeme: + case Broken: + case Stopped: + case Rendezvous: + if(p != up) + break; + /* wet floor */ + case Dead: + case Moribund: + case Ready: + case Running: + case Waitrelease: + unlock(runq); + return -1; + } + p->state = Ready; p->priority = pri; - p->rnext = nil; - if(rq->tail != nil) - rq->tail->rnext = p; - else - rq->head = p; - rq->tail = p; + if(pri == PriEdf){ + Proc *pp, *l; + + /* insert in queue in earliest deadline order */ + l = nil; + for(pp = rq->head; pp != nil; pp = pp->rnext){ + if(pp->edf->d > p->edf->d) + break; + l = pp; + } + p->rnext = pp; + if(l == nil) + rq->head = p; + else + l->rnext = p; + if(pp == nil) + rq->tail = p; + } else { + p->rnext = nil; + if(rq->tail != nil) + rq->tail->rnext = p; + else + rq->head = p; + rq->tail = p; + } rq->n++; nrdy++; runvec |= 1<text, p->pid, statename[p->state], getcallerpc(&p)); + } else { + void (*pt)(Proc*, int, vlong); + pt = proctrace; + if(pt != nil) + pt(p, SReady, 0); + } + splx(s); +} + + /* * try to remove a process from a scheduling queue (called splhi) */ @@ -437,62 +503,15 @@ dequeueproc(Schedq *rq, Proc *tp) runvec &= ~(1<<(rq-runq)); rq->n--; nrdy--; - if(p->state != Ready) - print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]); - + if(p->state != Ready){ + iprint("dequeueproc %s %lud %s pc %p\n", + p->text, p->pid, statename[p->state], getcallerpc(&rq)); + p = nil; + } unlock(runq); return p; } -/* - * ready(p) picks a new priority for a process and sticks it in the - * runq for that priority. 
- */ -void -ready(Proc *p) -{ - int s, pri; - Schedq *rq; - void (*pt)(Proc*, int, vlong); - - switch(p->state){ - case Running: - if(p == up) - break; - /* wet floor */ - case Dead: - case Moribund: - case Scheding: - print("ready %s %s %lud pc %p\n", statename[p->state], - p->text, p->pid, getcallerpc(&p)); - return; - case Ready: - print("double ready %s %lud pc %p\n", - p->text, p->pid, getcallerpc(&p)); - return; - } - - s = splhi(); - if(edfready(p)){ - splx(s); - return; - } - - if(up != p && (p->wired == nil || p->wired == MACHP(m->machno))) - m->readied = p; /* group scheduling */ - - updatecpu(p); - pri = reprioritize(p); - p->priority = pri; - rq = &runq[pri]; - p->state = Ready; - queueproc(rq, p); - pt = proctrace; - if(pt != nil) - pt(p, SReady, 0); - splx(s); -} - /* * yield the processor and drop our priority */ @@ -516,7 +535,7 @@ ulong balancetime; static void rebalance(void) { - int pri, npri, x; + int pri, npri; Schedq *rq; Proc *p; ulong t; @@ -526,6 +545,8 @@ rebalance(void) return; balancetime = t; + assert(!islo()); + for(pri=0, rq=runq; prihead; @@ -533,15 +554,16 @@ another: continue; if(pri == p->basepri) continue; - updatecpu(p); npri = reprioritize(p); if(npri != pri){ - x = splhi(); p = dequeueproc(rq, p); - if(p != nil) - queueproc(&runq[npri], p); - splx(x); - goto another; + if(p != nil){ + p->state = Scheding; + if(queueproc(&runq[npri], p) < 0) + iprint("rebalance: queueproc %lud %s %s\n", + p->pid, p->text, statename[p->state]); + goto another; + } } } } @@ -606,10 +628,6 @@ found: p = dequeueproc(rq, p); if(p == nil) goto loop; - - p->state = Scheding; - p->mp = MACHP(m->machno); - if(edflock(p)){ edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */ edfunlock(); @@ -831,12 +849,6 @@ interrupted(void) error(Eintr); } -static int -tfn(void *arg) -{ - return up->trend == nil || up->tfn(arg); -} - void twakeup(Ureg*, Timer *t) { @@ -851,6 +863,12 @@ twakeup(Ureg*, Timer *t) } } +static int +tfn(void *arg) +{ + 
return up->trend == nil || up->tfn(arg); +} + void tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms) { @@ -860,8 +878,8 @@ tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms) timerdel(up); } up->tns = MS2NS(ms); - up->tf = twakeup; up->tmode = Trelative; + up->tf = twakeup; up->ta = up; up->trend = r; up->tfn = fn; @@ -1342,6 +1360,10 @@ pexit(char *exitstr, int freemem) qunlock(&up->seglock); edfstop(up); + if(up->edf != nil){ + free(up->edf); + up->edf = nil; + } up->state = Moribund; sched(); panic("pexit");