On Wed, Sep 04, 2024 at 05:01:39PM +0200, Martin Wilck wrote: > On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote: <snip> > > diff --git a/libmultipath/checkers/tur.c > > b/libmultipath/checkers/tur.c > > index a2905af5..95af5214 100644 > > --- a/libmultipath/checkers/tur.c > > +++ b/libmultipath/checkers/tur.c > > @@ -323,6 +323,49 @@ static int tur_check_async_timeout(struct > > checker *c) > > return (now.tv_sec > ct->time); > > } > > > > +int check_pending(struct checker *c, struct timespec endtime) > > +{ > > + struct tur_checker_context *ct = c->context; > > + int r, tur_status = PATH_PENDING; > > + > > + pthread_mutex_lock(&ct->lock); > > + > > + for (r = 0; > > + r == 0 && ct->state == PATH_PENDING && > > + ct->msgid == MSG_TUR_RUNNING; > > + r = pthread_cond_timedwait(&ct->active, &ct->lock, > > &endtime)); > > + > > + if (!r) { > > + tur_status = ct->state; > > + c->msgid = ct->msgid; > > + } > > + pthread_mutex_unlock(&ct->lock); > > + if (tur_status == PATH_PENDING && c->msgid == > > MSG_TUR_RUNNING) { > > + condlog(4, "%d:%d : tur checker still running", > > + major(ct->devt), minor(ct->devt)); > > + } else { > > + int running = uatomic_xchg(&ct->running, 0); > > + if (running) > > + pthread_cancel(ct->thread); > > + ct->thread = 0; > > + } > > + > > + return tur_status; > > +} > > + > > +int libcheck_pending(struct checker *c) > > +{ > > + struct timespec endtime; > > + struct tur_checker_context *ct = c->context; > > + > > + /* The if path checker isn't running, just return the > > exiting value. */ > > + if (!ct || !ct->thread) > > + return c->path_state; > > + > > + get_monotonic_time(&endtime); > > + return check_pending(c, endtime); > > Does this work? > > https://pubs.opengroup.org/onlinepubs/009695299/functions/pthread_cond_timedwait.html > says that "an error is returned [...] if the absolute time specified by > abstime has already been passed at the time of the call". > > Which would always be the case if you pass a timestamp that you > fetched previously unmodified, meaning that you'd always get ETIMEDOUT. > Or am I misreading the docs? No. You are reading the docs right. I'm just don't understand why that's a problem. When you start the checker, you set a timeout that you want to wait for, to give the checker a chance to complete in this checker loop. Once that timeout has passed, if the thread is still running, then libcheck_pending should return PATH_PENDING. pthread_cond_timedwait() doesn't get called unless the thread is running. So I would argue that pthread_cond_timedwait() always failing once that timeout has passed is the correct thing to do. Or are you arguing for having check_pending() manually check if the timeout has passed, and skipping the call to pthread_cond_timedwait() in that case? With the directio checker, we need to call get_events() even once our timeout has passed, since we need to handle any async IOs that have completed since the last call to check_pending(). But in there's no need to call pthread_cond_timedwait() for the tur checker, so we could skip it if we saw that the timeout had already passed. I would argue that this is just duplicating the check from pthread_cond_timedwait() in check_pending() for no real benfit though. Or am I missing something here? -Ben > Martin > > > > > > > > > +} > > + > > int libcheck_check(struct checker * c) > > { > > struct tur_checker_context *ct = c->context; > > @@ -437,27 +480,7 @@ int libcheck_check(struct checker * c) > > return tur_check(c->fd, c->timeout, &c- > > >msgid); > > } > > tur_timeout(&tsp); > > - pthread_mutex_lock(&ct->lock); > > - > > - for (r = 0; > > - r == 0 && ct->state == PATH_PENDING && > > - ct->msgid == MSG_TUR_RUNNING; > > - r = pthread_cond_timedwait(&ct->active, &ct- > > >lock, &tsp)); > > - > > - if (!r) { > > - tur_status = ct->state; > > - c->msgid = ct->msgid; > > - } > > - pthread_mutex_unlock(&ct->lock); > > - if (tur_status == PATH_PENDING && c->msgid == > > MSG_TUR_RUNNING) { > > - condlog(4, "%d:%d : tur checker still > > running", > > - major(ct->devt), minor(ct->devt)); > > - } else { > > - int running = uatomic_xchg(&ct->running, 0); > > - if (running) > > - pthread_cancel(ct->thread); > > - ct->thread = 0; > > - } > > + tur_status = check_pending(c, tsp); > > } > > > > return tur_status;