On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote: > When the tur and directio checkers start an asynchronous checker, > they > now immediately return with the path in PATH_PENDING, instead of > waiting in checker_check(). Instead the wait now happens in > checker_get_state(). > > Additionally, the directio checker now waits for 1 ms, like the tur > checker does. Also like the tur checker it now only waits once. If it > is still pending after the first call to checker_get_state(). It will > not wait at all on future calls, and will just process the already > completed IOs. > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> Now I understand what you're doing, but I find it somewhat confusing. I believe this can be simplified after the split between starting the checkers and looking at their result. Why not just fire off the checkers, wait 1ms for _all_ the just started checkers in checkerloop() [*], and then do the pending test like it is now (but just in polling mode, with no c->timeout)? Regards Martin [*] or maybe even a little more, 5ms, say: we'd still be waiting far less time than we used to, but greatly increase the odds that most checkers actually complete. > --- > libmultipath/checkers.c | 7 +++- > libmultipath/checkers/directio.c | 57 +++++++++++++++++------------- > -- > libmultipath/checkers/tur.c | 13 +++----- > 3 files changed, 41 insertions(+), 36 deletions(-) > > diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c > index 298aec78..b44b856a 100644 > --- a/libmultipath/checkers.c > +++ b/libmultipath/checkers.c > @@ -303,7 +303,12 @@ void checker_put (struct checker * dst) > > int checker_get_state(struct checker *c) > { > - return c ? c->path_state : PATH_UNCHECKED; > + if (!c) > + return PATH_UNCHECKED; > + if (c->path_state != PATH_PENDING || !c->cls->pending) > + return c->path_state; > + c->path_state = c->cls->pending(c); > + return c->path_state; > } > > void checker_check (struct checker * c, int path_state) > diff --git a/libmultipath/checkers/directio.c > b/libmultipath/checkers/directio.c > index 3f88b40d..904e3071 100644 > --- a/libmultipath/checkers/directio.c > +++ b/libmultipath/checkers/directio.c > @@ -60,10 +60,11 @@ const char *libcheck_msgtable[] = { > #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, > ##args) > > struct directio_context { > - int running; > + unsigned int running; > int reset_flags; > struct aio_group *aio_grp; > struct async_req *req; > + struct timespec endtime; > }; > > static struct aio_group * > @@ -314,19 +315,16 @@ check_pending(struct directio_context *ct, > struct timespec endtime) > static int > check_state(int fd, struct directio_context *ct, int sync, int > timeout_secs) > { > - struct timespec timeout = { .tv_nsec = 1000 }; > struct stat sb; > int rc; > + struct io_event event; > struct timespec endtime; > > if (fstat(fd, &sb) == 0) { > LOG(4, "called for %x", (unsigned) sb.st_rdev); > } > - if (sync > 0) { > + if (sync > 0) > LOG(4, "called in synchronous mode"); > - timeout.tv_sec = timeout_secs; > - timeout.tv_nsec = 0; > - } > > if (ct->running) { > if (ct->req->state != PATH_PENDING) { > @@ -345,31 +343,26 @@ check_state(int fd, struct directio_context > *ct, int sync, int timeout_secs) > LOG(3, "io_submit error %i", -rc); > return PATH_UNCHECKED; > } > + get_monotonic_time(&ct->endtime); > + ct->endtime.tv_nsec += 1000 * 1000; > + normalize_timespec(&ct->endtime); > } > ct->running++; > + if (!sync) > + return PATH_PENDING; > > get_monotonic_time(&endtime); > - endtime.tv_sec += timeout.tv_sec; > - endtime.tv_nsec += timeout.tv_nsec; > + endtime.tv_sec += timeout_secs; > normalize_timespec(&endtime); > > check_pending(ct, endtime); > if (ct->req->state != PATH_PENDING) > return ct->req->state; > > - if (ct->running > timeout_secs || sync) { > - struct io_event event; > - > - LOG(3, "abort check on timeout"); > - > - io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event); > - rc = PATH_DOWN; > - } else { > - LOG(4, "async io pending"); > - rc = PATH_PENDING; > - } > + LOG(3, "abort check on timeout"); > > - return rc; > + io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event); > + return PATH_DOWN; > } > > static void set_msgid(struct checker *c, int state) > @@ -395,21 +388,31 @@ static void set_msgid(struct checker *c, int > state) > > int libcheck_pending(struct checker *c) > { > - struct timespec endtime; > + int rc; > + struct io_event event; > struct directio_context *ct = (struct directio_context *)c- > >context; > > /* The if path checker isn't running, just return the > exiting value. */ > if (!ct || !ct->running) > return c->path_state; > > - if (ct->req->state == PATH_PENDING) { > - get_monotonic_time(&endtime); > - check_pending(ct, endtime); > - } else > + if (ct->req->state == PATH_PENDING) > + check_pending(ct, ct->endtime); > + else > ct->running = 0; > - set_msgid(c, ct->req->state); > + rc = ct->req->state; > + if (rc == PATH_PENDING) { > + if (ct->running > c->timeout) { > + LOG(3, "abort check on timeout"); > + io_cancel(ct->aio_grp->ioctx, &ct->req->io, > &event); > + rc = PATH_DOWN; > + } > + else > + LOG(4, "async io pending"); > + } > + set_msgid(c, rc); > > - return ct->req->state; > + return rc; > } > > int libcheck_check (struct checker * c) > diff --git a/libmultipath/checkers/tur.c > b/libmultipath/checkers/tur.c > index 95af5214..81db565b 100644 > --- a/libmultipath/checkers/tur.c > +++ b/libmultipath/checkers/tur.c > @@ -58,6 +58,7 @@ struct tur_checker_context { > int msgid; > struct checker_context ctx; > unsigned int nr_timeouts; > + struct timespec endtime; > }; > > int libcheck_init (struct checker * c) > @@ -323,7 +324,7 @@ static int tur_check_async_timeout(struct checker > *c) > return (now.tv_sec > ct->time); > } > > -int check_pending(struct checker *c, struct timespec endtime) > +int check_pending(struct checker *c) > { > struct tur_checker_context *ct = c->context; > int r, tur_status = PATH_PENDING; > @@ -333,7 +334,7 @@ int check_pending(struct checker *c, struct > timespec endtime) > for (r = 0; > r == 0 && ct->state == PATH_PENDING && > ct->msgid == MSG_TUR_RUNNING; > - r = pthread_cond_timedwait(&ct->active, &ct->lock, > &endtime)); > + r = pthread_cond_timedwait(&ct->active, &ct->lock, &ct- > >endtime)); > > if (!r) { > tur_status = ct->state; > @@ -355,21 +356,18 @@ int check_pending(struct checker *c, struct > timespec endtime) > > int libcheck_pending(struct checker *c) > { > - struct timespec endtime; > struct tur_checker_context *ct = c->context; > > /* The if path checker isn't running, just return the > exiting value. */ > if (!ct || !ct->thread) > return c->path_state; > > - get_monotonic_time(&endtime); > - return check_pending(c, endtime); > + return check_pending(c); > } > > int libcheck_check(struct checker * c) > { > struct tur_checker_context *ct = c->context; > - struct timespec tsp; > pthread_attr_t attr; > int tur_status, r; > > @@ -479,8 +477,7 @@ int libcheck_check(struct checker * c) > " sync mode", major(ct->devt), > minor(ct->devt)); > return tur_check(c->fd, c->timeout, &c- > >msgid); > } > - tur_timeout(&tsp); > - tur_status = check_pending(c, tsp); > + tur_timeout(&ct->endtime); > } > > return tur_status;