On Sun, Nov 03, 2024 at 09:56:40PM +0100, Martin Wilck wrote: > On Mon, 2024-10-14 at 23:28 -0400, Benjamin Marzinski wrote: > > Instead of counting the number of times the path checker has been > > called and treating that as the number of seconds that have passed, > > calculate the actual timestamp when the checker will time out, and > > check that instead. > > > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > > --- > > libmultipath/checkers/directio.c | 44 ++++++++++++++++++++++-------- > > -- > > 1 file changed, 31 insertions(+), 13 deletions(-) > > > > diff --git a/libmultipath/checkers/directio.c > > b/libmultipath/checkers/directio.c > > index 27227894..d35a6bac 100644 > > --- a/libmultipath/checkers/directio.c > > +++ b/libmultipath/checkers/directio.c > > @@ -60,13 +60,28 @@ const char *libcheck_msgtable[] = { > > #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt, > > ##args) > > > > struct directio_context { > > - unsigned int running; > > - int reset_flags; > > + time_t timeout; > > > I'd have used a struct timespec here rather than time_t. Sure. > > > + int reset_flags; > > struct aio_group *aio_grp; > > struct async_req *req; > > bool checked_state; > > }; > > > > +bool is_running(struct directio_context *ct) { > > + return (ct->timeout != 0); > > +} > > + > > +void start_running(struct directio_context *ct, int timeout_secs) { > > + struct timespec now; > > + > > + get_monotonic_time(&now); > > + ct->timeout = now.tv_sec + timeout_secs; > > +} > > + > > +void stop_running(struct directio_context *ct) { > > + ct->timeout = 0; > > +} > > + > > I think the 3 functions above should be static. Oops. Yeah. Sending a new version. -Ben > > > static struct aio_group * > > add_aio_group(void) > > { > > @@ -234,9 +249,9 @@ void libcheck_free (struct checker * c) > > } > > } > > > > - if (ct->running && ct->req->state != PATH_PENDING) > > - ct->running = 0; > > - if (!ct->running) { > > + if (is_running(ct) && ct->req->state != PATH_PENDING) > > + stop_running(ct); > > + if (!is_running(ct)) { > > free(ct->req->buf); > > free(ct->req); > > ct->aio_grp->holders--; > > @@ -304,7 +319,7 @@ check_pending(struct directio_context *ct, struct > > timespec timeout) > > r = get_events(ct->aio_grp, &timeout); > > > > if (ct->req->state != PATH_PENDING) { > > - ct->running = 0; > > + stop_running(ct); > > return; > > } else if (r == 0 || > > (timeout.tv_sec == 0 && timeout.tv_nsec > > == 0)) > > @@ -330,10 +345,10 @@ check_state(int fd, struct directio_context > > *ct, int sync, int timeout_secs) > > if (sync > 0) > > LOG(4, "called in synchronous mode"); > > > > - if (ct->running) { > > + if (is_running(ct)) { > > ct->checked_state = true; > > if (ct->req->state != PATH_PENDING) { > > - ct->running = 0; > > + stop_running(ct); > > return ct->req->state; > > } > > } else { > > @@ -348,9 +363,9 @@ check_state(int fd, struct directio_context *ct, > > int sync, int timeout_secs) > > LOG(3, "io_submit error %i", -rc); > > return PATH_UNCHECKED; > > } > > + start_running(ct, timeout_secs); > > ct->checked_state = false; > > } > > - ct->running++; > > if (!sync) > > return PATH_PENDING; > > > > @@ -388,7 +403,7 @@ static void set_msgid(struct checker *c, int > > state) > > bool libcheck_need_wait(struct checker *c) > > { > > struct directio_context *ct = (struct directio_context *)c- > > >context; > > - return (ct && ct->running && ct->req->state == PATH_PENDING > > && > > + return (ct && is_running(ct) && ct->req->state == > > PATH_PENDING && > > !ct->checked_state); > > } > > > > @@ -400,7 +415,7 @@ int libcheck_pending(struct checker *c) > > struct timespec no_wait = { .tv_sec = 0 }; > > > > /* The if path checker isn't running, just return the > > exiting value. */ > > - if (!ct || !ct->running) { > > + if (!ct || !is_running(ct)) { > > rc = c->path_state; > > goto out; > > } > > @@ -408,10 +423,13 @@ int libcheck_pending(struct checker *c) > > if (ct->req->state == PATH_PENDING) > > check_pending(ct, no_wait); > > else > > - ct->running = 0; > > + stop_running(ct); > > rc = ct->req->state; > > if (rc == PATH_PENDING) { > > - if (ct->running > c->timeout) { > > + struct timespec now; > > + > > + get_monotonic_time(&now); > > + if (now.tv_sec > ct->timeout) { > > We could timeout a second too late here, that's why I'd suggest using > timespec. > > > LOG(3, "abort check on timeout"); > > io_cancel(ct->aio_grp->ioctx, &ct->req->io, > > &event); > > rc = PATH_DOWN; > > Regards > Martin