Re: [PATCH 04/15] libmultipath: remove pending wait code from libcheck_check calls

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:
> When the tur and directio checkers start an asynchronous checker,
> they
> now immediately return with the path in PATH_PENDING, instead of
> waiting in checker_check(). Instead the wait now happens in
> checker_get_state().
> 
> Additionally, the directio checker now waits for 1 ms, like the tur
> checker does. Also like the tur checker it now only waits once. If it
> is still pending after the first call to checker_get_state(). It will
> not wait at all on future calls, and will just process the already
> completed IOs.
> 
> Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx>

Now I understand what you're doing, but I find it somewhat confusing. I
believe this can be simplified after the split between starting the
checkers and looking at their result.

Why not just fire off the checkers, wait 1ms for _all_ the just started
checkers in checkerloop() [*], and then do the pending test like it is
now (but just in polling mode, with no c->timeout)?

Regards
Martin

[*] or maybe even a little more, 5ms, say: we'd still be waiting far
less time than we used to, but greatly increase the odds that most
checkers actually complete.


> ---
>  libmultipath/checkers.c          |  7 +++-
>  libmultipath/checkers/directio.c | 57 +++++++++++++++++-------------
> --
>  libmultipath/checkers/tur.c      | 13 +++-----
>  3 files changed, 41 insertions(+), 36 deletions(-)
> 
> diff --git a/libmultipath/checkers.c b/libmultipath/checkers.c
> index 298aec78..b44b856a 100644
> --- a/libmultipath/checkers.c
> +++ b/libmultipath/checkers.c
> @@ -303,7 +303,12 @@ void checker_put (struct checker * dst)
>  
>  int checker_get_state(struct checker *c)
>  {
> -	return c ? c->path_state : PATH_UNCHECKED;
> +	if (!c)
> +		return PATH_UNCHECKED;
> +	if (c->path_state != PATH_PENDING || !c->cls->pending)
> +		return c->path_state;
> +	c->path_state = c->cls->pending(c);
> +	return c->path_state;
>  }
>  
>  void checker_check (struct checker * c, int path_state)
> diff --git a/libmultipath/checkers/directio.c
> b/libmultipath/checkers/directio.c
> index 3f88b40d..904e3071 100644
> --- a/libmultipath/checkers/directio.c
> +++ b/libmultipath/checkers/directio.c
> @@ -60,10 +60,11 @@ const char *libcheck_msgtable[] = {
>  #define LOG(prio, fmt, args...) condlog(prio, "directio: " fmt,
> ##args)
>  
>  struct directio_context {
> -	int		running;
> +	unsigned int	running;
>  	int		reset_flags;
>  	struct aio_group *aio_grp;
>  	struct async_req *req;
> +	struct timespec endtime;
>  };
>  
>  static struct aio_group *
> @@ -314,19 +315,16 @@ check_pending(struct directio_context *ct,
> struct timespec endtime)
>  static int
>  check_state(int fd, struct directio_context *ct, int sync, int
> timeout_secs)
>  {
> -	struct timespec	timeout = { .tv_nsec = 1000 };
>  	struct stat	sb;
>  	int		rc;
> +	struct io_event event;
>  	struct timespec endtime;
>  
>  	if (fstat(fd, &sb) == 0) {
>  		LOG(4, "called for %x", (unsigned) sb.st_rdev);
>  	}
> -	if (sync > 0) {
> +	if (sync > 0)
>  		LOG(4, "called in synchronous mode");
> -		timeout.tv_sec  = timeout_secs;
> -		timeout.tv_nsec = 0;
> -	}
>  
>  	if (ct->running) {
>  		if (ct->req->state != PATH_PENDING) {
> @@ -345,31 +343,26 @@ check_state(int fd, struct directio_context
> *ct, int sync, int timeout_secs)
>  			LOG(3, "io_submit error %i", -rc);
>  			return PATH_UNCHECKED;
>  		}
> +		get_monotonic_time(&ct->endtime);
> +		ct->endtime.tv_nsec += 1000 * 1000;
> +		normalize_timespec(&ct->endtime);
>  	}
>  	ct->running++;
> +	if (!sync)
> +		return PATH_PENDING;
>  
>  	get_monotonic_time(&endtime);
> -	endtime.tv_sec += timeout.tv_sec;
> -	endtime.tv_nsec += timeout.tv_nsec;
> +	endtime.tv_sec += timeout_secs;
>  	normalize_timespec(&endtime);
>  
>  	check_pending(ct, endtime);
>  	if (ct->req->state != PATH_PENDING)
>  		return ct->req->state;
>  
> -	if (ct->running > timeout_secs || sync) {
> -		struct io_event event;
> -
> -		LOG(3, "abort check on timeout");
> -
> -		io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> -		rc = PATH_DOWN;
> -	} else {
> -		LOG(4, "async io pending");
> -		rc = PATH_PENDING;
> -	}
> +	LOG(3, "abort check on timeout");
>  
> -	return rc;
> +	io_cancel(ct->aio_grp->ioctx, &ct->req->io, &event);
> +	return PATH_DOWN;
>  }
>  
>  static void set_msgid(struct checker *c, int state)
> @@ -395,21 +388,31 @@ static void set_msgid(struct checker *c, int
> state)
>  
>  int libcheck_pending(struct checker *c)
>  {
> -	struct timespec endtime;
> +	int rc;
> +	struct io_event event;
>  	struct directio_context *ct = (struct directio_context *)c-
> >context;
>  
>  	/* The if path checker isn't running, just return the
> exiting value. */
>  	if (!ct || !ct->running)
>  		return c->path_state;
>  
> -	if (ct->req->state == PATH_PENDING) {
> -		get_monotonic_time(&endtime);
> -		check_pending(ct, endtime);
> -	} else
> +	if (ct->req->state == PATH_PENDING)
> +		check_pending(ct, ct->endtime);
> +	else
>  		ct->running = 0;
> -	set_msgid(c, ct->req->state);
> +	rc = ct->req->state;
> +	if (rc == PATH_PENDING) {
> +		if (ct->running > c->timeout) {
> +			LOG(3, "abort check on timeout");
> +			io_cancel(ct->aio_grp->ioctx, &ct->req->io,
> &event);
> +			rc = PATH_DOWN;
> +		}
> +		else
> +			LOG(4, "async io pending");
> +	}
> +	set_msgid(c, rc);
>  
> -	return ct->req->state;
> +	return rc;
>  }
>  
>  int libcheck_check (struct checker * c)
> diff --git a/libmultipath/checkers/tur.c
> b/libmultipath/checkers/tur.c
> index 95af5214..81db565b 100644
> --- a/libmultipath/checkers/tur.c
> +++ b/libmultipath/checkers/tur.c
> @@ -58,6 +58,7 @@ struct tur_checker_context {
>  	int msgid;
>  	struct checker_context ctx;
>  	unsigned int nr_timeouts;
> +	struct timespec endtime;
>  };
>  
>  int libcheck_init (struct checker * c)
> @@ -323,7 +324,7 @@ static int tur_check_async_timeout(struct checker
> *c)
>  	return (now.tv_sec > ct->time);
>  }
>  
> -int check_pending(struct checker *c, struct timespec endtime)
> +int check_pending(struct checker *c)
>  {
>  	struct tur_checker_context *ct = c->context;
>  	int r, tur_status = PATH_PENDING;
> @@ -333,7 +334,7 @@ int check_pending(struct checker *c, struct
> timespec endtime)
>  	for (r = 0;
>  	     r == 0 && ct->state == PATH_PENDING &&
>  	     ct->msgid == MSG_TUR_RUNNING;
> -	     r = pthread_cond_timedwait(&ct->active, &ct->lock,
> &endtime));
> +	     r = pthread_cond_timedwait(&ct->active, &ct->lock, &ct-
> >endtime));
>  
>  	if (!r) {
>  		tur_status = ct->state;
> @@ -355,21 +356,18 @@ int check_pending(struct checker *c, struct
> timespec endtime)
>  
>  int libcheck_pending(struct checker *c)
>  {
> -	struct timespec endtime;
>  	struct tur_checker_context *ct = c->context;
>  
>  	/* The if path checker isn't running, just return the
> exiting value. */
>  	if (!ct || !ct->thread)
>  		return c->path_state;
>  
> -	get_monotonic_time(&endtime);
> -	return check_pending(c, endtime);
> +	return check_pending(c);
>  }
>  
>  int libcheck_check(struct checker * c)
>  {
>  	struct tur_checker_context *ct = c->context;
> -	struct timespec tsp;
>  	pthread_attr_t attr;
>  	int tur_status, r;
>  
> @@ -479,8 +477,7 @@ int libcheck_check(struct checker * c)
>  				" sync mode", major(ct->devt),
> minor(ct->devt));
>  			return tur_check(c->fd, c->timeout, &c-
> >msgid);
>  		}
> -		tur_timeout(&tsp);
> -		tur_status = check_pending(c, tsp);
> +		tur_timeout(&ct->endtime);
>  	}
>  
>  	return tur_status;






[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux