Re: [PATCH 1/6] zbd: Decrement open zones count at write command completion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2020-08-13 at 13:57 +0900, Shin'ichiro Kawasaki wrote:
> When max_open_zones or job_max_open_zones option is provided, fio counts
> number of open zones. This open zone accounting is done during submission
> of write commands. When write command is submitted to a zone for the
> first time, the zone is counted as open. When a write command which will
> fill the zone gets submitted, the zone is counted as closed. However,
> this count at write command submission may open zones more than the
> limit. When writes to zones more than the limit are submitted with high
> queue depth, those writes in-flight open zones more than the limit.
> 
> To avoid such writes beyond max open zones limit, decrement number of
> open zones count not at write command submission but at write command
> completion. By doing this, the number of zones with in-flight write
> commands are kept within the limit with accuracy. Introduce the helper
> function zbd_end_zone_io() for this decrement and zbd_close_zone() call.
> 
> The zbd_close_zone() function requires thread_data argument. To refer
> thread_data at write command completion, add the argument to zbd_put_io()
> and zbd_queue_io() callbacks. Also add a loop to the zbd_close_zone()
> function which converts zone_info array index to open_zones array index
> to avoid loop code duplication.
> 
> Even when io_u points to an open zone, the zone may not be valid for
> write since in-flight write commands may make the zone full. Check this
> in zbd_open_zone() to handle such zones as in full status.
> 
> Because of the zone close timing change, there might be no open zone when
> zbd_convert_to_open_zone() is called. Do not handle such case as an
> error and open other zones.
> 
> When zbd_convert_to_open_zone() finds all open zones can not be used for
> next write, it opens other zones. This zone open fails when the number of
> open zones hits one of the limits: 1) number of zones in the fio write
> target region, 2) max_open_zones option or 3) job_max_open_zones option.
> To avoid the zone open failure, wait for writes in-flight completes and
> open zones get closed before opening other zones.
> 
> Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
> ---
>  io_u.c      |  4 +--
>  io_u.h      |  5 +--
>  ioengines.c |  4 +--
>  zbd.c       | 92 +++++++++++++++++++++++++++++++++++++++++------------
>  zbd.h       |  9 +++---
>  5 files changed, 84 insertions(+), 30 deletions(-)
> 
> diff --git a/io_u.c b/io_u.c
> index 2ef5acec..62bfec47 100644
> --- a/io_u.c
> +++ b/io_u.c
> @@ -795,7 +795,7 @@ void put_io_u(struct thread_data *td, struct io_u *io_u)
>  {
>  	const bool needs_lock = td_async_processing(td);
>  
> -	zbd_put_io_u(io_u);
> +	zbd_put_io_u(td, io_u);
>  
>  	if (td->parent)
>  		td = td->parent;
> @@ -1364,7 +1364,7 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
>  		if (!fill_io_u(td, io_u))
>  			break;
>  
> -		zbd_put_io_u(io_u);
> +		zbd_put_io_u(td, io_u);
>  
>  		put_file_log(td, f);
>  		td_io_close_file(td, f);
> diff --git a/io_u.h b/io_u.h
> index 31100928..5a28689c 100644
> --- a/io_u.h
> +++ b/io_u.h
> @@ -101,13 +101,14 @@ struct io_u {
>  	 * @success == true means that the I/O operation has been queued or
>  	 * completed successfully.
>  	 */
> -	void (*zbd_queue_io)(struct io_u *, int q, bool success);
> +	void (*zbd_queue_io)(struct thread_data *td, struct io_u *, int q,
> +			     bool success);
>  
>  	/*
>  	 * ZBD mode zbd_put_io callback: called in after completion of an I/O
>  	 * or commit of an async I/O to unlock the I/O target zone.
>  	 */
> -	void (*zbd_put_io)(const struct io_u *);
> +	void (*zbd_put_io)(struct thread_data *td, const struct io_u *);
>  
>  	/*
>  	 * Callback for io completion
> diff --git a/ioengines.c b/ioengines.c
> index 1c5970a4..476df58d 100644
> --- a/ioengines.c
> +++ b/ioengines.c
> @@ -352,7 +352,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
>  	}
>  
>  	ret = td->io_ops->queue(td, io_u);
> -	zbd_queue_io_u(io_u, ret);
> +	zbd_queue_io_u(td, io_u, ret);
>  
>  	unlock_file(td, io_u->file);
>  
> @@ -394,7 +394,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
>  	if (!td->io_ops->commit) {
>  		io_u_mark_submit(td, 1);
>  		io_u_mark_complete(td, 1);
> -		zbd_put_io_u(io_u);
> +		zbd_put_io_u(td, io_u);
>  	}
>  
>  	if (ret == FIO_Q_COMPLETED) {
> diff --git a/zbd.c b/zbd.c
> index e4a480b7..20f64b58 100644
> --- a/zbd.c
> +++ b/zbd.c
> @@ -721,12 +721,18 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
>  
>  /* The caller must hold f->zbd_info->mutex */
>  static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
> -			   unsigned int open_zone_idx)
> +			   unsigned int zone_idx)
>  {
> -	uint32_t zone_idx;
> +	uint32_t open_zone_idx = 0;
> +
> +	for (; open_zone_idx < f->zbd_info->num_open_zones; open_zone_idx++) {
> +		if (f->zbd_info->open_zones[open_zone_idx] == zone_idx)
> +			break;
> +	}
> +	if (open_zone_idx == f->zbd_info->num_open_zones)

I understand that the assert below needs to be removed because this function can be called
as a part of the reset of all zones. Still, maybe add a debug message saying "zone %d is
not open" here?

> +		return;
>  
> -	assert(open_zone_idx < f->zbd_info->num_open_zones);
> -	zone_idx = f->zbd_info->open_zones[open_zone_idx];
> +	dprint(FD_ZBD, "%s: closing zone %d\n", f->file_name, zone_idx);
>  	memmove(f->zbd_info->open_zones + open_zone_idx,
>  		f->zbd_info->open_zones + open_zone_idx + 1,
>  		(ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) *
> @@ -765,13 +771,8 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
>  			continue;
>  		zone_lock(td, f, z);
>  		if (all_zones) {
> -			unsigned int i;
> -
>  			pthread_mutex_lock(&f->zbd_info->mutex);
> -			for (i = 0; i < f->zbd_info->num_open_zones; i++) {
> -				if (f->zbd_info->open_zones[i] == nz)
> -					zbd_close_zone(td, f, i);
> -			}
> +			zbd_close_zone(td, f, nz);
>  			pthread_mutex_unlock(&f->zbd_info->mutex);
>  
>  			reset_wp = z->wp != z->start;
> @@ -952,8 +953,15 @@ static bool zbd_open_zone(struct thread_data *td, const struct io_u *io_u,
>  		return false;
>  
>  	pthread_mutex_lock(&f->zbd_info->mutex);
> -	if (is_zone_open(td, f, zone_idx))
> +	if (is_zone_open(td, f, zone_idx)) {
> +		/*
> +		 * If the zone is already open and going to be full by writes
> +		 * in-flight, handle it as a full zone instead of an open zone.
> +		 */
> +		if (z->wp >= zbd_zone_capacity_end(z))
> +			res = false;
>  		goto out;
> +	}
>  	res = false;
>  	/* Zero means no limit */
>  	if (td->o.job_max_open_zones > 0 &&
> @@ -995,6 +1003,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
>  	unsigned int open_zone_idx = -1;
>  	uint32_t zone_idx, new_zone_idx;
>  	int i;
> +	bool wait_zone_close;
>  
>  	assert(is_valid_offset(f, io_u->offset));
>  
> @@ -1030,11 +1039,9 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
>  		if (td->o.max_open_zones == 0 && td->o.job_max_open_zones == 0)
>  			goto examine_zone;
>  		if (f->zbd_info->num_open_zones == 0) {
> -			pthread_mutex_unlock(&f->zbd_info->mutex);
> -			pthread_mutex_unlock(&z->mutex);
>  			dprint(FD_ZBD, "%s(%s): no zones are open\n",
>  			       __func__, f->file_name);

I was not able to trigger this message by running t/zbd/test-zbd-support script
with -z option. Do you know an easy way to trigger this situation?
I don't see anything wrong with the code here, my concern is mainly about
test coverage...

> -			return NULL;
> +			goto open_other_zone;
>  		}
>  
>  		/*
> @@ -1081,14 +1088,30 @@ examine_zone:
>  		pthread_mutex_unlock(&f->zbd_info->mutex);
>  		goto out;
>  	}
> -	dprint(FD_ZBD, "%s(%s): closing zone %d\n", __func__, f->file_name,
> -	       zone_idx);
> -	if (td->o.max_open_zones || td->o.job_max_open_zones)
> -		zbd_close_zone(td, f, open_zone_idx);
> +
> +open_other_zone:
> +	/* Check if number of open zones reaches one of limits. */
> +	wait_zone_close =
> +		f->zbd_info->num_open_zones == f->max_zone - f->min_zone ||
> +		(td->o.max_open_zones &&
> +		 f->zbd_info->num_open_zones == td->o.max_open_zones) ||
> +		(td->o.job_max_open_zones &&
> +		 td->num_open_zones == td->o.job_max_open_zones);
> +
>  	pthread_mutex_unlock(&f->zbd_info->mutex);
>  
>  	/* Only z->mutex is held. */
>  
> +	/*
> +	 * When number of open zones reaches to one of limits, wait for
> +	 * zone close before opening a new zone.
> +	 */
> +	if (wait_zone_close) {
> +		dprint(FD_ZBD, "%s(%s): quiesce to allow open zones to close\n",
> +		       __func__, f->file_name);
> +		io_u_quiesce(td);
> +	}
> +
>  	/* Zone 'z' is full, so try to open a new zone. */
>  	for (i = f->io_size / f->zbd_info->zone_size; i > 0; i--) {
>  		zone_idx++;
> @@ -1203,6 +1226,29 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u,
>  	return NULL;
>  }
>  
> +/**
> + * zbd_end_zone_io - update zone status at command completion
> + * @io_u: I/O unit
> + * @z: zone info pointer
> + * @success: Whether or not the I/O unit has been queued successfully
> + *
> + * If the write command made the zone full, close it.
> + *
> + * The caller must hold z->mutex.
> + */
> +static void zbd_end_zone_io(struct thread_data *td, const struct io_u *io_u,
> +			    struct fio_zone_info *z, bool success)
> +{
> +	const struct fio_file *f = io_u->file;
> +
> +	if (io_u->ddir == DDIR_WRITE && success &&
> +	    io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
> +		pthread_mutex_lock(&f->zbd_info->mutex);
> +		zbd_close_zone(td, f, z - f->zbd_info->zone_info);
> +		pthread_mutex_unlock(&f->zbd_info->mutex);
> +	}
> +}
> +
>  /**
>   * zbd_queue_io - update the write pointer of a sequential zone
>   * @io_u: I/O unit
> @@ -1212,7 +1258,8 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u,
>   * For write and trim operations, update the write pointer of the I/O unit
>   * target zone.
>   */
> -static void zbd_queue_io(struct io_u *io_u, int q, bool success)
> +static void zbd_queue_io(struct thread_data *td, struct io_u *io_u, int q,
> +			 bool success)
>  {
>  	const struct fio_file *f = io_u->file;
>  	struct zoned_block_device_info *zbd_info = f->zbd_info;
> @@ -1258,6 +1305,9 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
>  		break;
>  	}
>  
> +	if (q == FIO_Q_COMPLETED)
> +		zbd_end_zone_io(td, io_u, z, !io_u->error);

This could be changed to

+	if (q == FIO_Q_COMPLETED && !io_u->error)
+		zbd_end_zone_io(td, io_u, z);

making the last parameter of zbd_end_zone_io(), "success", unnecessary.

> +
>  unlock:
>  	if (!success || q != FIO_Q_QUEUED) {
>  		/* BUSY or COMPLETED: unlock the zone */
> @@ -1270,7 +1320,7 @@ unlock:
>   * zbd_put_io - Unlock an I/O unit target zone lock
>   * @io_u: I/O unit
>   */
> -static void zbd_put_io(const struct io_u *io_u)
> +static void zbd_put_io(struct thread_data *td, const struct io_u *io_u)
>  {
>  	const struct fio_file *f = io_u->file;
>  	struct zoned_block_device_info *zbd_info = f->zbd_info;
> @@ -1292,6 +1342,8 @@ static void zbd_put_io(const struct io_u *io_u)
>  	       "%s: terminate I/O (%lld, %llu) for zone %u\n",
>  	       f->file_name, io_u->offset, io_u->buflen, zone_idx);
>  
> +	zbd_end_zone_io(td, io_u, z, true);

The last parameter in this call above can be removed with the change above.

> +
>  	ret = pthread_mutex_unlock(&z->mutex);
>  	assert(ret == 0);
>  	zbd_check_swd(f);
> diff --git a/zbd.h b/zbd.h
> index 021174c1..bff55f99 100644
> --- a/zbd.h
> +++ b/zbd.h
> @@ -98,18 +98,19 @@ static inline void zbd_close_file(struct fio_file *f)
>  		zbd_free_zone_info(f);
>  }
>  
> -static inline void zbd_queue_io_u(struct io_u *io_u, enum fio_q_status status)
> +static inline void zbd_queue_io_u(struct thread_data *td, struct io_u *io_u,
> +				  enum fio_q_status status)
>  {
>  	if (io_u->zbd_queue_io) {
> -		io_u->zbd_queue_io(io_u, status, io_u->error == 0);
> +		io_u->zbd_queue_io(td, io_u, status, io_u->error == 0);
>  		io_u->zbd_queue_io = NULL;
>  	}
>  }
>  
> -static inline void zbd_put_io_u(struct io_u *io_u)
> +static inline void zbd_put_io_u(struct thread_data *td, struct io_u *io_u)
>  {
>  	if (io_u->zbd_put_io) {
> -		io_u->zbd_put_io(io_u);
> +		io_u->zbd_put_io(td, io_u);
>  		io_u->zbd_queue_io = NULL;
>  		io_u->zbd_put_io = NULL;
>  	}

Overall approach looks fine to me.

Dmitry




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux