Re: [PATCH 1/6] zbd: Decrement open zones count at write command completion

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Dmitry, thank you for your review comments and sorry about my slow response.

On Aug 15, 2020 / 00:05, Dmitry Fomichev wrote:
> On Thu, 2020-08-13 at 13:57 +0900, Shin'ichiro Kawasaki wrote:
> > When max_open_zones or job_max_open_zones option is provided, fio counts
> > number of open zones. This open zone accounting is done during submission
> > of write commands. When write command is submitted to a zone for the
> > first time, the zone is counted as open. When a write command which will
> > fill the zone gets submitted, the zone is counted as closed. However,
> > this count at write command submission may open zones more than the
> > limit. When writes to zones more than the limit are submitted with high
> > queue depth, those writes in-flight open zones more than the limit.
> > 
> > To avoid such writes beyond max open zones limit, decrement number of
> > open zones count not at write command submission but at write command
> > completion. By doing this, the number of zones with in-flight write
> > commands are kept within the limit with accuracy. Introduce the helper
> > function zbd_end_zone_io() for this decrement and zbd_close_zone() call.
> > 
> > The zbd_close_zone() function requires thread_data argument. To refer
> > thread_data at write command completion, add the argument to zbd_put_io()
> > and zbd_queue_io() callbacks. Also add a loop to the zbd_close_zone()
> > function which converts zone_info array index to open_zones array index
> > to avoid loop code duplication.
> > 
> > Even when io_u points to an open zone, the zone may not be valid for
> > write since in-flight write commands may make the zone full. Check this
> > in zbd_open_zone() to handle such zones as in full status.
> > 
> > Because of the zone close timing change, there might be no open zone when
> > zbd_convert_to_open_zone() is called. Do not handle such case as an
> > error and open other zones.
> > 
> > When zbd_convert_to_open_zone() finds all open zones can not be used for
> > next write, it opens other zones. This zone open fails when the number of
> > open zones hits one of the limits: 1) number of zones in the fio write
> > target region, 2) max_open_zones option or 3) job_max_open_zones option.
> > To avoid the zone open failure, wait for writes in-flight completes and
> > open zones get closed before opening other zones.
> > 
> > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
> > ---
> >  io_u.c      |  4 +--
> >  io_u.h      |  5 +--
> >  ioengines.c |  4 +--
> >  zbd.c       | 92 +++++++++++++++++++++++++++++++++++++++++------------
> >  zbd.h       |  9 +++---
> >  5 files changed, 84 insertions(+), 30 deletions(-)
> > 
> > diff --git a/io_u.c b/io_u.c
> > index 2ef5acec..62bfec47 100644
> > --- a/io_u.c
> > +++ b/io_u.c
> > @@ -795,7 +795,7 @@ void put_io_u(struct thread_data *td, struct io_u *io_u)
> >  {
> >  	const bool needs_lock = td_async_processing(td);
> >  
> > -	zbd_put_io_u(io_u);
> > +	zbd_put_io_u(td, io_u);
> >  
> >  	if (td->parent)
> >  		td = td->parent;
> > @@ -1364,7 +1364,7 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u)
> >  		if (!fill_io_u(td, io_u))
> >  			break;
> >  
> > -		zbd_put_io_u(io_u);
> > +		zbd_put_io_u(td, io_u);
> >  
> >  		put_file_log(td, f);
> >  		td_io_close_file(td, f);
> > diff --git a/io_u.h b/io_u.h
> > index 31100928..5a28689c 100644
> > --- a/io_u.h
> > +++ b/io_u.h
> > @@ -101,13 +101,14 @@ struct io_u {
> >  	 * @success == true means that the I/O operation has been queued or
> >  	 * completed successfully.
> >  	 */
> > -	void (*zbd_queue_io)(struct io_u *, int q, bool success);
> > +	void (*zbd_queue_io)(struct thread_data *td, struct io_u *, int q,
> > +			     bool success);
> >  
> >  	/*
> >  	 * ZBD mode zbd_put_io callback: called in after completion of an I/O
> >  	 * or commit of an async I/O to unlock the I/O target zone.
> >  	 */
> > -	void (*zbd_put_io)(const struct io_u *);
> > +	void (*zbd_put_io)(struct thread_data *td, const struct io_u *);
> >  
> >  	/*
> >  	 * Callback for io completion
> > diff --git a/ioengines.c b/ioengines.c
> > index 1c5970a4..476df58d 100644
> > --- a/ioengines.c
> > +++ b/ioengines.c
> > @@ -352,7 +352,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
> >  	}
> >  
> >  	ret = td->io_ops->queue(td, io_u);
> > -	zbd_queue_io_u(io_u, ret);
> > +	zbd_queue_io_u(td, io_u, ret);
> >  
> >  	unlock_file(td, io_u->file);
> >  
> > @@ -394,7 +394,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u)
> >  	if (!td->io_ops->commit) {
> >  		io_u_mark_submit(td, 1);
> >  		io_u_mark_complete(td, 1);
> > -		zbd_put_io_u(io_u);
> > +		zbd_put_io_u(td, io_u);
> >  	}
> >  
> >  	if (ret == FIO_Q_COMPLETED) {
> > diff --git a/zbd.c b/zbd.c
> > index e4a480b7..20f64b58 100644
> > --- a/zbd.c
> > +++ b/zbd.c
> > @@ -721,12 +721,18 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
> >  
> >  /* The caller must hold f->zbd_info->mutex */
> >  static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
> > -			   unsigned int open_zone_idx)
> > +			   unsigned int zone_idx)
> >  {
> > -	uint32_t zone_idx;
> > +	uint32_t open_zone_idx = 0;
> > +
> > +	for (; open_zone_idx < f->zbd_info->num_open_zones; open_zone_idx++) {
> > +		if (f->zbd_info->open_zones[open_zone_idx] == zone_idx)
> > +			break;
> > +	}
> > +	if (open_zone_idx == f->zbd_info->num_open_zones)
> 
> I understand that the assert below needs to be removed because this function can be called
> as a part of the reset of all zones. Still, maybe add a debug message saying "zone %d is
> not open" here?

Okay, will add the debug message in the v2.

> 
> > +		return;
> >  
> > -	assert(open_zone_idx < f->zbd_info->num_open_zones);
> > -	zone_idx = f->zbd_info->open_zones[open_zone_idx];
> > +	dprint(FD_ZBD, "%s: closing zone %d\n", f->file_name, zone_idx);
> >  	memmove(f->zbd_info->open_zones + open_zone_idx,
> >  		f->zbd_info->open_zones + open_zone_idx + 1,
> >  		(ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) *
> > @@ -765,13 +771,8 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f,
> >  			continue;
> >  		zone_lock(td, f, z);
> >  		if (all_zones) {
> > -			unsigned int i;
> > -
> >  			pthread_mutex_lock(&f->zbd_info->mutex);
> > -			for (i = 0; i < f->zbd_info->num_open_zones; i++) {
> > -				if (f->zbd_info->open_zones[i] == nz)
> > -					zbd_close_zone(td, f, i);
> > -			}
> > +			zbd_close_zone(td, f, nz);
> >  			pthread_mutex_unlock(&f->zbd_info->mutex);
> >  
> >  			reset_wp = z->wp != z->start;
> > @@ -952,8 +953,15 @@ static bool zbd_open_zone(struct thread_data *td, const struct io_u *io_u,
> >  		return false;
> >  
> >  	pthread_mutex_lock(&f->zbd_info->mutex);
> > -	if (is_zone_open(td, f, zone_idx))
> > +	if (is_zone_open(td, f, zone_idx)) {
> > +		/*
> > +		 * If the zone is already open and going to be full by writes
> > +		 * in-flight, handle it as a full zone instead of an open zone.
> > +		 */
> > +		if (z->wp >= zbd_zone_capacity_end(z))
> > +			res = false;
> >  		goto out;
> > +	}
> >  	res = false;
> >  	/* Zero means no limit */
> >  	if (td->o.job_max_open_zones > 0 &&
> > @@ -995,6 +1003,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
> >  	unsigned int open_zone_idx = -1;
> >  	uint32_t zone_idx, new_zone_idx;
> >  	int i;
> > +	bool wait_zone_close;
> >  
> >  	assert(is_valid_offset(f, io_u->offset));
> >  
> > @@ -1030,11 +1039,9 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td,
> >  		if (td->o.max_open_zones == 0 && td->o.job_max_open_zones == 0)
> >  			goto examine_zone;
> >  		if (f->zbd_info->num_open_zones == 0) {
> > -			pthread_mutex_unlock(&f->zbd_info->mutex);
> > -			pthread_mutex_unlock(&z->mutex);
> >  			dprint(FD_ZBD, "%s(%s): no zones are open\n",
> >  			       __func__, f->file_name);
> 
> I was not able to trigger this message by running t/zbd/test-zbd-support script
> with -z option. Do you know an easy way to trigger this situation?
> I don't see anything wrong with the code here, my concern is mainly about
> test coverage...

The situation is recreated with the test case #27 and a zoned block device
which has zone capacity smaller than zone size. The command is as follows.

# ./run-tests-against-zoned-nullb -zone-cap -t 27 -z -o 12

> 
> > -			return NULL;
> > +			goto open_other_zone;
> >  		}
> >  
> >  		/*
> > @@ -1081,14 +1088,30 @@ examine_zone:
> >  		pthread_mutex_unlock(&f->zbd_info->mutex);
> >  		goto out;
> >  	}
> > -	dprint(FD_ZBD, "%s(%s): closing zone %d\n", __func__, f->file_name,
> > -	       zone_idx);
> > -	if (td->o.max_open_zones || td->o.job_max_open_zones)
> > -		zbd_close_zone(td, f, open_zone_idx);
> > +
> > +open_other_zone:
> > +	/* Check if number of open zones reaches one of limits. */
> > +	wait_zone_close =
> > +		f->zbd_info->num_open_zones == f->max_zone - f->min_zone ||
> > +		(td->o.max_open_zones &&
> > +		 f->zbd_info->num_open_zones == td->o.max_open_zones) ||
> > +		(td->o.job_max_open_zones &&
> > +		 td->num_open_zones == td->o.job_max_open_zones);
> > +
> >  	pthread_mutex_unlock(&f->zbd_info->mutex);
> >  
> >  	/* Only z->mutex is held. */
> >  
> > +	/*
> > +	 * When number of open zones reaches to one of limits, wait for
> > +	 * zone close before opening a new zone.
> > +	 */
> > +	if (wait_zone_close) {
> > +		dprint(FD_ZBD, "%s(%s): quiesce to allow open zones to close\n",
> > +		       __func__, f->file_name);
> > +		io_u_quiesce(td);
> > +	}
> > +
> >  	/* Zone 'z' is full, so try to open a new zone. */
> >  	for (i = f->io_size / f->zbd_info->zone_size; i > 0; i--) {
> >  		zone_idx++;
> > @@ -1203,6 +1226,29 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u,
> >  	return NULL;
> >  }
> >  
> > +/**
> > + * zbd_end_zone_io - update zone status at command completion
> > + * @io_u: I/O unit
> > + * @z: zone info pointer
> > + * @success: Whether or not the I/O unit has been queued successfully
> > + *
> > + * If the write command made the zone full, close it.
> > + *
> > + * The caller must hold z->mutex.
> > + */
> > +static void zbd_end_zone_io(struct thread_data *td, const struct io_u *io_u,
> > +			    struct fio_zone_info *z, bool success)
> > +{
> > +	const struct fio_file *f = io_u->file;
> > +
> > +	if (io_u->ddir == DDIR_WRITE && success &&
> > +	    io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) {
> > +		pthread_mutex_lock(&f->zbd_info->mutex);
> > +		zbd_close_zone(td, f, z - f->zbd_info->zone_info);
> > +		pthread_mutex_unlock(&f->zbd_info->mutex);
> > +	}
> > +}
> > +
> >  /**
> >   * zbd_queue_io - update the write pointer of a sequential zone
> >   * @io_u: I/O unit
> > @@ -1212,7 +1258,8 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u,
> >   * For write and trim operations, update the write pointer of the I/O unit
> >   * target zone.
> >   */
> > -static void zbd_queue_io(struct io_u *io_u, int q, bool success)
> > +static void zbd_queue_io(struct thread_data *td, struct io_u *io_u, int q,
> > +			 bool success)
> >  {
> >  	const struct fio_file *f = io_u->file;
> >  	struct zoned_block_device_info *zbd_info = f->zbd_info;
> > @@ -1258,6 +1305,9 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
> >  		break;
> >  	}
> >  
> > +	if (q == FIO_Q_COMPLETED)
> > +		zbd_end_zone_io(td, io_u, z, !io_u->error);
> 
> This could be changed to
> 
> +	if (q == FIO_Q_COMPLETED && !io_u->error)
> +		zbd_end_zone_io(td, io_u, z);
> 
> making the last parameter of zbd_end_zone_io(), "success", unnecessary.
> 
> > +
> >  unlock:
> >  	if (!success || q != FIO_Q_QUEUED) {
> >  		/* BUSY or COMPLETED: unlock the zone */
> > @@ -1270,7 +1320,7 @@ unlock:
> >   * zbd_put_io - Unlock an I/O unit target zone lock
> >   * @io_u: I/O unit
> >   */
> > -static void zbd_put_io(const struct io_u *io_u)
> > +static void zbd_put_io(struct thread_data *td, const struct io_u *io_u)
> >  {
> >  	const struct fio_file *f = io_u->file;
> >  	struct zoned_block_device_info *zbd_info = f->zbd_info;
> > @@ -1292,6 +1342,8 @@ static void zbd_put_io(const struct io_u *io_u)
> >  	       "%s: terminate I/O (%lld, %llu) for zone %u\n",
> >  	       f->file_name, io_u->offset, io_u->buflen, zone_idx);
> >  
> > +	zbd_end_zone_io(td, io_u, z, true);
> 
> The last parameter in this call above can be removed with the change above.

Thanks! This idea will make the code simpler. Will reflect to v2.

> 
> > +
> >  	ret = pthread_mutex_unlock(&z->mutex);
> >  	assert(ret == 0);
> >  	zbd_check_swd(f);
> > diff --git a/zbd.h b/zbd.h
> > index 021174c1..bff55f99 100644
> > --- a/zbd.h
> > +++ b/zbd.h
> > @@ -98,18 +98,19 @@ static inline void zbd_close_file(struct fio_file *f)
> >  		zbd_free_zone_info(f);
> >  }
> >  
> > -static inline void zbd_queue_io_u(struct io_u *io_u, enum fio_q_status status)
> > +static inline void zbd_queue_io_u(struct thread_data *td, struct io_u *io_u,
> > +				  enum fio_q_status status)
> >  {
> >  	if (io_u->zbd_queue_io) {
> > -		io_u->zbd_queue_io(io_u, status, io_u->error == 0);
> > +		io_u->zbd_queue_io(td, io_u, status, io_u->error == 0);
> >  		io_u->zbd_queue_io = NULL;
> >  	}
> >  }
> >  
> > -static inline void zbd_put_io_u(struct io_u *io_u)
> > +static inline void zbd_put_io_u(struct thread_data *td, struct io_u *io_u)
> >  {
> >  	if (io_u->zbd_put_io) {
> > -		io_u->zbd_put_io(io_u);
> > +		io_u->zbd_put_io(td, io_u);
> >  		io_u->zbd_queue_io = NULL;
> >  		io_u->zbd_put_io = NULL;
> >  	}
> 
> Overall approach looks fine to me.
> 
> Dmitry

-- 
Best Regards,
Shin'ichiro Kawasaki



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux