Hi Dmitry, thank you for your review comments and sorry about my slow response. On Aug 15, 2020 / 00:05, Dmitry Fomichev wrote: > On Thu, 2020-08-13 at 13:57 +0900, Shin'ichiro Kawasaki wrote: > > When max_open_zones or job_max_open_zones option is provided, fio counts > > number of open zones. This open zone accounting is done during submission > > of write commands. When write command is submitted to a zone for the > > first time, the zone is counted as open. When a write command which will > > fill the zone gets submitted, the zone is counted as closed. However, > > this count at write command submission may open zones more than the > > limit. When writes to zones more than the limit are submitted with high > > queue depth, those writes in-flight open zones more than the limit. > > > > To avoid such writes beyond max open zones limit, decrement number of > > open zones count not at write command submission but at write command > > completion. By doing this, the number of zones with in-flight write > > commands are kept within the limit with accuracy. Introduce the helper > > function zbd_end_zone_io() for this decrement and zbd_close_zone() call. > > > > The zbd_close_zone() function requires thread_data argument. To refer > > thread_data at write command completion, add the argument to zbd_put_io() > > and zbd_queue_io() callbacks. Also add a loop to the zbd_close_zone() > > function which converts zone_info array index to open_zones array index > > to avoid loop code duplication. > > > > Even when io_u points to an open zone, the zone may not be valid for > > write since in-flight write commands may make the zone full. Check this > > in zbd_open_zone() to handle such zones as in full status. > > > > Because of the zone close timing change, there might be no open zone when > > zbd_convert_to_open_zone() is called. Do not handle such case as an > > error and open other zones. > > > > When zbd_convert_to_open_zone() finds all open zones can not be used for > > next write, it opens other zones. This zone open fails when the number of > > open zones hits one of the limits: 1) number of zones in the fio write > > target region, 2) max_open_zones option or 3) job_max_open_zones option. > > To avoid the zone open failure, wait for writes in-flight completes and > > open zones get closed before opening other zones. > > > > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx> > > --- > > io_u.c | 4 +-- > > io_u.h | 5 +-- > > ioengines.c | 4 +-- > > zbd.c | 92 +++++++++++++++++++++++++++++++++++++++++------------ > > zbd.h | 9 +++--- > > 5 files changed, 84 insertions(+), 30 deletions(-) > > > > diff --git a/io_u.c b/io_u.c > > index 2ef5acec..62bfec47 100644 > > --- a/io_u.c > > +++ b/io_u.c > > @@ -795,7 +795,7 @@ void put_io_u(struct thread_data *td, struct io_u *io_u) > > { > > const bool needs_lock = td_async_processing(td); > > > > - zbd_put_io_u(io_u); > > + zbd_put_io_u(td, io_u); > > > > if (td->parent) > > td = td->parent; > > @@ -1364,7 +1364,7 @@ static long set_io_u_file(struct thread_data *td, struct io_u *io_u) > > if (!fill_io_u(td, io_u)) > > break; > > > > - zbd_put_io_u(io_u); > > + zbd_put_io_u(td, io_u); > > > > put_file_log(td, f); > > td_io_close_file(td, f); > > diff --git a/io_u.h b/io_u.h > > index 31100928..5a28689c 100644 > > --- a/io_u.h > > +++ b/io_u.h > > @@ -101,13 +101,14 @@ struct io_u { > > * @success == true means that the I/O operation has been queued or > > * completed successfully. > > */ > > - void (*zbd_queue_io)(struct io_u *, int q, bool success); > > + void (*zbd_queue_io)(struct thread_data *td, struct io_u *, int q, > > + bool success); > > > > /* > > * ZBD mode zbd_put_io callback: called in after completion of an I/O > > * or commit of an async I/O to unlock the I/O target zone. > > */ > > - void (*zbd_put_io)(const struct io_u *); > > + void (*zbd_put_io)(struct thread_data *td, const struct io_u *); > > > > /* > > * Callback for io completion > > diff --git a/ioengines.c b/ioengines.c > > index 1c5970a4..476df58d 100644 > > --- a/ioengines.c > > +++ b/ioengines.c > > @@ -352,7 +352,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) > > } > > > > ret = td->io_ops->queue(td, io_u); > > - zbd_queue_io_u(io_u, ret); > > + zbd_queue_io_u(td, io_u, ret); > > > > unlock_file(td, io_u->file); > > > > @@ -394,7 +394,7 @@ enum fio_q_status td_io_queue(struct thread_data *td, struct io_u *io_u) > > if (!td->io_ops->commit) { > > io_u_mark_submit(td, 1); > > io_u_mark_complete(td, 1); > > - zbd_put_io_u(io_u); > > + zbd_put_io_u(td, io_u); > > } > > > > if (ret == FIO_Q_COMPLETED) { > > diff --git a/zbd.c b/zbd.c > > index e4a480b7..20f64b58 100644 > > --- a/zbd.c > > +++ b/zbd.c > > @@ -721,12 +721,18 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f, > > > > /* The caller must hold f->zbd_info->mutex */ > > static void zbd_close_zone(struct thread_data *td, const struct fio_file *f, > > - unsigned int open_zone_idx) > > + unsigned int zone_idx) > > { > > - uint32_t zone_idx; > > + uint32_t open_zone_idx = 0; > > + > > + for (; open_zone_idx < f->zbd_info->num_open_zones; open_zone_idx++) { > > + if (f->zbd_info->open_zones[open_zone_idx] == zone_idx) > > + break; > > + } > > + if (open_zone_idx == f->zbd_info->num_open_zones) > > I understand that the assert below needs to be removed because this function can be called > as a part of the reset of all zones. Still, maybe add a debug message saying "zone %d is > not open" here? Okay, will add the debug message in the v2. > > > + return; > > > > - assert(open_zone_idx < f->zbd_info->num_open_zones); > > - zone_idx = f->zbd_info->open_zones[open_zone_idx]; > > + dprint(FD_ZBD, "%s: closing zone %d\n", f->file_name, zone_idx); > > memmove(f->zbd_info->open_zones + open_zone_idx, > > f->zbd_info->open_zones + open_zone_idx + 1, > > (ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) * > > @@ -765,13 +771,8 @@ static int zbd_reset_zones(struct thread_data *td, struct fio_file *f, > > continue; > > zone_lock(td, f, z); > > if (all_zones) { > > - unsigned int i; > > - > > pthread_mutex_lock(&f->zbd_info->mutex); > > - for (i = 0; i < f->zbd_info->num_open_zones; i++) { > > - if (f->zbd_info->open_zones[i] == nz) > > - zbd_close_zone(td, f, i); > > - } > > + zbd_close_zone(td, f, nz); > > pthread_mutex_unlock(&f->zbd_info->mutex); > > > > reset_wp = z->wp != z->start; > > @@ -952,8 +953,15 @@ static bool zbd_open_zone(struct thread_data *td, const struct io_u *io_u, > > return false; > > > > pthread_mutex_lock(&f->zbd_info->mutex); > > - if (is_zone_open(td, f, zone_idx)) > > + if (is_zone_open(td, f, zone_idx)) { > > + /* > > + * If the zone is already open and going to be full by writes > > + * in-flight, handle it as a full zone instead of an open zone. > > + */ > > + if (z->wp >= zbd_zone_capacity_end(z)) > > + res = false; > > goto out; > > + } > > res = false; > > /* Zero means no limit */ > > if (td->o.job_max_open_zones > 0 && > > @@ -995,6 +1003,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, > > unsigned int open_zone_idx = -1; > > uint32_t zone_idx, new_zone_idx; > > int i; > > + bool wait_zone_close; > > > > assert(is_valid_offset(f, io_u->offset)); > > > > @@ -1030,11 +1039,9 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, > > if (td->o.max_open_zones == 0 && td->o.job_max_open_zones == 0) > > goto examine_zone; > > if (f->zbd_info->num_open_zones == 0) { > > - pthread_mutex_unlock(&f->zbd_info->mutex); > > - pthread_mutex_unlock(&z->mutex); > > dprint(FD_ZBD, "%s(%s): no zones are open\n", > > __func__, f->file_name); > > I was not able to trigger this message by running t/zbd/test-zbd-support script > with -z option. Do you know an easy way to trigger this situation? > I don't see anything wrong with the code here, my concern is mainly about > test coverage... The situation is recreated with the test case #27 and a zoned block device which has zone capacity smaller than zone size. The command is as follows. # ./run-tests-against-zoned-nullb -zone-cap -t 27 -z -o 12 > > > - return NULL; > > + goto open_other_zone; > > } > > > > /* > > @@ -1081,14 +1088,30 @@ examine_zone: > > pthread_mutex_unlock(&f->zbd_info->mutex); > > goto out; > > } > > - dprint(FD_ZBD, "%s(%s): closing zone %d\n", __func__, f->file_name, > > - zone_idx); > > - if (td->o.max_open_zones || td->o.job_max_open_zones) > > - zbd_close_zone(td, f, open_zone_idx); > > + > > +open_other_zone: > > + /* Check if number of open zones reaches one of limits. */ > > + wait_zone_close = > > + f->zbd_info->num_open_zones == f->max_zone - f->min_zone || > > + (td->o.max_open_zones && > > + f->zbd_info->num_open_zones == td->o.max_open_zones) || > > + (td->o.job_max_open_zones && > > + td->num_open_zones == td->o.job_max_open_zones); > > + > > pthread_mutex_unlock(&f->zbd_info->mutex); > > > > /* Only z->mutex is held. */ > > > > + /* > > + * When number of open zones reaches to one of limits, wait for > > + * zone close before opening a new zone. > > + */ > > + if (wait_zone_close) { > > + dprint(FD_ZBD, "%s(%s): quiesce to allow open zones to close\n", > > + __func__, f->file_name); > > + io_u_quiesce(td); > > + } > > + > > /* Zone 'z' is full, so try to open a new zone. */ > > for (i = f->io_size / f->zbd_info->zone_size; i > 0; i--) { > > zone_idx++; > > @@ -1203,6 +1226,29 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u, > > return NULL; > > } > > > > +/** > > + * zbd_end_zone_io - update zone status at command completion > > + * @io_u: I/O unit > > + * @z: zone info pointer > > + * @success: Whether or not the I/O unit has been queued successfully > > + * > > + * If the write command made the zone full, close it. > > + * > > + * The caller must hold z->mutex. > > + */ > > +static void zbd_end_zone_io(struct thread_data *td, const struct io_u *io_u, > > + struct fio_zone_info *z, bool success) > > +{ > > + const struct fio_file *f = io_u->file; > > + > > + if (io_u->ddir == DDIR_WRITE && success && > > + io_u->offset + io_u->buflen >= zbd_zone_capacity_end(z)) { > > + pthread_mutex_lock(&f->zbd_info->mutex); > > + zbd_close_zone(td, f, z - f->zbd_info->zone_info); > > + pthread_mutex_unlock(&f->zbd_info->mutex); > > + } > > +} > > + > > /** > > * zbd_queue_io - update the write pointer of a sequential zone > > * @io_u: I/O unit > > @@ -1212,7 +1258,8 @@ zbd_find_zone(struct thread_data *td, struct io_u *io_u, > > * For write and trim operations, update the write pointer of the I/O unit > > * target zone. > > */ > > -static void zbd_queue_io(struct io_u *io_u, int q, bool success) > > +static void zbd_queue_io(struct thread_data *td, struct io_u *io_u, int q, > > + bool success) > > { > > const struct fio_file *f = io_u->file; > > struct zoned_block_device_info *zbd_info = f->zbd_info; > > @@ -1258,6 +1305,9 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success) > > break; > > } > > > > + if (q == FIO_Q_COMPLETED) > > + zbd_end_zone_io(td, io_u, z, !io_u->error); > > This could be changed to > > + if (q == FIO_Q_COMPLETED && !io_u->error) > + zbd_end_zone_io(td, io_u, z); > > making the last parameter of zbd_end_zone_io(), "success", unnecessary. > > > + > > unlock: > > if (!success || q != FIO_Q_QUEUED) { > > /* BUSY or COMPLETED: unlock the zone */ > > @@ -1270,7 +1320,7 @@ unlock: > > * zbd_put_io - Unlock an I/O unit target zone lock > > * @io_u: I/O unit > > */ > > -static void zbd_put_io(const struct io_u *io_u) > > +static void zbd_put_io(struct thread_data *td, const struct io_u *io_u) > > { > > const struct fio_file *f = io_u->file; > > struct zoned_block_device_info *zbd_info = f->zbd_info; > > @@ -1292,6 +1342,8 @@ static void zbd_put_io(const struct io_u *io_u) > > "%s: terminate I/O (%lld, %llu) for zone %u\n", > > f->file_name, io_u->offset, io_u->buflen, zone_idx); > > > > + zbd_end_zone_io(td, io_u, z, true); > > The last parameter in this call above can be removed with the change above. Thanks! This idea will make the code simpler. Will reflect to v2. > > > + > > ret = pthread_mutex_unlock(&z->mutex); > > assert(ret == 0); > > zbd_check_swd(f); > > diff --git a/zbd.h b/zbd.h > > index 021174c1..bff55f99 100644 > > --- a/zbd.h > > +++ b/zbd.h > > @@ -98,18 +98,19 @@ static inline void zbd_close_file(struct fio_file *f) > > zbd_free_zone_info(f); > > } > > > > -static inline void zbd_queue_io_u(struct io_u *io_u, enum fio_q_status status) > > +static inline void zbd_queue_io_u(struct thread_data *td, struct io_u *io_u, > > + enum fio_q_status status) > > { > > if (io_u->zbd_queue_io) { > > - io_u->zbd_queue_io(io_u, status, io_u->error == 0); > > + io_u->zbd_queue_io(td, io_u, status, io_u->error == 0); > > io_u->zbd_queue_io = NULL; > > } > > } > > > > -static inline void zbd_put_io_u(struct io_u *io_u) > > +static inline void zbd_put_io_u(struct thread_data *td, struct io_u *io_u) > > { > > if (io_u->zbd_put_io) { > > - io_u->zbd_put_io(io_u); > > + io_u->zbd_put_io(td, io_u); > > io_u->zbd_queue_io = NULL; > > io_u->zbd_put_io = NULL; > > } > > Overall approach looks fine to me. > > Dmitry -- Best Regards, Shin'ichiro Kawasaki