On Wed, 2021-07-28 at 19:47 +0900, Shin'ichiro Kawasaki wrote: > Enable trim workload for zonemode=zbd by modifying do_io_u_trim() to > call zoned block device unique function zbd_do_io_u_trim() which resets > target zone. This allows fio to emulate workloads which mix data > read/write and zone resets with zonemode=zbd. > > To call reset zones, the trim I/O shall have offset aligned to zone > start and block size same as zone size. Reset zone is called only to > sequential write required zones and sequential write preferred zones. > Conventional zones are handled in same manner as regular block devices > by calling os_trim() function. > > When zones are reset with random trim workload, choose only non-empty > zones as trim target. This avoids meaningless trim to empty zones and > makes the workload more realistic. To find the non-empty zones, utilize > zbd_find_zone() helper function which is already used for read > workload, > specifying 1 byte as the minimum valid data size. > > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx> > --- > HOWTO | 3 +++ > fio.1 | 2 ++ > io_u.c | 9 ++++++++ > zbd.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- > zbd.h | 2 ++ > 5 files changed, 83 insertions(+), 4 deletions(-) > > diff --git a/HOWTO b/HOWTO > index 59c7f1ff..f1fd2045 100644 > --- a/HOWTO > +++ b/HOWTO > @@ -992,6 +992,9 @@ Target file/device > single zone. The :option:`zoneskip` > parameter > is ignored. :option:`zonerange` and > :option:`zonesize` must be identical. > + Trim is handled using a zone reset > operation. > + Trim only considers non-empty > sequential write > + required and sequential write preferred > zones. The documentation changes could be moved to a separate patch. While looking at HOWTO file, I notice that libzbc is absent from the list of i/o engines there. Perhaps, since you are making changes to this file, you could add a small description of libzbc there and mention that it supports read, write and trim workloads when run against zoned block devices. > > .. option:: zonerange=int > > diff --git a/fio.1 b/fio.1 > index 6cc82542..ef319062 100644 > --- a/fio.1 > +++ b/fio.1 > @@ -766,6 +766,8 @@ starts. The \fBzonecapacity\fR parameter is > ignored. > Zoned block device mode. I/O happens sequentially in each zone, even > if random32ad4373 > I/O has been selected. Random I/O happens across all zones instead of > being > restricted to a single zone. > +Trim is handled using a zone reset operation. Trim only considers non- > empty > +sequential write required and sequential write preferred zones. > .RE > .RE > .TP > diff --git a/io_u.c b/io_u.c > index 9a1cd547..696d25cd 100644 > --- a/io_u.c > +++ b/io_u.c > @@ -2317,10 +2317,19 @@ int do_io_u_trim(const struct thread_data *td, > struct io_u *io_u) > struct fio_file *f = io_u->file; > int ret; > > + if (td->o.zone_mode == ZONE_MODE_ZBD) { > + ret = zbd_do_io_u_trim(td, io_u); > + if (ret == io_u_completed) > + return io_u->xfer_buflen; > + if (ret) > + goto err; > + } > + > ret = os_trim(f, io_u->offset, io_u->xfer_buflen); > if (!ret) > return io_u->xfer_buflen; > > +err: > io_u->error = ret; > return 0; > #endif > diff --git a/zbd.c b/zbd.c > index f10b3267..39060ecd 100644 > --- a/zbd.cpassing in an offset modifier with a value of 8. If the suffix is used with a sequential I/O > +++ b/zbd.c > @@ -375,12 +375,24 @@ static bool zbd_verify_bs(void) > int i, j, k; > > for_each_td(td, i) { > + if (td_trim(td) && > + (td->o.min_bs[DDIR_TRIM] != td- > >o.max_bs[DDIR_TRIM] || > + td->o.bssplit_nr[DDIR_TRIM])) { > + log_info("bsrange and bssplit is not allowed > for trim with zonemode=zbd\n"); > + return false; > + } > for_each_file(td, f, j) { > uint64_t zone_size; > > if (!f->zbd_info) > continue; > zone_size = f->zbd_info->zone_size; > + if (td_trim(td) && td->o.bs[DDIR_TRIM] != > zone_size) { > + log_info("%s: trim block size %llu is > not the zone size %llu\n", > + f->file_name, td- > >o.bs[DDIR_TRIM], > + (unsigned long > long)zone_size); > + return false; > + } > for (k = 0; k < FIO_ARRAY_SIZE(td->o.bs); > k++) { > if (td->o.verify != VERIFY_NONE && > zone_size % td->o.bs[k] != 0) { > @@ -1528,9 +1540,6 @@ static void zbd_queue_io(struct thread_data > *td, struct io_u *io_u, int q, > pthread_mutex_unlock(&zbd_info->mutex); > z->wp = zone_end; > break; > - case DDIR_TRIM: > - assert(z->wp == z->start); > - break; > default: > break; > } > @@ -1910,8 +1919,23 @@ enum io_u_action zbd_adjust_block(struct > thread_data *td, struct io_u *io_u) > (zbd_zone_capacity_end(zb) - io_u->offset), > min_bs); > goto eof; > case DDIR_TRIM: > - /* fall-through */ > + /* Check random trim targets a non-empty zone */ > + if (!td_random(td) || zb->wp > zb->start) > + goto accept; > + > + /* Find out a non-empty zone to trim */ > + zone_unlock(zb); > + zl = get_zone(f, f->max_zone); > + zb = zbd_find_zone(td, io_u, 1, zb, zl); > + if (zb) { > + io_u->offset = zb->start; > + dprint(FD_ZBD, "%s: found new zone(%lld) for > trim\n", > + f->file_name, io_u->offset); > + goto accept; > + } > + goto eof; > case DDIR_SYNC: > + /* fall-through */ > case DDIR_DATASYNC: > case DDIR_SYNC_FILE_RANGE: > case DDIR_WAIT: > @@ -1952,3 +1976,42 @@ char *zbd_write_status(const struct > thread_stat *ts) > return NULL; > return res; > } > + > +/** > + * zbd_do_io_u_trim - If reset zone is applicable, do reset zone > instead of trim > + * > + * @td: FIO thread data. > + * @io_u: FIO I/O unit. > + * > + * It is assumed that z->mutex is already locked. > + * Return io_u_completed when reset zone succeeds. Return 0 when the > target zone > + * does not have write pointer. On error, return negative errno. > + */ > +int zbd_do_io_u_trim(const struct thread_data *td, struct io_u > *io_u) > +{ > + struct fio_file *f = io_u->file; > + struct fio_zone_info *z; > + uint32_t zone_idx; > + int ret; > + > + zone_idx = zbd_zone_idx(f, io_u->offset); > + z = get_zone(f, zone_idx); > + > + if (!z->has_wp) > + return 0; > + > + if (io_u->offset != z->start) { > + log_err("Trim offset not at zone start (%lld)\n", > io_u->offset); > + return -EINVAL; > + } > + > + /* > + * Cast td to drop const modifier so that zbd_reset_zone() > can change td > + * members. > + */ This comment is not really necessary. > + ret = zbd_reset_zone((struct thread_data *)td, f, z); > + if (ret < 0) > + return ret; > + > + return io_u_completed; > +} > diff --git a/zbd.h b/zbd.h > index 39dc45e3..0a73b41d 100644 > --- a/zbd.h > +++ b/zbd.h > @@ -17,6 +17,7 @@ struct fio_file; > enum io_u_action { > io_u_accept = 0, > io_u_eof = 1, > + io_u_completed = 2, > }; > > /** > @@ -99,6 +100,7 @@ enum fio_ddir zbd_adjust_ddir(struct thread_data > *td, struct io_u *io_u, > enum fio_ddir ddir); > enum io_u_action zbd_adjust_block(struct thread_data *td, struct > io_u *io_u); > char *zbd_write_status(const struct thread_stat *ts); > +int zbd_do_io_u_trim(const struct thread_data *td, struct io_u > *io_u); > > static inline void zbd_close_file(struct fio_file *f) > {