On 2020/04/30 21:41, Alexey Dobriyan wrote: > It is not possible to maintain equal per-thread iodepth. The way code > is written, "max_open_zones" acts as a global limit, and one thread > opens all "max_open_zones" for itself and others starve for available > zones and _exit_ prematurely. > > This config is guaranteed to make equal number of zone resets/IO now: > each thread generates identical pattern and doesn't intersect with other > threads: > > zonemode=zbd > zonesize=... > rw=write > > numjobs=N > offset_increment=M*zonesize > > [j] > size=M*zonesize > > Patch introduces "global_max_open_zones" which is per-device config > option. "max_open_zones" becomes per-thread limit. Both limits are > checked for each open zone so one thread can't starve others. It makes sense. Nice one. But the change as is will break existing test scripts (e.g. lots of SMR drives are being tested with this). I think we can avoid this breakage simply: leave max_open_zones option definition as is and add "job_max_open_zones" or "thread_max_open_zones" option (no strong feelings about the name here, as long as it is explicit) to define the per thread maximum number of open zones. This new option could actually default to max_open_zones / numjobs if that is not 0. > > Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@xxxxxxxxx> > --- > fio.1 | 6 +++++- > fio.h | 1 + > options.c | 14 ++++++++++++-- > thread_options.h | 1 + > zbd.c | 37 +++++++++++++++++++++++++++++++++---- > zbd.h | 3 +++ > 6 files changed, 55 insertions(+), 7 deletions(-) > > diff --git a/fio.1 b/fio.1 > index a2379f98..1c04a3af 100644 > --- a/fio.1 > +++ b/fio.1 > @@ -804,7 +804,11 @@ so. Default: false. > When running a random write test across an entire drive many more zones will be > open than in a typical application workload. Hence this command line option > that allows to limit the number of open zones. The number of open zones is > -defined as the number of zones to which write commands are issued. > +defined as the number of zones to which write commands are issued by one > +thread/process. > +.TP > +.BI global_max_open_zones \fR=\fPint > +Global limit on the number of simultaneously opened zones per block device. > .TP > .BI zone_reset_threshold \fR=\fPfloat > A number between zero and one that indicates the ratio of logical blocks with > diff --git a/fio.h b/fio.h > index bbf057c1..20ca80e2 100644 > --- a/fio.h > +++ b/fio.h > @@ -260,6 +260,7 @@ struct thread_data { > struct frand_state prio_state; > > struct zone_split_index **zone_state_index; > + unsigned int num_open_zones; > > unsigned int verify_batch; > unsigned int trim_batch; > diff --git a/options.c b/options.c > index 2372c042..306874ea 100644 > --- a/options.c > +++ b/options.c > @@ -3364,8 +3364,18 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { > .type = FIO_OPT_INT, > .off1 = offsetof(struct thread_options, max_open_zones), > .maxval = ZBD_MAX_OPEN_ZONES, > - .help = "Limit random writes to SMR drives to the specified" > - " number of sequential zones", > + .help = "Thread/process limit on the number of simultaneously opened sequential write zones in SMR/ZNS drives", > + .def = "0", > + .category = FIO_OPT_C_IO, > + .group = FIO_OPT_G_INVALID, > + }, > + { > + .name = "global_max_open_zones", > + .lname = "Global maximum number of open zones", > + .type = FIO_OPT_INT, > + .off1 = offsetof(struct thread_options, global_max_open_zones), > + .maxval = ZBD_MAX_OPEN_ZONES, > + .help = "Block device limit on the number of simultaneously opened sequential write zones in SMR/ZNS drives", > .def = "0", > .category = FIO_OPT_C_IO, > .group = FIO_OPT_G_INVALID, > diff --git a/thread_options.h b/thread_options.h > index c78ed43d..4078d46f 100644 > --- a/thread_options.h > +++ b/thread_options.h > @@ -342,6 +342,7 @@ struct thread_options { > /* Parameters that affect zonemode=zbd */ > unsigned int read_beyond_wp; > int max_open_zones; > + unsigned int global_max_open_zones; > fio_fp64_t zrt; > fio_fp64_t zrf; > }; > diff --git a/zbd.c b/zbd.c > index e8f0a4d8..a517349a 100644 > --- a/zbd.c > +++ b/zbd.c > @@ -546,8 +546,10 @@ static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f) > return -EINVAL; > } > > - if (ret == 0) > + if (ret == 0) { > f->zbd_info->model = zbd_model; > + f->zbd_info->max_open_zones = td->o.global_max_open_zones; > + } > return ret; > } > > @@ -622,6 +624,27 @@ int zbd_init(struct thread_data *td) > if (!zbd_verify_bs()) > return 1; > > + for_each_file(td, f, i) { > + struct zoned_block_device_info *zbd = f->zbd_info; > + > + if (!zbd) > + continue; > + > + if (zbd->max_open_zones == 0) { > + zbd->max_open_zones = ZBD_MAX_OPEN_ZONES; > + } > + > + if (td->o.global_max_open_zones && > + zbd->max_open_zones != td->o.global_max_open_zones) { > + log_err("Different 'global_max_open_zones' values\n"); > + return 1; > + } > + if (zbd->max_open_zones > ZBD_MAX_OPEN_ZONES) { > + log_err("'global_max_open_zones' value is limited by %u\n", ZBD_MAX_OPEN_ZONES); > + return 1; > + } > + } > + > return 0; > } > > @@ -714,6 +737,7 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f, > (ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) * > sizeof(f->zbd_info->open_zones[0])); > f->zbd_info->num_open_zones--; > + td->num_open_zones--; > f->zbd_info->zone_info[zone_idx].open = 0; > } > > @@ -895,8 +919,10 @@ static bool is_zone_open(const struct thread_data *td, const struct fio_file *f, > struct zoned_block_device_info *zbdi = f->zbd_info; > int i; > > - assert(td->o.max_open_zones <= ARRAY_SIZE(zbdi->open_zones)); > - assert(zbdi->num_open_zones <= td->o.max_open_zones); > + assert(td->o.max_open_zones == 0 || td->num_open_zones <= td->o.max_open_zones); > + > + assert(td->o.max_open_zones <= zbdi->max_open_zones); > + assert(zbdi->num_open_zones <= zbdi->max_open_zones); > > for (i = 0; i < zbdi->num_open_zones; i++) > if (zbdi->open_zones[i] == zone_idx) > @@ -937,10 +963,13 @@ static bool zbd_open_zone(struct thread_data *td, const struct io_u *io_u, > if (is_zone_open(td, f, zone_idx)) > goto out; > res = false; > - if (f->zbd_info->num_open_zones >= td->o.max_open_zones) > + if (td->num_open_zones >= td->o.max_open_zones) > + goto out; > + if (f->zbd_info->num_open_zones >= f->zbd_info->max_open_zones) > goto out; > dprint(FD_ZBD, "%s: opening zone %d\n", f->file_name, zone_idx); > f->zbd_info->open_zones[f->zbd_info->num_open_zones++] = zone_idx; > + td->num_open_zones++; > z->open = 1; > res = true; > > diff --git a/zbd.h b/zbd.h > index 5a660399..fb39fb82 100644 > --- a/zbd.h > +++ b/zbd.h > @@ -45,6 +45,8 @@ struct fio_zone_info { > /** > * zoned_block_device_info - zoned block device characteristics > * @model: Device model. > + * @max_open_zones: global limit on the number of simultaneously opened > + * sequential write zones. > * @mutex: Protects the modifiable members in this structure (refcount and > * num_open_zones). > * @zone_size: size of a single zone in units of 512 bytes > @@ -65,6 +67,7 @@ struct fio_zone_info { > */ > struct zoned_block_device_info { > enum zbd_zoned_model model; > + uint32_t max_open_zones; > pthread_mutex_t mutex; > uint64_t zone_size; > uint64_t sectors_with_data; > -- Damien Le Moal Western Digital Research