On 2020/05/06 2:57, Alexey Dobriyan wrote: > It is not possible to maintain sustained per-thread iodepth in ZBD mode. > The way code is written, "max_open_zones" acts as a global limit, and > once one or few threads open all "max_open_zones" zones, other threads > can't open anything and _exit_ prematurely. > > This config is guaranteed to make equal number of zone resets/IO now: > each thread generates identical pattern and doesn't intersect with other > threads: > > zonemode=zbd > zonesize=... > rw=write > > numjobs=N > offset_increment=M*zonesize > > [j] > size=M*zonesize > > Patch introduces "job_max_open_zones" which is per-thread/process limit. > "max_open_zones" remains per file/device limit. Both limits are checked > for each open zone so one thread can't kick out others. > > Signed-off-by: Alexey Dobriyan (SK hynix) <adobriyan@xxxxxxxxx> > --- > fio.1 | 6 +++++- > fio.h | 1 + > options.c | 16 +++++++++++++--- > thread_options.h | 1 + > zbd.c | 49 +++++++++++++++++++++++++++++++++++++----------- > zbd.h | 3 +++ > 6 files changed, 61 insertions(+), 15 deletions(-) > > diff --git a/fio.1 b/fio.1 > index a2379f98..c0f2e7cf 100644 > --- a/fio.1 > +++ b/fio.1 > @@ -804,7 +804,11 @@ so. Default: false. > When running a random write test across an entire drive many more zones will be > open than in a typical application workload. Hence this command line option > that allows to limit the number of open zones. The number of open zones is > -defined as the number of zones to which write commands are issued. > +defined as the number of zones to which write commands are issued by all > +threads/processes. > +.TP > +.BI job_max_open_zones \fR=\fPint > +Limit on the number of simultaneously opened zones per single thread/process. > .TP > .BI zone_reset_threshold \fR=\fPfloat > A number between zero and one that indicates the ratio of logical blocks with > diff --git a/fio.h b/fio.h > index bbf057c1..20ca80e2 100644 > --- a/fio.h > +++ b/fio.h > @@ -260,6 +260,7 @@ struct thread_data { > struct frand_state prio_state; > > struct zone_split_index **zone_state_index; > + unsigned int num_open_zones; > > unsigned int verify_batch; > unsigned int trim_batch; > diff --git a/options.c b/options.c > index 2372c042..47b3b765 100644 > --- a/options.c > +++ b/options.c > @@ -3360,12 +3360,22 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { > }, > { > .name = "max_open_zones", > - .lname = "Maximum number of open zones", > + .lname = "Global maximum number of open zones", Instead of "Global", may be use "Per device file" ? Jobs operating on different files can each define a different value for max_open_zones, which really in that use case, make it a per device file limit. > .type = FIO_OPT_INT, > .off1 = offsetof(struct thread_options, max_open_zones), > .maxval = ZBD_MAX_OPEN_ZONES, > - .help = "Limit random writes to SMR drives to the specified" > - " number of sequential zones", > + .help = "Limit on the number of simultaneously opened sequential write zones in SMR/ZNS drives", Please change "in SMR/ZNS drives" to "with zonemode=zbd" since we can use regular disks with emulated zones too. > + .def = "0", > + .category = FIO_OPT_C_IO, > + .group = FIO_OPT_G_INVALID, > + }, > + { > + .name = "job_max_open_zones", > + .lname = "Job maximum number of open zones", > + .type = FIO_OPT_INT, > + .off1 = offsetof(struct thread_options, job_max_open_zones), > + .maxval = ZBD_MAX_OPEN_ZONES, > + .help = "Limit on the number of simultaneously opened sequential write zones in SMR/ZNS drives by one thread/process", Same here. > .def = "0", > .category = FIO_OPT_C_IO, > .group = FIO_OPT_G_INVALID, > diff --git a/thread_options.h b/thread_options.h > index c78ed43d..b0b493e4 100644 > --- a/thread_options.h > +++ b/thread_options.h > @@ -342,6 +342,7 @@ struct thread_options { > /* Parameters that affect zonemode=zbd */ > unsigned int read_beyond_wp; > int max_open_zones; > + unsigned int job_max_open_zones; > fio_fp64_t zrt; > fio_fp64_t zrf; > }; > diff --git a/zbd.c b/zbd.c > index 76771f2e..b34ceb34 100644 > --- a/zbd.c > +++ b/zbd.c > @@ -546,8 +546,10 @@ static int zbd_create_zone_info(struct thread_data *td, struct fio_file *f) > return -EINVAL; > } > > - if (ret == 0) > + if (ret == 0) { > f->zbd_info->model = zbd_model; > + f->zbd_info->max_open_zones = td->o.max_open_zones; > + } > return ret; > } > > @@ -622,6 +624,27 @@ int zbd_init(struct thread_data *td) > if (!zbd_verify_bs()) > return 1; > > + for_each_file(td, f, i) { > + struct zoned_block_device_info *zbd = f->zbd_info; > + > + if (!zbd) > + continue; > + > + if (zbd->max_open_zones == 0) { > + zbd->max_open_zones = ZBD_MAX_OPEN_ZONES; > + } No need for the "{}" brackets here. > + > + if (td->o.max_open_zones > 0 && > + zbd->max_open_zones != td->o.max_open_zones) { > + log_err("Different 'max_open_zones' values\n"); > + return 1; > + } > + if (zbd->max_open_zones > ZBD_MAX_OPEN_ZONES) { > + log_err("'max_open_zones' value is limited by %u\n", ZBD_MAX_OPEN_ZONES); > + return 1; > + } > + } > + > return 0; > } > > @@ -709,6 +732,7 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f, > (ZBD_MAX_OPEN_ZONES - (open_zone_idx + 1)) * > sizeof(f->zbd_info->open_zones[0])); > f->zbd_info->num_open_zones--; > + td->num_open_zones--; > f->zbd_info->zone_info[zone_idx].open = 0; > } > > @@ -888,8 +912,10 @@ static bool is_zone_open(const struct thread_data *td, const struct fio_file *f, > struct zoned_block_device_info *zbdi = f->zbd_info; > int i; > > - assert(td->o.max_open_zones <= ARRAY_SIZE(zbdi->open_zones)); > - assert(zbdi->num_open_zones <= td->o.max_open_zones); > + assert(td->o.job_max_open_zones == 0 || td->num_open_zones <= td->o.job_max_open_zones); > + I do not think that this white line is needed. > + assert(td->o.job_max_open_zones <= zbdi->max_open_zones); > + assert(zbdi->num_open_zones <= zbdi->max_open_zones); > > for (i = 0; i < zbdi->num_open_zones; i++) > if (zbdi->open_zones[i] == zone_idx) > @@ -922,18 +948,19 @@ static bool zbd_open_zone(struct thread_data *td, const struct io_u *io_u, > if (td->o.verify != VERIFY_NONE && zbd_zone_full(f, z, min_bs)) > return false; > > - /* Zero means no limit */ > - if (!td->o.max_open_zones) > - return true; > - > pthread_mutex_lock(&f->zbd_info->mutex); > if (is_zone_open(td, f, zone_idx)) > goto out; > res = false; > - if (f->zbd_info->num_open_zones >= td->o.max_open_zones) > + /* Zero means no limit */ > + if (td->o.job_max_open_zones > 0 && > + td->num_open_zones >= td->o.job_max_open_zones) > + goto out; > + if (f->zbd_info->num_open_zones >= f->zbd_info->max_open_zones) > goto out; > dprint(FD_ZBD, "%s: opening zone %d\n", f->file_name, zone_idx); > f->zbd_info->open_zones[f->zbd_info->num_open_zones++] = zone_idx; > + td->num_open_zones++; > z->open = 1; > res = true; > > @@ -968,7 +995,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, > > assert(is_valid_offset(f, io_u->offset)); > > - if (td->o.max_open_zones) { > + if (td->o.job_max_open_zones) { > /* > * This statement accesses f->zbd_info->open_zones[] on purpose > * without locking. > @@ -997,7 +1024,7 @@ static struct fio_zone_info *zbd_convert_to_open_zone(struct thread_data *td, > > zone_lock(td, f, z); > pthread_mutex_lock(&f->zbd_info->mutex); > - if (td->o.max_open_zones == 0) > + if (td->o.job_max_open_zones == 0) > goto examine_zone; > if (f->zbd_info->num_open_zones == 0) { > pthread_mutex_unlock(&f->zbd_info->mutex); > @@ -1053,7 +1080,7 @@ examine_zone: > } > dprint(FD_ZBD, "%s(%s): closing zone %d\n", __func__, f->file_name, > zone_idx); > - if (td->o.max_open_zones) > + if (td->o.job_max_open_zones) > zbd_close_zone(td, f, open_zone_idx); > pthread_mutex_unlock(&f->zbd_info->mutex); > > diff --git a/zbd.h b/zbd.h > index 5a660399..fb39fb82 100644 > --- a/zbd.h > +++ b/zbd.h > @@ -45,6 +45,8 @@ struct fio_zone_info { > /** > * zoned_block_device_info - zoned block device characteristics > * @model: Device model. > + * @max_open_zones: global limit on the number of simultaneously opened > + * sequential write zones. > * @mutex: Protects the modifiable members in this structure (refcount and > * num_open_zones). > * @zone_size: size of a single zone in units of 512 bytes > @@ -65,6 +67,7 @@ struct fio_zone_info { > */ > struct zoned_block_device_info { > enum zbd_zoned_model model; > + uint32_t max_open_zones; > pthread_mutex_t mutex; > uint64_t zone_size; > uint64_t sectors_with_data; > Apart from the nits above, it looks good. -- Damien Le Moal Western Digital Research