To decide the first IO direction of a randrw workload, the function zbd_adjust_ddir() refers to the zbd_info->sectors_with_data value, which indicates the number of bytes written to the zoned block devices being accessed. However, this accounting has two issues. The first issue is wrong accounting for multiple jobs with different write ranges. The second issue is job start-up failure due to zone lock contention. Avoid using zbd_info->sectors_with_data and simply refer to file->last_start[DDIR_WRITE] instead. This field is initialized with -1ULL for each job. After any write operation is done by the job, it holds a valid offset. If it holds a valid offset, written data is expected to exist and the first IO direction can be a read. Also remove zbd_info->sectors_with_data, which is no longer used. Keep the field zbd_info->wp_sectors_with_data since it is still used for zones with write pointers. Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx> --- zbd.c | 14 +++----------- zbd.h | 2 -- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/zbd.c b/zbd.c index d1e469f6..f5e76c40 100644 --- a/zbd.c +++ b/zbd.c @@ -286,7 +286,6 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f, } pthread_mutex_lock(&f->zbd_info->mutex); - f->zbd_info->sectors_with_data -= data_in_zone; f->zbd_info->wp_sectors_with_data -= data_in_zone; pthread_mutex_unlock(&f->zbd_info->mutex); @@ -1201,7 +1200,6 @@ static uint64_t zbd_process_swd(struct thread_data *td, const struct fio_file *f, enum swd_action a) { struct fio_zone_info *zb, *ze, *z; - uint64_t swd = 0; uint64_t wp_swd = 0; zb = zbd_get_zone(f, f->min_zone); @@ -1211,17 +1209,14 @@ static uint64_t zbd_process_swd(struct thread_data *td, zone_lock(td, f, z); wp_swd += z->wp - z->start; } - swd += z->wp - z->start; } pthread_mutex_lock(&f->zbd_info->mutex); switch (a) { case CHECK_SWD: - assert(f->zbd_info->sectors_with_data == swd); assert(f->zbd_info->wp_sectors_with_data == wp_swd); break; case SET_SWD: - 
f->zbd_info->sectors_with_data = swd; f->zbd_info->wp_sectors_with_data = wp_swd; break; } @@ -1231,7 +1226,7 @@ static uint64_t zbd_process_swd(struct thread_data *td, if (z->has_wp) zone_unlock(z); - return swd; + return wp_swd; } /* @@ -1640,10 +1635,8 @@ static void zbd_queue_io(struct thread_data *td, struct io_u *io_u, int q, * have occurred. */ pthread_mutex_lock(&zbd_info->mutex); - if (z->wp <= zone_end) { - zbd_info->sectors_with_data += zone_end - z->wp; + if (z->wp <= zone_end) zbd_info->wp_sectors_with_data += zone_end - z->wp; - } pthread_mutex_unlock(&zbd_info->mutex); z->wp = zone_end; break; @@ -1801,8 +1794,7 @@ enum fio_ddir zbd_adjust_ddir(struct thread_data *td, struct io_u *io_u, if (ddir != DDIR_READ || !td_rw(td)) return ddir; - if (io_u->file->zbd_info->sectors_with_data || - td->o.read_beyond_wp) + if (io_u->file->last_start[DDIR_WRITE] != -1ULL || td->o.read_beyond_wp) return DDIR_READ; return DDIR_WRITE; diff --git a/zbd.h b/zbd.h index d425707e..9ab25c47 100644 --- a/zbd.h +++ b/zbd.h @@ -54,7 +54,6 @@ struct fio_zone_info { * @mutex: Protects the modifiable members in this structure (refcount and * num_open_zones). * @zone_size: size of a single zone in bytes. - * @sectors_with_data: total size of data in all zones in units of 512 bytes * @wp_sectors_with_data: total size of data in zones with write pointers in * units of 512 bytes * @zone_size_log2: log2 of the zone size in bytes if it is a power of 2 or 0 @@ -76,7 +75,6 @@ struct zoned_block_device_info { uint32_t max_open_zones; pthread_mutex_t mutex; uint64_t zone_size; - uint64_t sectors_with_data; uint64_t wp_sectors_with_data; uint32_t zone_size_log2; uint32_t nr_zones; -- 2.38.1