When zonemode is zbd and the block size is not a divisor of the zone size, write target zone selection does not work as expected. For random writes to a device with a max open zones limit, the writes are repeated only to the zones selected up to that limit; all writes go to those zones. For sequential writes, only the first zone is written.

The cause of this unexpected zone selection is the current write target zone selection logic, which chooses write target zones from among the open zones. When the block size is not a divisor of the zone size, a selected open zone may have only a remainder of writable capacity smaller than the block size. Fio resets such a zone after selecting it and continues writing to it. This zone reset is required so that neither the max_open_zones option nor the max active zones limit of the zoned device is exceeded, but it does not simulate the intended workload.

To avoid the zone reset and the unexpected writes to the same zone, fix the handling of write target zones whose remainder is smaller than the write block size. Instead of resetting such a zone, finish it so that the max_open_zones option and the max active zones limit are still respected, then choose the zone next to the finished zone as the write target. To implement this, add the helper function zbd_finish_zone().

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
Tested-by: Dmitry Fomichev <dmitry.fomichev@xxxxxxx>
Reviewed-by: Dmitry Fomichev <dmitry.fomichev@xxxxxxx>
---
 zbd.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/zbd.c b/zbd.c
index 26a6404d..9ab78e2e 100644
--- a/zbd.c
+++ b/zbd.c
@@ -334,6 +334,44 @@ static void zbd_close_zone(struct thread_data *td, const struct fio_file *f,
 	z->open = 0;
 }
 
+/**
+ * zbd_finish_zone - finish the specified zone
+ * @td: FIO thread data.
+ * @f: FIO file for which to finish a zone
+ * @z: Zone to finish.
+ *
+ * Finish the zone at @offset with open or close status.
+ */
+static int zbd_finish_zone(struct thread_data *td, struct fio_file *f,
+			   struct fio_zone_info *z)
+{
+	uint64_t offset = z->start;
+	uint64_t length = f->zbd_info->zone_size;
+	int ret = 0;
+
+	switch (f->zbd_info->model) {
+	case ZBD_HOST_AWARE:
+	case ZBD_HOST_MANAGED:
+		if (td->io_ops && td->io_ops->finish_zone)
+			ret = td->io_ops->finish_zone(td, f, offset, length);
+		else
+			ret = blkzoned_finish_zone(td, f, offset, length);
+		break;
+	default:
+		break;
+	}
+
+	if (ret < 0) {
+		td_verror(td, errno, "finish zone failed");
+		log_err("%s: finish zone at sector %"PRIu64" failed (%d).\n",
+			f->file_name, offset >> 9, errno);
+	} else {
+		z->wp = (z+1)->start;
+	}
+
+	return ret;
+}
+
 /**
  * zbd_reset_zones - Reset a range of zones.
  * @td: fio thread data.
@@ -1953,6 +1991,33 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			goto eof;
 		}
 
+retry:
+		if (zbd_zone_remainder(zb) > 0 &&
+		    zbd_zone_remainder(zb) < min_bs) {
+			pthread_mutex_lock(&f->zbd_info->mutex);
+			zbd_close_zone(td, f, zb);
+			pthread_mutex_unlock(&f->zbd_info->mutex);
+			dprint(FD_ZBD,
+			       "%s: finish zone %d\n",
+			       f->file_name, zbd_zone_idx(f, zb));
+			io_u_quiesce(td);
+			zbd_finish_zone(td, f, zb);
+			if (zbd_zone_idx(f, zb) + 1 >= f->max_zone) {
+				if (!td_random(td))
+					goto eof;
+			}
+			zone_unlock(zb);
+
+			/* Find the next write pointer zone */
+			do {
+				zb++;
+				if (zbd_zone_idx(f, zb) >= f->max_zone)
+					zb = zbd_get_zone(f, f->min_zone);
+			} while (!zb->has_wp);
+
+			zone_lock(td, f, zb);
+		}
+
 		if (!zbd_open_zone(td, f, zb)) {
 			zone_unlock(zb);
 			zb = zbd_convert_to_open_zone(td, io_u);
@@ -1963,6 +2028,10 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			}
 		}
 
+		if (zbd_zone_remainder(zb) > 0 &&
+		    zbd_zone_remainder(zb) < min_bs)
+			goto retry;
+
 		/* Check whether the zone reset threshold has been exceeded */
 		if (td->o.zrf.u.f) {
 			if (zbdi->wp_sectors_with_data >= f->io_size * td->o.zrt.u.f &&
-- 
2.37.1
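
For readers less familiar with the scenario, here is a minimal standalone sketch, not part of the patch, that mimics the decision the second hunk makes with zbd_zone_remainder(): when the writable remainder of the current target zone is smaller than the block size, the zone is treated as finished rather than reset, and the next zone becomes the write target. The struct zone type, its fields and NR_ZONES are made up for illustration; they are not fio data structures.

/*
 * Illustrative sketch only (hypothetical types, not fio's fio_zone_info):
 * with a block size that is not a divisor of the zone size, each zone is
 * eventually left with a remainder smaller than one block, at which point
 * the write target advances to the next zone instead of resetting.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_ZONES	4

struct zone {
	uint64_t size;		/* zone capacity in bytes */
	uint64_t used;		/* bytes written so far (write pointer offset) */
	int finished;		/* zone transitioned to the FULL state */
};

static uint64_t zone_remainder(const struct zone *z)
{
	return z->size - z->used;
}

int main(void)
{
	const uint64_t zone_size = 1 << 20;	/* 1 MiB zones */
	const uint64_t block_size = 96 << 10;	/* 96 KiB, not a divisor */
	struct zone zones[NR_ZONES];
	int cur = 0;

	for (int i = 0; i < NR_ZONES; i++)
		zones[i] = (struct zone){ .size = zone_size };

	/* Issue a handful of writes and show how the target zone advances. */
	for (int io = 0; io < 16; io++) {
		struct zone *z = &zones[cur];

		if (zone_remainder(z) > 0 && zone_remainder(z) < block_size) {
			/* Too little room for a full block: finish the zone
			 * (instead of resetting it) and pick the next zone. */
			z->finished = 1;
			cur = (cur + 1) % NR_ZONES;
			z = &zones[cur];
		}
		z->used += block_size;
		printf("io %2d -> zone %d (used %llu bytes)\n", io, cur,
		       (unsigned long long)z->used);
	}
	return 0;
}

With the numbers above, each zone accepts ten 96 KiB writes and is then left with a 64 KiB remainder, so the eleventh write lands in the next zone, which is the behaviour the patch introduces for fio's zoned write path.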