Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit d13596b225baf61425a9ca92b0583fc3fa97765d:

  Fio 3.21 (2020-07-20 16:37:50 -0600)

are available in the Git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 5090d1d0f2a109c276384c93308566b7a3bfa5ad:

  zbd: fix %lu -> %llu dprint() formatting (2020-07-21 09:40:07 -0600)

----------------------------------------------------------------
Hans Holmberg (3):
      options: Add zonecapacity option for zonemode=zbd
      t/zbd: Support testing zone capacity smaller than zone size
      t/zbd: Add test case to check zonecapacity option

Jens Axboe (1):
      zbd: fix %lu -> %llu dprint() formatting

Shin'ichiro Kawasaki (3):
      zbd: Support zone capacity smaller than zone size
      t/zbd: Mandate blkzone capacity report for devices with zone capacity
      t/zbd: Support testing zone capacity smaller than zone size with null_blk

 HOWTO                               |  18 +++++-
 cconv.c                             |   2 +
 configure                           |  19 ++++++
 engines/libzbc.c                    |   5 ++
 fio.1                               |  13 +++-
 options.c                           |  11 ++++
 oslib/linux-blkzoned.c              |  11 ++++
 t/zbd/functions                     |  82 ++++++++++++++++++++++++
 t/zbd/run-tests-against-zoned-nullb |  30 ++++++++-
 t/zbd/test-zbd-support              | 123 ++++++++++++++++++++++++++----------
 thread_options.h                    |   2 +
 zbd.c                               |  87 ++++++++++++++++++++-----
 zbd.h                               |   2 +
 zbd_types.h                         |   1 +
 14 files changed, 348 insertions(+), 58 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index 8cf8d650..35ead0cb 100644
--- a/HOWTO
+++ b/HOWTO
@@ -970,14 +970,15 @@ Target file/device
 	Accepted values are:
 
 		**none**
-				The :option:`zonerange`, :option:`zonesize` and
-				:option:`zoneskip` parameters are ignored.
+				The :option:`zonerange`, :option:`zonesize`,
+				:option:`zonecapacity` and :option:`zoneskip`
+				parameters are ignored.
 		**strided**
 				I/O happens in a single zone until
 				:option:`zonesize` bytes have been transferred.
 				After that number of bytes has been
 				transferred processing of the next zone
-				starts.
+				starts. :option:`zonecapacity` is ignored.
 		**zbd**
 				Zoned block device mode. I/O happens
 				sequentially in each zone, even if random I/O
@@ -1004,6 +1005,17 @@ Target file/device
 	For :option:`zonemode` =zbd, this is the size of a single zone. The
 	:option:`zonerange` parameter is ignored in this mode.
 
+
+.. option:: zonecapacity=int
+
+	For :option:`zonemode` =zbd, this defines the capacity of a single zone,
+	which is the accessible area starting from the zone start address.
+	This parameter only applies when using :option:`zonemode` =zbd in
+	combination with regular block devices. If not specified it defaults to
+	the zone size. If the target device is a zoned block device, the zone
+	capacity is obtained from the device information and this option is
+	ignored.
+
 .. option:: zoneskip=int
 
 	For :option:`zonemode` =strided, the number of bytes to skip after
diff --git a/cconv.c b/cconv.c
index 449bcf7b..2469389b 100644
--- a/cconv.c
+++ b/cconv.c
@@ -223,6 +223,7 @@ void convert_thread_options_to_cpu(struct thread_options *o,
 	o->ss_limit.u.f = fio_uint64_to_double(le64_to_cpu(top->ss_limit.u.i));
 	o->zone_range = le64_to_cpu(top->zone_range);
 	o->zone_size = le64_to_cpu(top->zone_size);
+	o->zone_capacity = le64_to_cpu(top->zone_capacity);
 	o->zone_skip = le64_to_cpu(top->zone_skip);
 	o->zone_mode = le32_to_cpu(top->zone_mode);
 	o->lockmem = le64_to_cpu(top->lockmem);
@@ -563,6 +564,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
 	top->ss_limit.u.i = __cpu_to_le64(fio_double_to_uint64(o->ss_limit.u.f));
 	top->zone_range = __cpu_to_le64(o->zone_range);
 	top->zone_size = __cpu_to_le64(o->zone_size);
+	top->zone_capacity = __cpu_to_le64(o->zone_capacity);
 	top->zone_skip = __cpu_to_le64(o->zone_skip);
 	top->zone_mode = __cpu_to_le32(o->zone_mode);
 	top->lockmem = __cpu_to_le64(o->lockmem);
diff --git a/configure b/configure
index 6991393b..b079a2a5 100755
--- a/configure
+++ b/configure
@@ -2390,6 +2390,7 @@ if compile_prog "" "" "valgrind_dev"; then
 fi
 print_config "Valgrind headers" "$valgrind_dev"
 
+if test "$targetos" = "Linux" ; then
 ##########################################
 # <linux/blkzoned.h> probe
 if test "$linux_blkzoned" != "yes" ; then
@@ -2407,6 +2408,24 @@ if compile_prog "" "" "linux_blkzoned"; then
 fi
 print_config "Zoned block device support" "$linux_blkzoned"
 
+##########################################
+# Check BLK_ZONE_REP_CAPACITY
+cat > $TMPC << EOF
+#include <linux/blkzoned.h>
+int main(void)
+{
+  return BLK_ZONE_REP_CAPACITY;
+}
+EOF
+if compile_prog "" "" "blkzoned report capacity"; then
+  output_sym "CONFIG_HAVE_REP_CAPACITY"
+  rep_capacity="yes"
+else
+  rep_capacity="no"
+fi
+print_config "Zoned block device capacity" "$rep_capacity"
+fi
+
 ##########################################
 # libzbc probe
 if test "$libzbc" != "yes" ; then
diff --git a/engines/libzbc.c b/engines/libzbc.c
index fdde8ca6..4b900233 100644
--- a/engines/libzbc.c
+++ b/engines/libzbc.c
@@ -235,6 +235,11 @@ static int libzbc_report_zones(struct thread_data *td, struct fio_file *f,
 		zbdz->start = zones[i].zbz_start << 9;
 		zbdz->len = zones[i].zbz_length << 9;
 		zbdz->wp = zones[i].zbz_write_pointer << 9;
+		/*
+		 * ZBC/ZAC do not define zone capacity, so use the zone size as
+		 * the zone capacity.
+		 */
+		zbdz->capacity = zbdz->len;
 
 		switch (zones[i].zbz_type) {
 		case ZBC_ZT_CONVENTIONAL:
diff --git a/fio.1 b/fio.1
index f134e0bf..a3d348b2 100644
--- a/fio.1
+++ b/fio.1
@@ -738,12 +738,13 @@ Accepted values are:
 .RS
 .TP
 .B none
-The \fBzonerange\fR, \fBzonesize\fR and \fBzoneskip\fR parameters are ignored.
+The \fBzonerange\fR, \fBzonesize\fR, \fBzonecapacity\fR and \fBzoneskip\fR
+parameters are ignored.
 .TP
 .B strided
 I/O happens in a single zone until \fBzonesize\fR bytes have been transferred.
 After that number of bytes has been transferred processing of the next zone
-starts.
+starts. The \fBzonecapacity\fR parameter is ignored.
 .TP
 .B zbd
 Zoned block device mode. I/O happens sequentially in each zone, even if random
@@ -771,6 +772,14 @@ zoned block device, the specified \fBzonesize\fR must be 0 or equal to the
 device zone size. For a regular block device or file, the specified
 \fBzonesize\fR must be at least 512B.
 .TP
+.BI zonecapacity \fR=\fPint
+For \fBzonemode\fR=zbd, this defines the capacity of a single zone, which is
+the accessible area starting from the zone start address. This parameter only
+applies when using \fBzonemode\fR=zbd in combination with regular block devices.
+If not specified it defaults to the zone size. If the target device is a zoned
+block device, the zone capacity is obtained from the device information and this
+option is ignored.
+.TP
 .BI zoneskip \fR=\fPint
 For \fBzonemode\fR=strided, the number of bytes to skip after \fBzonesize\fR
 bytes of data have been transferred.
diff --git a/options.c b/options.c
index 85a0f490..251ad2c1 100644
--- a/options.c
+++ b/options.c
@@ -3327,6 +3327,17 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 		.category = FIO_OPT_C_IO,
 		.group	= FIO_OPT_G_ZONE,
 	},
+	{
+		.name	= "zonecapacity",
+		.lname	= "Zone capacity",
+		.type	= FIO_OPT_STR_VAL,
+		.off1	= offsetof(struct thread_options, zone_capacity),
+		.help	= "Capacity per zone",
+		.def	= "0",
+		.interval = 1024 * 1024,
+		.category = FIO_OPT_C_IO,
+		.group	= FIO_OPT_G_ZONE,
+	},
 	{
 		.name	= "zonerange",
 		.lname	= "Zone range",
diff --git a/oslib/linux-blkzoned.c b/oslib/linux-blkzoned.c
index 1cf06363..6fe78b9c 100644
--- a/oslib/linux-blkzoned.c
+++ b/oslib/linux-blkzoned.c
@@ -113,6 +113,16 @@ out:
 	return 0;
 }
 
+static uint64_t zone_capacity(struct blk_zone_report *hdr,
+			      struct blk_zone *blkz)
+{
+#ifdef CONFIG_HAVE_REP_CAPACITY
+	if (hdr->flags & BLK_ZONE_REP_CAPACITY)
+		return blkz->capacity << 9;
+#endif
+	return blkz->len << 9;
+}
+
 int blkzoned_report_zones(struct thread_data *td, struct fio_file *f,
 			  uint64_t offset, struct zbd_zone *zones,
 			  unsigned int nr_zones)
@@ -149,6 +159,7 @@ int blkzoned_report_zones(struct thread_data *td, struct fio_file *f,
 		z->start = blkz->start << 9;
 		z->wp = blkz->wp << 9;
 		z->len = blkz->len << 9;
+		z->capacity = zone_capacity(hdr, blkz);
 
 		switch (blkz->type) {
 		case BLK_ZONE_TYPE_CONVENTIONAL:
diff --git a/t/zbd/functions b/t/zbd/functions
index 1bd22ec4..81b6f3f7 100644
--- a/t/zbd/functions
+++ b/t/zbd/functions
@@ -19,6 +19,51 @@ if [ -n "${use_libzbc}" ] &&
     exit 1
 fi
 
+blkzone_reports_capacity() {
+	local dev="${1}"
+
+	[[ -n "${blkzone}" ]] &&
+		"${blkzone}" report -c 1 -o 0 "${dev}" | grep -q 'cap '
+}
+
+# Whether or not $1 (/dev/...) is a NVME ZNS device.
+is_nvme_zns() {
+	local s
+
+	s=/sys/block/$(basename "${1}")/device/subsystem
+
+	if [[ ! -h "${s}" || $(realpath "${s}") != /sys/class/nvme ]]; then
+		return 1
+	fi
+
+	[[ $(</sys/block/$(basename "${1}")/queue/zoned) == host-managed ]]
+}
+
+# Whether or not $1 (/dev/...) is a null_blk device with zone capacity smaller
+# than zone size.
+is_nullb_with_zone_cap() {
+	local f
+
+	f=/sys/kernel/config/nullb/$(basename "${1}")
+	[[ -r "${f}/zone_capacity" &&
+		   $(<"${f}/zone_capacity") -lt $(<"${f}/zone_size") ]]
+}
+
+# Check if blkzone is available and suitable for the test target device. If not
+# available, print error message and return 1. Otherwise return 0.
+check_blkzone() {
+	local dev="${1}"
+
+	# If the device supports zone capacity, mandate zone capacity report by
+	# blkzone.
+	if (is_nvme_zns "${dev}" || is_nullb_with_zone_cap "${dev}") &&
+				! blkzone_reports_capacity "${dev}"; then
+		echo "Error: blkzone does not report zone capacity"
+		echo "Error: install latest util-linux with blkzone"
+		return 1
+	fi
+}
+
 # Reports the starting sector and length of the first sequential zone of device
 # $1.
 first_sequential_zone() {
@@ -39,6 +84,43 @@ first_sequential_zone() {
     fi
 }
 
+# Reports the summed zone capacity of $1 number of zones starting from offset $2
+# on device $3.
+total_zone_capacity() {
+	local nr_zones=$1
+	local sector=$(($2 / 512))
+	local dev=$3
+	local capacity=0 num
+	local grep_str
+
+	if [ -z "$is_zbd" ]; then
+		# For regular block devices, handle zone size as zone capacity.
+		echo $((zone_size * nr_zones))
+		return
+	fi
+
+	if [ -n "${blkzone}" ] && [ ! -n "${use_libzbc}" ]; then
+		if blkzone_reports_capacity "${dev}"; then
+			grep_str='cap \K[0-9a-zA-Z]*'
+		else
+			# If zone capacity is not reported, refer zone length.
+			grep_str='len \K[0-9a-zA-Z]*'
+		fi
+		while read num; do
+			capacity=$((capacity + num))
+		done < <(${blkzone} report -c "$nr_zones" -o "$sector" "$dev" |
+				grep -Po "${grep_str}")
+	else
+		# ZBC devices do not have zone capacity. Use zone size.
+		while read num; do
+			capacity=$((capacity + num))
+		done < <(${zbc_report_zones} -nz "$nr_zones" -start "$sector" \
+				"$dev" | grep -Po 'sector [0-9]*, \K[0-9]*')
+	fi
+
+	echo $((capacity * 512))
+}
+
 max_open_zones() {
     local dev=$1
 
diff --git a/t/zbd/run-tests-against-zoned-nullb b/t/zbd/run-tests-against-zoned-nullb
index 53aee3e8..f9c9530c 100755
--- a/t/zbd/run-tests-against-zoned-nullb
+++ b/t/zbd/run-tests-against-zoned-nullb
@@ -6,6 +6,21 @@
 
 scriptdir="$(cd "$(dirname "$0")" && pwd)"
 
+zone_size=1
+zone_capacity=1
+if [[ ${1} == "-h" ]]; then
+    echo "Usage: ${0} [OPTIONS]"
+    echo "Options:"
+    echo -e "\t-h Show this message."
+    echo -e "\t-zone-cap Use null blk with zone capacity less than zone size."
+    echo -e "\tany option supported by test-zbd-support script."
+    exit 1
+elif [[ ${1} == "-zone-cap" ]]; then
+    zone_size=4
+    zone_capacity=3
+    shift
+fi
+
 for d in /sys/kernel/config/nullb/*; do [ -d "$d" ] && rmdir "$d"; done
 modprobe -r null_blk
 modprobe null_blk nr_devices=0 || exit $?
@@ -17,9 +32,18 @@ modprobe -r null_blk
 modprobe null_blk nr_devices=0 &&
     cd /sys/kernel/config/nullb &&
     mkdir nullb0 &&
-    cd nullb0 &&
-    echo 1 > zoned &&
-    echo 1 > zone_size &&
+    cd nullb0 || exit $?
+
+if ((zone_capacity < zone_size)); then
+    if [[ ! -w zone_capacity ]]; then
+        echo "null blk does not support zone capacity"
+        exit 1
+    fi
+    echo "${zone_capacity}" > zone_capacity
+fi
+
+echo 1 > zoned &&
+    echo "${zone_size}" > zone_size &&
     echo 0 > completion_nsec &&
     echo 4096 > blocksize &&
     echo 1024 > size &&
diff --git a/t/zbd/test-zbd-support b/t/zbd/test-zbd-support
index 80dc3f30..e53a20c5 100755
--- a/t/zbd/test-zbd-support
+++ b/t/zbd/test-zbd-support
@@ -204,55 +204,64 @@ test4() {
 
 # Sequential write to sequential zones.
 test5() {
-    local size
+    local size off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     size=$((4 * zone_size))
     run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write	\
 		   --bs="$(max $((zone_size / 64)) "$logical_block_size")"\
 		   --do_verify=1 --verify=md5				\
 		   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Sequential read from sequential zones.
 test6() {
-    local size
+    local size off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     size=$((4 * zone_size))
     write_and_run_one_fio_job \
 	    $((first_sequential_zone_sector * 512)) "${size}" \
-	    --offset=$((first_sequential_zone_sector * 512)) \
+	    --offset="${off}" \
 	    --size="${size}" --zonemode=zbd --zonesize="${zone_size}" \
 	    "$(ioengine "psync")" --iodepth=1 --rw=read \
 	    --bs="$(max $((zone_size / 64)) "$logical_block_size")" \
 	    >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_read $size || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 1.
 test7() {
     local size=$((zone_size))
+    local off capacity
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=1 --rw=randwrite	\
 		   --bs="$(min 16384 "${zone_size}")"			\
 		   --do_verify=1 --verify=md5 --size="$size"		\
 		   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64.
 test8() {
-    local size
+    local size off capacity
 
     size=$((4 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite	\
 		   --bs="$(min 16384 "${zone_size}")"			\
 		   --do_verify=1 --verify=md5				\
 		   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, sg, queue depth 1.
@@ -293,39 +302,45 @@ test10() {
 
 # Random write to sequential zones, libaio, queue depth 64, random block size.
 test11() {
-    local size
+    local size off capacity
 
     size=$((4 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 4 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite	\
 		   --bsrange=4K-64K --do_verify=1 --verify=md5		\
 		   --debug=zbd >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64, max 1 open zone.
 test12() {
-    local size
+    local size off capacity
 
     size=$((8 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 8 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite --bs=16K \
 		   --max_open_zones=1 --size=$size --do_verify=1 --verify=md5 \
 		   --debug=zbd >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to sequential zones, libaio, queue depth 64, max 4 open zones.
 test13() {
-    local size
+    local size off capacity
 
     size=$((8 * zone_size))
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 8 $off $dev)
     run_fio_on_seq "$(ioengine "libaio")" --iodepth=64 --rw=randwrite --bs=16K \
 		   --max_open_zones=4 --size=$size --do_verify=1 --verify=md5 \
 		   --debug=zbd						      \
 		   >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $size || return $?
-    check_read $size || return $?
+    check_written $capacity || return $?
+    check_read $capacity || return $?
 }
 
 # Random write to conventional zones.
@@ -349,7 +364,7 @@ test14() {
 # Sequential read on a mix of empty and full zones.
 test15() {
     local i off size
-    local w_off w_size
+    local w_off w_size w_capacity
 
     for ((i=0;i<4;i++)); do
 	[ -n "$is_zbd" ] &&
@@ -358,6 +373,7 @@ test15() {
     done
     w_off=$(((first_sequential_zone_sector + 2 * sectors_per_zone) * 512))
     w_size=$((2 * zone_size))
+    w_capacity=$(total_zone_capacity 2 $w_off $dev)
     off=$((first_sequential_zone_sector * 512))
     size=$((4 * zone_size))
     write_and_run_one_fio_job "${w_off}" "${w_size}" \
@@ -365,14 +381,14 @@ test15() {
 		    --zonemode=zbd --zonesize="${zone_size}" --offset=$off \
 		    --size=$((size)) >>"${logfile}.${test_number}" 2>&1 ||
 	return $?
-    check_written $((w_size)) || return $?
-    check_read $((size / 2))
+    check_written $((w_capacity)) || return $?
+    check_read $((w_capacity))
 }
 
 # Random read on a mix of empty and full zones.
 test16() {
     local off size
-    local i w_off w_size
+    local i w_off w_size w_capacity
 
     for ((i=0;i<4;i++)); do
 	[ -n "$is_zbd" ] &&
@@ -381,13 +397,14 @@ test16() {
     done
     w_off=$(((first_sequential_zone_sector + 2 * sectors_per_zone) * 512))
     w_size=$((2 * zone_size))
+    w_capacity=$(total_zone_capacity 2 $w_off $dev)
     off=$((first_sequential_zone_sector * 512))
     size=$((4 * zone_size))
     write_and_run_one_fio_job "${w_off}" "${w_size}" \
 		    "$(ioengine "libaio")" --iodepth=64 --rw=randread --bs=16K \
 		    --zonemode=zbd --zonesize="${zone_size}" --offset=$off \
 		    --size=$size >>"${logfile}.${test_number}" 2>&1 || return $?
-    check_written $w_size || return $?
+    check_written $w_capacity || return $?
     check_read $size || return $?
 }
 
@@ -451,13 +468,17 @@ test23() {
 
 test24() {
     local bs loops=9 size=$((zone_size))
+    local off capacity
+
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
 
     bs=$(min $((256*1024)) "$zone_size")
     run_fio_on_seq "$(ioengine "psync")" --rw=write --bs="$bs"		\
 		   --size=$size --loops=$loops				\
 		   --zone_reset_frequency=.01 --zone_reset_threshold=.90 \
 		   >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((size * loops)) || return $?
+    check_written $((capacity * loops)) || return $?
     check_reset_count -eq 8 ||
 	check_reset_count -eq 9 ||
 	check_reset_count -eq 10 || return $?
@@ -483,15 +504,19 @@ test25() {
 
 write_to_first_seq_zone() {
     local loops=4 r
+    local off capacity
+
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
 
     r=$(((RANDOM << 16) | RANDOM))
     run_fio --name="$dev" --filename="$dev" "$(ioengine "psync")" --rw="$1" \
 	    --thread=1 --do_verify=1 --verify=md5 --direct=1 --bs=4K	\
-	    --offset=$((first_sequential_zone_sector * 512))		\
-	    "--size=$zone_size" --loops=$loops --randseed="$r"		\
+	    --offset=$off						\
+	    --size=$zone_size --loops=$loops --randseed="$r"		\
 	    --zonemode=zbd --zonesize="${zone_size}" --group_reporting=1	\
 	    --gtod_reduce=1 >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((loops * zone_size)) || return $?
+    check_written $((loops * capacity)) || return $?
 }
 
 # Overwrite the first sequential zone four times sequentially.
@@ -511,15 +536,16 @@ test28() {
     off=$((first_sequential_zone_sector * 512 + 64 * zone_size))
     [ -n "$is_zbd" ] && reset_zone "$dev" $((off / 512))
     opts=("--debug=zbd")
+    capacity=$(total_zone_capacity 1 $off $dev)
     for ((i=0;i<jobs;i++)); do
 	opts+=("--name=job$i" "--filename=$dev" "--offset=$off" "--bs=16K")
-	opts+=("--size=$zone_size" "$(ioengine "psync")" "--rw=randwrite")
+	opts+=("--size=$zone_size" "--io_size=$capacity" "$(ioengine "psync")" "--rw=randwrite")
 	opts+=("--thread=1" "--direct=1" "--zonemode=zbd")
 	opts+=("--zonesize=${zone_size}" "--group_reporting=1")
 	opts+=(${var_opts[@]})
     done
     run_fio "${opts[@]}" >> "${logfile}.${test_number}" 2>&1 || return $?
-    check_written $((jobs * zone_size)) || return $?
+    check_written $((jobs * $capacity)) || return $?
     check_reset_count -eq $jobs ||
 	check_reset_count -eq $((jobs - 1)) ||
 	return $?
@@ -608,10 +634,13 @@ test32() {
 # zone size.
 test33() {
     local bs io_size size
+    local off capacity=0;
 
+    off=$((first_sequential_zone_sector * 512))
+    capacity=$(total_zone_capacity 1 $off $dev)
     size=$((2 * zone_size))
-    io_size=$((5 * zone_size))
-    bs=$((3 * zone_size / 4))
+    io_size=$((5 * capacity))
+    bs=$((3 * capacity / 4))
     run_fio_on_seq "$(ioengine "psync")" --iodepth=1 --rw=write	\
 		   --size=$size --io_size=$io_size --bs=$bs	\
 		   >> "${logfile}.${test_number}" 2>&1 || return $?
@@ -660,8 +689,9 @@ test36() {
 
 # Test 3/4 for the I/O boundary rounding code: $size > $zone_size.
 test37() {
-    local bs off size
+    local bs off size capacity
 
+    capacity=$(total_zone_capacity 1 $first_sequential_zone_sector $dev)
     if [ "$first_sequential_zone_sector" = 0 ]; then
 	off=0
     else
@@ -673,7 +703,7 @@ test37() {
 		    --iodepth=1 --rw=write --do_verify=1 --verify=md5	\
 		    --bs=$bs --zonemode=zbd --zonesize="${zone_size}"	\
 		    >> "${logfile}.${test_number}" 2>&1
-    check_written $((zone_size)) || return $?
+    check_written $capacity || return $?
 }
 
 # Test 4/4 for the I/O boundary rounding code: $offset > $disk_size - $zone_size
@@ -809,6 +839,26 @@ test48() {
 	    >> "${logfile}.${test_number}" 2>&1 || return $?
 }
 
+# Check if fio handles --zonecapacity on a normal block device correctly
+test49() {
+
+    if [ -n "$is_zbd" ]; then
+	echo "$dev is not a regular block device" \
+	     >>"${logfile}.${test_number}"
+	return 0
+    fi
+
+    size=$((2 * zone_size))
+    capacity=$((zone_size * 3 / 4))
+
+    run_one_fio_job "$(ioengine "psync")" --rw=write \
+		    --zonemode=zbd --zonesize="${zone_size}" \
+		    --zonecapacity=${capacity} \
+		    --verify=md5  --size=${size} >>"${logfile}.${test_number}" 2>&1 ||
+	return $?
+    check_read $((capacity * 2)) || return $?
+}
+
 tests=()
 dynamic_analyzer=()
 reset_all_zones=
@@ -863,6 +913,9 @@ if [[ -b "$realdev" ]]; then
 	case "$(<"/sys/class/block/$basename/queue/zoned")" in
 	host-managed|host-aware)
 		is_zbd=true
+		if ! check_blkzone "${dev}"; then
+			exit 1
+		fi
 		if ! result=($(first_sequential_zone "$dev")); then
 			echo "Failed to determine first sequential zone"
 			exit 1
diff --git a/thread_options.h b/thread_options.h
index 968ea0ab..3fe48ecc 100644
--- a/thread_options.h
+++ b/thread_options.h
@@ -193,6 +193,7 @@ struct thread_options {
 	unsigned int loops;
 	unsigned long long zone_range;
 	unsigned long long zone_size;
+	unsigned long long zone_capacity;
 	unsigned long long zone_skip;
 	enum fio_zone_mode zone_mode;
 	unsigned long long lockmem;
@@ -487,6 +488,7 @@ struct thread_options_pack {
 	uint32_t loops;
 	uint64_t zone_range;
 	uint64_t zone_size;
+	uint64_t zone_capacity;
 	uint64_t zone_skip;
 	uint64_t lockmem;
 	uint32_t mem_type;
diff --git a/zbd.c b/zbd.c
index cf2cded9..3eac5df3 100644
--- a/zbd.c
+++ b/zbd.c
@@ -140,6 +140,24 @@ static inline bool zbd_zone_swr(struct fio_zone_info *z)
 	return z->type == ZBD_ZONE_TYPE_SWR;
 }
 
+/**
+ * zbd_zone_end - Return zone end location
+ * @z: zone info pointer.
+ */
+static inline uint64_t zbd_zone_end(const struct fio_zone_info *z)
+{
+	return (z+1)->start;
+}
+
+/**
+ * zbd_zone_capacity_end - Return zone capacity limit end location
+ * @z: zone info pointer.
+ */
+static inline uint64_t zbd_zone_capacity_end(const struct fio_zone_info *z)
+{
+	return z->start + z->capacity;
+}
+
 /**
  * zbd_zone_full - verify whether a minimum number of bytes remain in a zone
  * @f: file pointer.
@@ -154,7 +172,7 @@ static bool zbd_zone_full(const struct fio_file *f, struct fio_zone_info *z,
 	assert((required & 511) == 0);
 
 	return zbd_zone_swr(z) &&
-		z->wp + required > z->start + f->zbd_info->zone_size;
+		z->wp + required > zbd_zone_capacity_end(z);
 }
 
 static void zone_lock(struct thread_data *td, struct fio_file *f, struct fio_zone_info *z)
@@ -271,7 +289,7 @@ static bool zbd_verify_sizes(void)
 			z = &f->zbd_info->zone_info[zone_idx];
 			if ((f->file_offset != z->start) &&
 			    (td->o.td_ddir != TD_DDIR_READ)) {
-				new_offset = (z+1)->start;
+				new_offset = zbd_zone_end(z);
 				if (new_offset >= f->file_offset + f->io_size) {
 					log_info("%s: io_size must be at least one zone\n",
 						 f->file_name);
@@ -353,6 +371,7 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
 	uint32_t nr_zones;
 	struct fio_zone_info *p;
 	uint64_t zone_size = td->o.zone_size;
+	uint64_t zone_capacity = td->o.zone_capacity;
 	struct zoned_block_device_info *zbd_info = NULL;
 	int i;
 
@@ -368,6 +387,16 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
 		return 1;
 	}
 
+	if (zone_capacity == 0)
+		zone_capacity = zone_size;
+
+	if (zone_capacity > zone_size) {
+		log_err("%s: job parameter zonecapacity %llu is larger than zone size %llu\n",
+			f->file_name, (unsigned long long) td->o.zone_capacity,
+			(unsigned long long) td->o.zone_size);
+		return 1;
+	}
+
 	nr_zones = (f->real_file_size + zone_size - 1) / zone_size;
 	zbd_info = scalloc(1, sizeof(*zbd_info) +
 			   (nr_zones + 1) * sizeof(zbd_info->zone_info[0]));
@@ -384,6 +413,7 @@ static int init_zone_info(struct thread_data *td, struct fio_file *f)
 		p->wp = p->start;
 		p->type = ZBD_ZONE_TYPE_SWR;
 		p->cond = ZBD_ZONE_COND_EMPTY;
+		p->capacity = zone_capacity;
 	}
 	/* a sentinel */
 	p->start = nr_zones * zone_size;
@@ -456,10 +486,11 @@ static int parse_zone_info(struct thread_data *td, struct fio_file *f)
 			mutex_init_pshared_with_type(&p->mutex,
 						     PTHREAD_MUTEX_RECURSIVE);
 			p->start = z->start;
+			p->capacity = z->capacity;
 			switch (z->cond) {
 			case ZBD_ZONE_COND_NOT_WP:
 			case ZBD_ZONE_COND_FULL:
-				p->wp = p->start + zone_size;
+				p->wp = p->start + p->capacity;
 				break;
 			default:
 				assert(z->start <= z->wp);
@@ -707,7 +738,7 @@ static int zbd_reset_zone(struct thread_data *td, struct fio_file *f,
 	dprint(FD_ZBD, "%s: resetting wp of zone %u.\n", f->file_name,
 		zbd_zone_nr(f->zbd_info, z));
 
-	return zbd_reset_range(td, f, z->start, (z+1)->start - z->start);
+	return zbd_reset_range(td, f, z->start, zbd_zone_end(z) - z->start);
 }
 
 /* The caller must hold f->zbd_info->mutex */
@@ -1068,7 +1099,7 @@ found_candidate_zone:
 	/* Both z->mutex and f->zbd_info->mutex are held. */
 
 examine_zone:
-	if (z->wp + min_bs <= (z+1)->start) {
+	if (z->wp + min_bs <= zbd_zone_capacity_end(z)) {
 		pthread_mutex_unlock(&f->zbd_info->mutex);
 		goto out;
 	}
@@ -1112,7 +1143,7 @@ examine_zone:
 		z = &f->zbd_info->zone_info[zone_idx];
 
 		zone_lock(td, f, z);
-		if (z->wp + min_bs <= (z+1)->start)
+		if (z->wp + min_bs <= zbd_zone_capacity_end(z))
 			goto out;
 		pthread_mutex_lock(&f->zbd_info->mutex);
 	}
@@ -1143,9 +1174,9 @@ static struct fio_zone_info *zbd_replay_write_order(struct thread_data *td,
 		assert(z);
 	}
 
-	if (z->verify_block * min_bs >= f->zbd_info->zone_size)
+	if (z->verify_block * min_bs >= z->capacity)
 		log_err("%s: %d * %d >= %llu\n", f->file_name, z->verify_block,
-			min_bs, (unsigned long long) f->zbd_info->zone_size);
+			min_bs, (unsigned long long)z->capacity);
 	io_u->offset = z->start + z->verify_block++ * min_bs;
 	return z;
 }
@@ -1231,7 +1262,7 @@ static void zbd_queue_io(struct io_u *io_u, int q, bool success)
 	switch (io_u->ddir) {
 	case DDIR_WRITE:
 		zone_end = min((uint64_t)(io_u->offset + io_u->buflen),
-			       (z + 1)->start);
+			       zbd_zone_capacity_end(z));
 		pthread_mutex_lock(&zbd_info->mutex);
 		/*
 		 * z->wp > zone_end means that one or more I/O errors
@@ -1327,6 +1358,28 @@ void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
 	assert(td->o.zone_mode == ZONE_MODE_ZBD);
 	assert(td->o.zone_size);
 
+	zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
+	z = &f->zbd_info->zone_info[zone_idx];
+
+	/*
+	 * When the zone capacity is smaller than the zone size and the I/O is
+	 * sequential write, skip to zone end if the latest position is at the
+	 * zone capacity limit.
+	 */
+	if (z->capacity < f->zbd_info->zone_size && !td_random(td) &&
+	    ddir == DDIR_WRITE &&
+	    f->last_pos[ddir] >= zbd_zone_capacity_end(z)) {
+		dprint(FD_ZBD,
+		       "%s: Jump from zone capacity limit to zone end:"
+		       " (%llu -> %llu) for zone %u (%llu)\n",
+		       f->file_name, (unsigned long long) f->last_pos[ddir],
+		       (unsigned long long) zbd_zone_end(z),
+		       zbd_zone_nr(f->zbd_info, z),
+		       (unsigned long long) z->capacity);
+		td->io_skip_bytes += zbd_zone_end(z) - f->last_pos[ddir];
+		f->last_pos[ddir] = zbd_zone_end(z);
+	}
+
 	/*
 	 * zone_skip is valid only for sequential workloads.
 	 */
@@ -1340,11 +1393,8 @@ void setup_zbd_zone_mode(struct thread_data *td, struct io_u *io_u)
 	 * - For reads with td->o.read_beyond_wp == false, the last position
 	 *   reached the zone write pointer.
 	 */
-	zone_idx = zbd_zone_idx(f, f->last_pos[ddir]);
-	z = &f->zbd_info->zone_info[zone_idx];
-
 	if (td->zone_bytes >= td->o.zone_size ||
-	    f->last_pos[ddir] >= (z+1)->start ||
+	    f->last_pos[ddir] >= zbd_zone_end(z) ||
 	    (ddir == DDIR_READ &&
 	     (!td->o.read_beyond_wp) && f->last_pos[ddir] >= z->wp)) {
 		/*
@@ -1530,6 +1580,13 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			zb->reset_zone = 0;
 			if (zbd_reset_zone(td, f, zb) < 0)
 				goto eof;
+
+			if (zb->capacity < min_bs) {
+				log_err("zone capacity %llu smaller than minimum block size %d\n",
+					(unsigned long long)zb->capacity,
+					min_bs);
+				goto eof;
+			}
 		}
 		/* Make writes occur at the write pointer */
 		assert(!zbd_zone_full(f, zb, min_bs));
@@ -1545,7 +1602,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 		 * small.
 		 */
 		new_len = min((unsigned long long)io_u->buflen,
-			      (zb + 1)->start - io_u->offset);
+			      zbd_zone_capacity_end(zb) - io_u->offset);
 		new_len = new_len / min_bs * min_bs;
 		if (new_len == io_u->buflen)
 			goto accept;
@@ -1556,7 +1613,7 @@ enum io_u_action zbd_adjust_block(struct thread_data *td, struct io_u *io_u)
 			goto accept;
 		}
 		log_err("Zone remainder %lld smaller than minimum block size %d\n",
-			((zb + 1)->start - io_u->offset),
+			(zbd_zone_capacity_end(zb) - io_u->offset),
 			min_bs);
 		goto eof;
 	case DDIR_TRIM:
diff --git a/zbd.h b/zbd.h
index e942a7f6..021174c1 100644
--- a/zbd.h
+++ b/zbd.h
@@ -23,6 +23,7 @@ enum io_u_action {
  * struct fio_zone_info - information about a single ZBD zone
  * @start: zone start location (bytes)
  * @wp: zone write pointer location (bytes)
+ * @capacity: maximum size usable from the start of a zone (bytes)
  * @verify_block: number of blocks that have been verified for this zone
  * @mutex: protects the modifiable members in this structure
  * @type: zone type (BLK_ZONE_TYPE_*)
@@ -35,6 +36,7 @@ struct fio_zone_info {
 	pthread_mutex_t		mutex;
 	uint64_t		start;
 	uint64_t		wp;
+	uint64_t		capacity;
 	uint32_t		verify_block;
 	enum zbd_zone_type	type:2;
 	enum zbd_zone_cond	cond:4;
diff --git a/zbd_types.h b/zbd_types.h
index d63c0d0a..5ed41aa0 100644
--- a/zbd_types.h
+++ b/zbd_types.h
@@ -50,6 +50,7 @@ struct zbd_zone {
 	uint64_t		start;
 	uint64_t		wp;
 	uint64_t		len;
+	uint64_t		capacity;
 	enum zbd_zone_type	type;
 	enum zbd_zone_cond	cond;
 };



[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux