Re: [PATCH 1/3] trim: add support for multiple ranges

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2/14/24 07:06, Ankit Kumar wrote:
NVMe specification allow multiple ranges for the dataset management
commands. Currently the block ioctl only allows a single range for
trim, however multiple ranges can be specified using nvme character
device.

Add an option num_range to send multiple range per trim request, which
only works if the data direction is solely trim i.e. trim or randtrim.
Add FIO_MULTI_RANGE_TRIM as the ioengine flag, to restrict the usage of
this new option.
For multi range trim request this modifies the way IO buffers are used.
The buffer length will depend on number of trim ranges and the actual
buffer will contains start and length of each range entry.

Signed-off-by: Ankit Kumar <ankit.kumar@xxxxxxxxxxx>
---
  HOWTO.rst        |  7 +++++
  backend.c        | 20 ++++++++++--
  cconv.c          |  2 ++
  fio.1            |  6 ++++
  fio.h            | 18 +++++++++++
  init.c           | 13 ++++++++
  io_u.c           | 81 ++++++++++++++++++++++++++++++++++++++----------
  io_u.h           |  4 +++
  ioengines.h      |  2 ++
  options.c        | 11 +++++++
  thread_options.h |  3 ++
  11 files changed, 148 insertions(+), 19 deletions(-)

diff --git a/HOWTO.rst b/HOWTO.rst
index 5bc1713c..c798613b 100644
--- a/HOWTO.rst
+++ b/HOWTO.rst
@@ -2534,6 +2534,13 @@ with the caveat that when used on the command line, they must come after the
  	Specifies logical block application tag mask value, if namespace is
  	formatted to use end to end protection information. Default: 0xffff.
+.. option:: num_range=int : [io_uring_cmd]
+
+	For trim command this will be the number of ranges to trim per I/O
+	request. The number of logical blocks per range is determined by the
+	:option:`bs` option which should be a multiple of logical block size.
+	This cannot be used with read or write. Default: 1.
+

I would add a note here and in fio.1 that log_offset is not able to log all the offsets for trim operations with num_range > 1


  .. option:: cpuload=int : [cpuio]
Attempt to use the specified percentage of CPU cycles. This is a mandatory
diff --git a/backend.c b/backend.c
index 1fab467a..2f2221bf 100644
--- a/backend.c
+++ b/backend.c
@@ -1333,7 +1333,7 @@ static int init_io_u(struct thread_data *td)
  int init_io_u_buffers(struct thread_data *td)
  {
  	struct io_u *io_u;
-	unsigned long long max_bs, min_write;
+	unsigned long long max_bs, min_write, trim_bs = 0;
  	int i, max_units;
  	int data_xfer = 1;
  	char *p;
@@ -1344,7 +1344,18 @@ int init_io_u_buffers(struct thread_data *td)
  	td->orig_buffer_size = (unsigned long long) max_bs
  					* (unsigned long long) max_units;
- if (td_ioengine_flagged(td, FIO_NOIO) || !(td_read(td) || td_write(td)))
+	if (td_trim(td) && td->o.num_range > 1) {
+		trim_bs = td->o.num_range * sizeof(struct trim_range);
+		td->orig_buffer_size = trim_bs
+					* (unsigned long long) max_units;
+	}
+
+	/*
+	 * For reads, writes, and multi-range trim operations we need a
+	 * data buffer
+	 */
+	if (td_ioengine_flagged(td, FIO_NOIO) ||
+	    !(td_read(td) || td_write(td) || (td_trim(td) && td->o.num_range > 1)))
  		data_xfer = 0;
/*
@@ -1396,7 +1407,10 @@ int init_io_u_buffers(struct thread_data *td)
  				fill_verify_pattern(td, io_u->buf, max_bs, io_u, 0, 0);
  			}
  		}
-		p += max_bs;
+		if (td_trim(td) && td->o.num_range > 1)
+			p += trim_bs;
+		else
+			p += max_bs;
  	}
return 0;
diff --git a/cconv.c b/cconv.c
index c9298408..ead47248 100644
--- a/cconv.c
+++ b/cconv.c
@@ -111,6 +111,7 @@ int convert_thread_options_to_cpu(struct thread_options *o,
  	o->serialize_overlap = le32_to_cpu(top->serialize_overlap);
  	o->size = le64_to_cpu(top->size);
  	o->io_size = le64_to_cpu(top->io_size);
+	o->num_range = le32_to_cpu(top->num_range);
  	o->size_percent = le32_to_cpu(top->size_percent);
  	o->io_size_percent = le32_to_cpu(top->io_size_percent);
  	o->fill_device = le32_to_cpu(top->fill_device);
@@ -609,6 +610,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top,
top->size = __cpu_to_le64(o->size);
  	top->io_size = __cpu_to_le64(o->io_size);
+	top->num_range = __cpu_to_le32(o->num_range);
  	top->verify_backlog = __cpu_to_le64(o->verify_backlog);
  	top->start_delay = __cpu_to_le64(o->start_delay);
  	top->start_delay_high = __cpu_to_le64(o->start_delay_high);
diff --git a/fio.1 b/fio.1
index 7ec5c745..cfa49423 100644
--- a/fio.1
+++ b/fio.1
@@ -2293,6 +2293,12 @@ end to end protection information. Default: 0x1234.
  Specifies logical block application tag mask value, if namespace is formatted
  to use end to end protection information. Default: 0xffff.
  .TP
+.BI (io_uring_cmd)num_range \fR=\fPint
+For trim command this will be the number of ranges to trim per I/O request.
+The number of logical blocks per range is determined by the \fBbs\fR option
+which should be a multiple of logical block size. This cannot be used with
+read or write. Default: 1.
+.TP
  .BI (cpuio)cpuload \fR=\fPint
  Attempt to use the specified percentage of CPU cycles. This is a mandatory
  option when using cpuio I/O engine.
diff --git a/fio.h b/fio.h
index 1322656f..5d558e7b 100644
--- a/fio.h
+++ b/fio.h
@@ -71,6 +71,16 @@
struct fio_sem; +#define MAX_TRIM_RANGE 256
+
+/*
+ * Range for trim command
+ */
+struct trim_range {
+	unsigned long long start;
+	unsigned long long len;
+};
+
  /*
   * offset generator types
   */
@@ -609,6 +619,14 @@ static inline void fio_ro_check(const struct thread_data *td, struct io_u *io_u)
  	       !(io_u->ddir == DDIR_TRIM && !td_trim(td)));
  }
+static inline bool multi_range_trim(struct thread_data *td, struct io_u *io_u)
+{
+	if (io_u->ddir == DDIR_TRIM && td->o.num_range > 1)
+		return true;
+	else
+		return false;
+}
+
  static inline bool should_fsync(struct thread_data *td)
  {
  	if (td->last_was_sync)
diff --git a/init.c b/init.c
index 105339fa..7a0b14a3 100644
--- a/init.c
+++ b/init.c
@@ -618,6 +618,19 @@ static int fixup_options(struct thread_data *td)
  		ret |= 1;
  	}
+ if (td_trimwrite(td) && o->num_range > 1) {
+		log_err("fio: trimwrite cannot be used with multiple"
+			" ranges.\n");
+		ret |= 1;
+	}
+
+	if (td_trim(td) && o->num_range > 1 &&
+	    !td_ioengine_flagged(td, FIO_MULTI_RANGE_TRIM)) {
+		log_err("fio: can't use multiple ranges with IO engine %s\n",
+			td->io_ops->name);
+		ret |= 1;
+	}
+
  #ifndef CONFIG_PSHARED
  	if (!o->use_thread) {
  		log_info("fio: this platform does not support process shared"
diff --git a/io_u.c b/io_u.c
index 13187882..50df518c 100644
--- a/io_u.c
+++ b/io_u.c
@@ -943,8 +943,11 @@ static void setup_strided_zone_mode(struct thread_data *td, struct io_u *io_u)
  static int fill_io_u(struct thread_data *td, struct io_u *io_u)
  {
  	bool is_random;
-	uint64_t offset;
+	uint64_t offset, buflen, i = 0;
  	enum io_u_action ret;
+	struct fio_file *f = io_u->file;
+	struct trim_range *range;
+	uint8_t *buf;
if (td_ioengine_flagged(td, FIO_NOIO))
  		goto out;
@@ -966,22 +969,68 @@ static int fill_io_u(struct thread_data *td, struct io_u *io_u)
  	else if (td->o.zone_mode == ZONE_MODE_ZBD)
  		setup_zbd_zone_mode(td, io_u);
- /*
-	 * No log, let the seq/rand engine retrieve the next buflen and
-	 * position.
-	 */
-	if (get_next_offset(td, io_u, &is_random)) {
-		dprint(FD_IO, "io_u %p, failed getting offset\n", io_u);
-		return 1;
-	}
+	if (multi_range_trim(td, io_u)) {
+		buf = io_u->buf;
+		buflen = 0;
- io_u->buflen = get_next_buflen(td, io_u, is_random);
-	if (!io_u->buflen) {
-		dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
-		return 1;
-	}
+		while (i < td->o.num_range) {
+			range = (struct trim_range *)buf;
+			if (get_next_offset(td, io_u, &is_random)) {
+				dprint(FD_IO, "io_u %p, failed getting offset\n",
+				       io_u);
+				break;
+			}
+
+			io_u->buflen = get_next_buflen(td, io_u, is_random);
+			if (!io_u->buflen) {
+				dprint(FD_IO, "io_u %p, failed getting buflen\n", io_u);
+				break;
+			}
+
+			if (io_u->offset + io_u->buflen > io_u->file->real_file_size) {
+				dprint(FD_IO, "io_u %p, off=0x%llx + len=0x%llx exceeds file size=0x%llx\n",
+					io_u,
+					(unsigned long long) io_u->offset, io_u->buflen,
+					(unsigned long long) io_u->file->real_file_size);
+				break;
+			}
+
+			range->start = io_u->offset;
+			range->len = io_u->buflen;

It would be helpful to include a debug message with the offset and length here dprint(FD_IO, "...") for troubleshooting and testing purposes since there is no other convenient way to see all the ranges.

Vincent




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux