Re: [PATCH 1/1] engines/io_uring: io_uring_cmd engine cleanup and fixes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 5/8/23 12:01, Ankit Kumar wrote:
- The io_uring_cmd ioengine assumes that logical block size is power of 2.
In case of extended LBA where metadata is transferred as part of LBA
this is not correct. Use division operation for that. The existing
calculation for power of 2 LBA remains the same.

- The current implementation doesn't support protection info and
metadata transferred as separate buffer, return error for these
scenarios.

- Add checks to verify that block sizes are multiple of LBA size.
- Add support for 64 LBA formats as per the latest NVM command set
specification.

Signed-off-by: Ankit Kumar <ankit.kumar@xxxxxxxxxxx>

Ankit, overall this looks fine.

Consider splitting this patch into two separate ones:

1) add support for extended LBA sizes
2) add support for more than 16 LBA formats

From what I can tell the two changes are conceptually distinct, so it makes sense to have two separate patches.

I have 4 specific comments below.

---
  engines/io_uring.c | 31 +++++++++++++++++++++----
  engines/nvme.c     | 57 +++++++++++++++++++++++++++++++++++++---------
  engines/nvme.h     |  3 ++-
  3 files changed, 75 insertions(+), 16 deletions(-)

diff --git a/engines/io_uring.c b/engines/io_uring.c
index f5ffe9f4..32ddba19 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -1177,22 +1177,41 @@ static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
  	if (o->cmd_type == FIO_URING_CMD_NVME) {
  		struct nvme_data *data = NULL;
  		unsigned int nsid, lba_size = 0;
+		__u32 ms = 0;
  		__u64 nlba = 0;
  		int ret;
/* Store the namespace-id and lba size. */
  		data = FILE_ENG_DATA(f);
  		if (data == NULL) {
-			ret = fio_nvme_get_info(f, &nsid, &lba_size, &nlba);
+			ret = fio_nvme_get_info(f, &nsid, &lba_size, &ms, &nlba);
  			if (ret)
  				return ret;
data = calloc(1, sizeof(struct nvme_data));
  			data->nsid = nsid;
-			data->lba_shift = ilog2(lba_size);
+			if (ms)
+				data->lba_ext = lba_size + ms;
+			else
+				data->lba_shift = ilog2(lba_size);
FILE_SET_ENG_DATA(f, data);
  		}
+
+		data = FILE_ENG_DATA(f);

The line above is not needed since data gets a value from calloc if it was intially null.

+		lba_size = data->lba_ext ? data->lba_ext : (1 << data->lba_shift);
+
+		for_each_rw_ddir(ddir) {
+			if (td->o.min_bs[ddir] % lba_size != 0 ||
+				td->o.max_bs[ddir] % lba_size != 0) {

The != 0 is not needed in the two lines above.

+				if (data->lba_ext)
+					log_err("bs must be a multiple of "
+						"(LBA data size + Metadata size)\n");
+				else
+					log_err("bs must be a multiple of LBA data size\n");
+				return 1;
+			}
+                }
  	}
  	if (!ld || !o->registerfiles)
  		return generic_open_file(td, f);
@@ -1243,16 +1262,20 @@ static int fio_ioring_cmd_get_file_size(struct thread_data *td,
  	if (o->cmd_type == FIO_URING_CMD_NVME) {
  		struct nvme_data *data = NULL;
  		unsigned int nsid, lba_size = 0;
+		__u32 ms = 0;
  		__u64 nlba = 0;
  		int ret;
- ret = fio_nvme_get_info(f, &nsid, &lba_size, &nlba);
+		ret = fio_nvme_get_info(f, &nsid, &lba_size, &ms, &nlba);
  		if (ret)
  			return ret;
data = calloc(1, sizeof(struct nvme_data));
  		data->nsid = nsid;
-		data->lba_shift = ilog2(lba_size);
+		if (ms)
+			data->lba_ext = lba_size + ms;
+		else
+			data->lba_shift = ilog2(lba_size);
f->real_file_size = lba_size * nlba;
  		fio_file_set_size_known(f);
diff --git a/engines/nvme.c b/engines/nvme.c
index fd2161f3..b99eae9c 100644
--- a/engines/nvme.c
+++ b/engines/nvme.c
@@ -21,8 +21,13 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
  	else
  		return -ENOTSUP;
- slba = io_u->offset >> data->lba_shift;
-	nlb = (io_u->xfer_buflen >> data->lba_shift) - 1;
+	if (data->lba_ext) {
+		slba = io_u->offset / data->lba_ext;
+		nlb = (io_u->xfer_buflen / data->lba_ext) - 1;
+	} else {
+		slba = io_u->offset >> data->lba_shift;
+		nlb = (io_u->xfer_buflen >> data->lba_shift) - 1;
+	}
/* cdw10 and cdw11 represent starting lba */
  	cmd->cdw10 = slba & 0xffffffff;
@@ -65,8 +70,13 @@ int fio_nvme_trim(const struct thread_data *td, struct fio_file *f,
  	struct nvme_dsm_range dsm;
  	int ret;
- dsm.nlb = (len >> data->lba_shift);
-	dsm.slba = (offset >> data->lba_shift);
+	if (data->lba_ext) {
+		dsm.nlb = len / data->lba_ext;
+		dsm.slba = offset / data->lba_ext;
+	} else {
+		dsm.nlb = len >> data->lba_shift;
+		dsm.slba = offset >> data->lba_shift;
+	}
ret = nvme_trim(f->fd, data->nsid, 1, sizeof(struct nvme_dsm_range),
  			&dsm);
@@ -94,11 +104,12 @@ static int nvme_identify(int fd, __u32 nsid, enum nvme_identify_cns cns,
  }
int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
-		      __u64 *nlba)
+		      __u32 *ms, __u64 *nlba)
  {
  	struct nvme_id_ns ns;
  	int namespace_id;
  	int fd, err;
+	__u32 format_idx;
if (f->filetype != FIO_TYPE_CHAR) {
  		log_err("ioengine io_uring_cmd only works with nvme ns "
@@ -113,9 +124,8 @@ int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
  	namespace_id = ioctl(fd, NVME_IOCTL_ID);
  	if (namespace_id < 0) {
  		err = -errno;
-		log_err("failed to fetch namespace-id");
-		close(fd);
-		return err;
+		log_err("%s: failed to fetch namespace-id\n", f->file_name);
+		goto out;
  	}
/*
@@ -125,17 +135,42 @@ int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
  	err = nvme_identify(fd, namespace_id, NVME_IDENTIFY_CNS_NS,
  				NVME_CSI_NVM, &ns);
  	if (err) {
-		log_err("failed to fetch identify namespace\n");
+		log_err("%s: failed to fetch identify namespace\n",
+			f->file_name);
  		close(fd);
  		return err;
  	}
*nsid = namespace_id;
-	*lba_sz = 1 << ns.lbaf[(ns.flbas & 0x0f)].ds;
+
+	if (ns.nlbaf < 16)
+		format_idx = ns.flbas & 0xf;
+	else
+		format_idx = (ns.flbas & 0xf) + (((ns.flbas >> 5) & 0x3) << 4);
+

Consider quoting the NVMe standard's description of flbas in the commit message to make it easier for anyone looking at this commit in the future to verify that the code above is doing the right thing.

+	*lba_sz = 1 << ns.lbaf[format_idx].ds;
+

For consistency the definition of struct nvme_id_ns should be updated to be struct nvme_lbaf lbaf[64].

+	/* Only extended LBA can be supported */
+	*ms = le16_to_cpu(ns.lbaf[format_idx].ms);
+	if (*ms && !((ns.flbas >> 4) & 0x1)) {
+		log_err("%s: only extended logical block can be supported\n",
+			f->file_name);
+		err = -ENOTSUP;
+		goto out;
+	}
+
+	/* Check for end to end data protection support */
+	if (ns.dps & 0x3) {
+		log_err("%s: end to end data protection not supported\n",
+			f->file_name);
+		err = -ENOTSUP;
+		goto out;
+	}
  	*nlba = ns.nsze;
+out:
  	close(fd);
-	return 0;
+	return err;
  }
int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
diff --git a/engines/nvme.h b/engines/nvme.h
index 408594d5..f1d84d68 100644
--- a/engines/nvme.h
+++ b/engines/nvme.h
@@ -88,6 +88,7 @@ enum nvme_zns_zs {
  struct nvme_data {
  	__u32 nsid;
  	__u32 lba_shift;
+	__u32 lba_ext;
  };
struct nvme_lbaf {
@@ -223,7 +224,7 @@ int fio_nvme_iomgmt_ruhs(struct thread_data *td, struct fio_file *f,
  			 struct nvme_fdp_ruh_status *ruhs, __u32 bytes);
int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
-		      __u64 *nlba);
+		      __u32 *ms, __u64 *nlba);
int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
  			    struct iovec *iov);




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux