On 7/26/22 4:52 AM, Ankit Kumar wrote: > Add a way to test uring passthrough commands, which was added > with 5.19 kernel. This requires nvme-ns character device (/dev/ngXnY) > as filename argument. It runs a combination of read/write tests with > sqthread poll, vectored and non-vectored commands, fixed I/O buffers. > > Signed-off-by: Ankit Kumar <ankit.kumar@xxxxxxxxxxx> > --- > test/Makefile | 1 + > test/io_uring_passthrough.c | 319 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 320 insertions(+) > create mode 100644 test/io_uring_passthrough.c > > diff --git a/test/Makefile b/test/Makefile > index a36ddb3..418c11c 100644 > --- a/test/Makefile > +++ b/test/Makefile > @@ -90,6 +90,7 @@ test_srcs := \ > io-cancel.c \ > iopoll.c \ > io_uring_enter.c \ > + io_uring_passthrough.c \ > io_uring_register.c \ > io_uring_setup.c \ > lfs-openat.c \ > diff --git a/test/io_uring_passthrough.c b/test/io_uring_passthrough.c > new file mode 100644 > index 0000000..2e2b806 > --- /dev/null > +++ b/test/io_uring_passthrough.c > @@ -0,0 +1,319 @@ > +/* SPDX-License-Identifier: MIT */ > +/* > + * Description: basic read/write tests for io_uring passthrough commands > + */ > +#include <errno.h> > +#include <stdio.h> > +#include <unistd.h> > +#include <stdlib.h> > +#include <string.h> > + > +#include "helpers.h" > +#include "liburing.h" > +#include "nvme.h" > + > +#define FILE_SIZE (256 * 1024) > +#define BS 8192 > +#define BUFFERS (FILE_SIZE / BS) > + > +static struct iovec *vecs; > + > +/* > + * Each offset in the file has the ((test_case / 2) * FILE_SIZE) > + * + (offset / sizeof(int)) stored for every > + * sizeof(int) address. 
> + */ > +static int verify_buf(int tc, void *buf, off_t off) > +{ > + int i, u_in_buf = BS / sizeof(unsigned int); > + unsigned int *ptr; > + > + off /= sizeof(unsigned int); > + off += (tc / 2) * FILE_SIZE; > + ptr = buf; > + for (i = 0; i < u_in_buf; i++) { > + if (off != *ptr) { > + fprintf(stderr, "Found %u, wanted %lu\n", *ptr, off); > + return 1; > + } > + ptr++; > + off++; > + } > + > + return 0; > +} > + > +static int fill_pattern(int tc) > +{ > + unsigned int val, *ptr; > + int i, j; > + int u_in_buf = BS / sizeof(val); > + > + val = (tc / 2) * FILE_SIZE; > + for (i = 0; i < BUFFERS; i++) { > + ptr = vecs[i].iov_base; > + for (j = 0; j < u_in_buf; j++) { > + *ptr = val; > + val++; > + ptr++; > + } > + } > + > + return 0; > +} > + > +static int __test_io(const char *file, struct io_uring *ring, int tc, int read, > + int sqthread, int fixed, int nonvec) > +{ > + struct io_uring_sqe *sqe; > + struct io_uring_cqe *cqe; > + struct nvme_uring_cmd *cmd; > + int open_flags; > + int do_fixed; > + int i, ret, fd = -1; > + off_t offset; > + __u64 slba; > + __u32 nlb; > + > +#ifdef VERBOSE > + fprintf(stdout, "%s: start %d/%d/%d/%d: ", __FUNCTION__, read, > + sqthread, fixed, > + nonvec); > +#endif > + if (read) > + open_flags = O_RDONLY; > + else > + open_flags = O_WRONLY; > + > + if (fixed) { > + ret = t_register_buffers(ring, vecs, BUFFERS); > + if (ret == T_SETUP_SKIP) > + return 0; > + if (ret != T_SETUP_OK) { > + fprintf(stderr, "buffer reg failed: %d\n", ret); > + goto err; > + } > + } > + > + fd = open(file, open_flags); > + if (fd < 0) { > + perror("file open"); > + goto err; > + } > + > + if (sqthread) { > + ret = io_uring_register_files(ring, &fd, 1); > + if (ret) { > + fprintf(stderr, "file reg failed: %d\n", ret); > + goto err; > + } > + } > + > + if (!read) > + fill_pattern(tc); > + > + offset = 0; > + for (i = 0; i < BUFFERS; i++) { > + sqe = io_uring_get_sqe(ring); > + if (!sqe) { > + fprintf(stderr, "sqe get failed\n"); > + goto err; > + } > + if 
(read) { > + int use_fd = fd; > + > + do_fixed = fixed; > + > + if (sqthread) > + use_fd = 0; > + if (fixed && (i & 1)) > + do_fixed = 0; > + if (do_fixed) { > + io_uring_prep_read_fixed(sqe, use_fd, vecs[i].iov_base, > + vecs[i].iov_len, > + offset, i); > + sqe->cmd_op = NVME_URING_CMD_IO; > + } else if (nonvec) { > + io_uring_prep_read(sqe, use_fd, vecs[i].iov_base, > + vecs[i].iov_len, offset); > + sqe->cmd_op = NVME_URING_CMD_IO; > + } else { > + io_uring_prep_readv(sqe, use_fd, &vecs[i], 1, > + offset); > + sqe->cmd_op = NVME_URING_CMD_IO_VEC; > + } > + } else { > + int use_fd = fd; > + > + do_fixed = fixed; > + > + if (sqthread) > + use_fd = 0; > + if (fixed && (i & 1)) > + do_fixed = 0; > + if (do_fixed) { > + io_uring_prep_write_fixed(sqe, use_fd, vecs[i].iov_base, > + vecs[i].iov_len, > + offset, i); > + sqe->cmd_op = NVME_URING_CMD_IO; > + } else if (nonvec) { > + io_uring_prep_write(sqe, use_fd, vecs[i].iov_base, > + vecs[i].iov_len, offset); > + sqe->cmd_op = NVME_URING_CMD_IO; > + } else { > + io_uring_prep_writev(sqe, use_fd, &vecs[i], 1, > + offset); > + sqe->cmd_op = NVME_URING_CMD_IO_VEC; > + } > + } > + sqe->opcode = IORING_OP_URING_CMD; > + sqe->user_data = ((uint64_t)offset << 32) | i; > + if (sqthread) > + sqe->flags |= IOSQE_FIXED_FILE; > + > + /* 80 bytes for NVMe uring passthrough command */ > + cmd = (struct nvme_uring_cmd *)sqe->cmd; > + memset(cmd, 0, sizeof(struct nvme_uring_cmd)); > + > + cmd->opcode = read ? 
nvme_cmd_read : nvme_cmd_write; > + > + slba = offset >> lba_shift; > + nlb = (BS >> lba_shift) - 1; > + > + /* cdw10 and cdw11 represent starting lba */ > + cmd->cdw10 = slba & 0xffffffff; > + cmd->cdw11 = slba >> 32; > + /* cdw12 represent number of lba's for read/write */ > + cmd->cdw12 = nlb; > + if (do_fixed || nonvec) { > + cmd->addr = (__u64)(uintptr_t)vecs[i].iov_base; > + cmd->data_len = vecs[i].iov_len; > + } else { > + cmd->addr = (__u64)(uintptr_t)&vecs[i]; > + cmd->data_len = 1; > + } > + cmd->nsid = nsid; > + > + offset += BS; > + } > + > + ret = io_uring_submit(ring); > + if (ret != BUFFERS) { > + fprintf(stderr, "submit got %d, wanted %d\n", ret, BUFFERS); > + goto err; > + } > + > + for (i = 0; i < BUFFERS; i++) { > + ret = io_uring_wait_cqe(ring, &cqe); > + if (ret) { > + fprintf(stderr, "wait_cqe=%d\n", ret); > + goto err; > + } > + if (cqe->res != 0) { > + fprintf(stderr, "cqe res %d, wanted 0\n", cqe->res); > + goto err; > + } > + io_uring_cqe_seen(ring, cqe); > + if (read) { > + int index = cqe->user_data & 0xffffffff; > + void *buf = vecs[index].iov_base; > + off_t voff = cqe->user_data >> 32; > + > + if (verify_buf(tc, buf, voff)) > + goto err; > + } > + } > + > + if (fixed) { > + ret = io_uring_unregister_buffers(ring); > + if (ret) { > + fprintf(stderr, "buffer unreg failed: %d\n", ret); > + goto err; > + } > + } > + if (sqthread) { > + ret = io_uring_unregister_files(ring); > + if (ret) { > + fprintf(stderr, "file unreg failed: %d\n", ret); > + goto err; > + } > + } > + > + close(fd); > +#ifdef VERBOSE > + fprintf(stdout, "PASS\n"); > +#endif > + return 0; > +err: > +#ifdef VERBOSE > + fprintf(stderr, "FAILED\n"); > +#endif > + if (fd != -1) > + close(fd); > + return 1; > +} > + > +static int test_io(const char *file, int tc, int read, int sqthread, > + int fixed, int nonvec) > +{ > + struct io_uring ring; > + int ret, ring_flags = 0; > + > + ring_flags |= IORING_SETUP_SQE128; > + ring_flags |= IORING_SETUP_CQE32; > + > + if (sqthread) > 
+ ring_flags |= IORING_SETUP_SQPOLL; > + > + ret = t_create_ring(64, &ring, ring_flags); > + if (ret == T_SETUP_SKIP) > + return 0; > + if (ret != T_SETUP_OK) { > + fprintf(stderr, "ring create failed: %d\n", ret); > + return 1; > + } > + > + ret = __test_io(file, &ring, tc, read, sqthread, fixed, nonvec); > + io_uring_queue_exit(&ring); > + > + return ret; > +} > + > +int main(int argc, char *argv[]) > +{ > + int i, ret; > + char *fname; > + > + if (argc < 2) { > + printf("%s: requires NVMe character device\n", argv[0]); > + return T_EXIT_SKIP; > + } > + > + fname = argv[1]; > + ret = fio_nvme_get_info(fname); > + > + if (ret) { > + fprintf(stderr, "failed to fetch device info: %d\n", ret); > + goto err; > + } If we can't open the device, then we should probably turn this into a SKIP rather than a FAIL? The same applies if the argument passed isn't actually an NVMe device: the test should just skip in that case rather than print errors. -- Jens Axboe