Re: (subset) [PATCH V10 0/12] io_uring: support group buffer & ublk zc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/12/24 00:53, Ming Lei wrote:
On Thu, Nov 07, 2024 at 03:25:59PM -0700, Jens Axboe wrote:
On 11/7/24 3:25 PM, Jens Axboe wrote:
...
Hi Jens,

Any comment on the rest of the series?

Ming, it's dragging on because it's overcomplicated. I very much want
it to get to some conclusion, get it merged and move on, and I strongly
believe Jens shares the sentiment on getting the thing done.

Please, take the patches attached, adjust them to your needs and put
ublk on top. Or tell me if there is a strong reason why it doesn't work.
The implementation is very simple and needs almost nothing
from io_uring; it's low risk and we can merge in no time.

If you can't cache the allocation in ublk, io_uring can add a cache.
If ublk needs more space and cannot embed the structure, we can add
a "private" pointer into io_mapped_ubuf. If it needs to check the IO
direction, we can add that as well (though I have doubts you really need
it, read-only might make sense, write-only not so much). We'll also
merge Jens' patch allowing to remove a buffer with a request.

--
Pavel Begunkov
From 78a9c8a3b9d59e7465d6c158283a531a221fa3b2 Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 22:58:18 +0000
Subject: [PATCH 1/4] io_uring: export io_mapped_ubuf definition

---
 include/linux/io_uring/kbuf.h | 19 +++++++++++++++++++
 io_uring/rsrc.h               | 12 ++----------
 2 files changed, 21 insertions(+), 10 deletions(-)
 create mode 100644 include/linux/io_uring/kbuf.h

diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
new file mode 100644
index 000000000000..a32578df3d8e
--- /dev/null
+++ b/include/linux/io_uring/kbuf.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _LINUX_IO_URING_KBUF_H
+#define _LINUX_IO_URING_KBUF_H
+
+#include <uapi/linux/io_uring.h>
+#include <linux/io_uring_types.h>
+#include <linux/bvec.h>
+
+struct io_mapped_ubuf {
+	u64		ubuf;
+	unsigned int	len;
+	unsigned int	nr_bvecs;
+	unsigned int    folio_shift;
+	refcount_t	refs;
+	unsigned long	acct_pages;
+	struct bio_vec	bvec[] __counted_by(nr_bvecs);
+};
+
+#endif
\ No newline at end of file
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 7a4668deaa1a..885ccecade08 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -2,6 +2,8 @@
 #ifndef IOU_RSRC_H
 #define IOU_RSRC_H
 
+#include <linux/io_uring/kbuf.h>
+
 #define IO_NODE_ALLOC_CACHE_MAX 32
 
 #define IO_RSRC_TAG_TABLE_SHIFT	(PAGE_SHIFT - 3)
@@ -24,16 +26,6 @@ struct io_rsrc_node {
 	};
 };
 
-struct io_mapped_ubuf {
-	u64		ubuf;
-	unsigned int	len;
-	unsigned int	nr_bvecs;
-	unsigned int    folio_shift;
-	refcount_t	refs;
-	unsigned long	acct_pages;
-	struct bio_vec	bvec[] __counted_by(nr_bvecs);
-};
-
 struct io_imu_folio_data {
 	/* Head folio can be partially included in the fixed buf */
 	unsigned int	nr_pages_head;
-- 
2.46.0


From 6839ca1ca94a89ec11362f32af22e2c0cfdfaa81 Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 23:12:15 +0000
Subject: [PATCH 2/4] io_uring: add io_mapped_ubuf release callback

---
 include/linux/io_uring/kbuf.h | 10 ++++++++++
 io_uring/rsrc.c               |  6 +++++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
index a32578df3d8e..aa3eeaa1ac25 100644
--- a/include/linux/io_uring/kbuf.h
+++ b/include/linux/io_uring/kbuf.h
@@ -13,7 +13,17 @@ struct io_mapped_ubuf {
 	unsigned int    folio_shift;
 	refcount_t	refs;
 	unsigned long	acct_pages;
+	void (*release)(struct io_mapped_ubuf *);
 	struct bio_vec	bvec[] __counted_by(nr_bvecs);
 };
 
+static inline void iou_init_kbuf(struct io_mapped_ubuf *buf,
+				 void (*release)(struct io_mapped_ubuf *))
+{
+	refcount_set(&buf->refs, 1);
+	buf->acct_pages = 0;
+	buf->ubuf = 0;
+	buf->release = release;
+}
+
 #endif
\ No newline at end of file
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index adaae8630932..84ea5a480058 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -110,6 +110,10 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 
 		if (!refcount_dec_and_test(&imu->refs))
 			return;
+		if (imu->release) {
+			imu->release(imu);
+			return;
+		}
 		for (i = 0; i < imu->nr_bvecs; i++)
 			unpin_user_page(imu->bvec[i].bv_page);
 		if (imu->acct_pages)
@@ -762,6 +766,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	}
 
 	size = iov->iov_len;
+	iou_init_kbuf(imu, NULL);
 	/* store original address for later verification */
 	imu->ubuf = (unsigned long) iov->iov_base;
 	imu->len = iov->iov_len;
@@ -769,7 +774,6 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	imu->folio_shift = PAGE_SHIFT;
 	if (coalesced)
 		imu->folio_shift = data.folio_shift;
-	refcount_set(&imu->refs, 1);
 	off = (unsigned long) iov->iov_base & ((1UL << imu->folio_shift) - 1);
 	node->buf = imu;
 	ret = 0;
-- 
2.46.0


From 55b56ed8c5cb58727c7daabd1e56e6f194749e7f Mon Sep 17 00:00:00 2001
Date: Tue, 12 Nov 2024 23:33:24 +0000
Subject: [PATCH 3/4] io_uring: add a helper for leasing a buffer

---
 include/linux/io_uring/kbuf.h | 23 +++++++++++++++++++++++
 io_uring/rsrc.c               | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/include/linux/io_uring/kbuf.h b/include/linux/io_uring/kbuf.h
index aa3eeaa1ac25..91cfcdc685cc 100644
--- a/include/linux/io_uring/kbuf.h
+++ b/include/linux/io_uring/kbuf.h
@@ -4,6 +4,7 @@
 
 #include <uapi/linux/io_uring.h>
 #include <linux/io_uring_types.h>
+#include <linux/io_uring/cmd.h>
 #include <linux/bvec.h>
 
 struct io_mapped_ubuf {
@@ -26,4 +27,26 @@ static inline void iou_init_kbuf(struct io_mapped_ubuf *buf,
 	buf->release = release;
 }
 
+#if defined(CONFIG_IO_URING)
+int iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+		    struct io_mapped_ubuf *buf, unsigned index);
+#else
+static inline int iou_export_kbuf(struct io_ring_ctx *ctx,
+				  unsigned issue_flags,
+				  struct io_mapped_ubuf *buf, unsigned index)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static inline int io_uring_cmd_export_kbuf(struct io_uring_cmd *cmd,
+					   unsigned issue_flags,
+					   struct io_mapped_ubuf *buf,
+					   unsigned index)
+{
+	struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
+
+	return iou_export_kbuf(ctx, issue_flags, buf, index);
+}
+
 #endif
\ No newline at end of file
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 84ea5a480058..07842a6a8020 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -797,6 +797,38 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	return node;
 }
 
+static int __iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+			     struct io_mapped_ubuf *buf, unsigned idx)
+{
+	struct io_rsrc_node *node;
+
+	if (unlikely(idx >= ctx->buf_table.nr)) {
+		if (!ctx->buf_table.nr)
+			return -ENXIO;
+		return -EINVAL;
+	}
+	idx = array_index_nospec(idx, ctx->buf_table.nr);
+
+	node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
+	if (!node)
+		return -ENOMEM;
+	node->buf = buf;
+	io_reset_rsrc_node(ctx, &ctx->buf_table, idx);
+	ctx->buf_table.nodes[idx] = node;
+	return 0;
+}
+
+int iou_export_kbuf(struct io_ring_ctx *ctx, unsigned issue_flags,
+		    struct io_mapped_ubuf *buf, unsigned idx)
+{
+	int ret;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	ret = __iou_export_kbuf(ctx, issue_flags, buf, idx);
+	io_ring_submit_unlock(ctx, issue_flags);
+	return ret;
+}
+
 int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			    unsigned int nr_args, u64 __user *tags)
 {
-- 
2.46.0
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <unistd.h>

#include "liburing.h"
#include "test.h"

#define BUF_SIZE 4096

static char buf[BUF_SIZE];
static char buf_tmp[BUF_SIZE];

/*
 * Prepare an IORING_OP_URING_CMD request on @fd that is meant to make the
 * driver lease one of its buffers into registered-buffer table index @slot.
 *
 * @sqe:  submission queue entry to fill in
 * @fd:   descriptor of the device the command is sent to
 * @slot: index in the ring's registered-buffer table to lease into
 *
 * This is a template only: the command opcode and the way @slot is passed
 * to the driver are driver-specific and still have to be filled in.
 */
static void io_uring_prep_my_cmd_lease(struct io_uring_sqe *sqe, int fd,
				       unsigned slot)
{
	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, 0, 0, 0);
	/* TODO: set the driver-specific command and encode @slot in the SQE */
	(void)slot;
}

/*
 * Submit exactly one queued request and wait for one completion.
 *
 * Asserts that a single SQE was submitted and that waiting succeeded,
 * prints the completion's user_data and result, then marks the CQE as
 * seen so its slot can be reused.
 */
static void do_submit_wait1(struct io_uring *ring)
{
	int ret;
	struct io_uring_cqe *done;

	/* exactly one SQE is expected to be pending */
	ret = io_uring_submit(ring);
	assert(ret == 1);

	ret = io_uring_wait_cqe(ring, &done);
	assert(ret >= 0);

	printf("cqe data %i res %i\n", (int)done->user_data, done->res);
	io_uring_cqe_seen(ring, done);
}

int main(int argc, char *argv[])
{
	unsigned long buf_offset = 0;
	unsigned slot = 0; /* reg buffer table index */
	struct io_uring_sqe *sqe;
	int pipe1[2], pipe2[2];
	struct io_uring ring;
	int ret, i;

	for (i = 0; i < BUF_SIZE; i++)
		buf[i] = (char)i;

	ret = pipe(pipe1);
	assert(ret == 0);
	ret = pipe(pipe2);
	assert(ret == 0);

	ret = io_uring_queue_init(8, &ring, 0);
	assert(ret == 0);
	ret = io_uring_register_buffers_sparse(&ring, 16);
	assert(ret >= 0);

	ret = write(pipe1[1], buf, BUF_SIZE);
	assert(ret == BUF_SIZE);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_my_cmd_lease(sqe, bdev_fd, slot);
	sqe->user_data = 1;
	do_submit_wait1(&ring);

	// read data into the leased buffer
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read_fixed(sqe, pipe1[0], (void *)buf_offset, BUF_SIZE, 0, slot);
	sqe->user_data = 2;
	do_submit_wait1(&ring);

	// write from the leased buffer into a pipe
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_write_fixed(sqe, pipe2[1], (void *)buf_offset, BUF_SIZE, 0, slot);
	sqe->user_data = 3;
	do_submit_wait1(&ring);

	// check the right data is in the pipe
	ret = read(pipe2[0], buf_tmp, BUF_SIZE);
	assert(ret == BUF_SIZE);
	for (i = 0; i < BUF_SIZE; i++) {
		assert(buf[i] == buf_tmp[i]);
	}

	struct iovec iovec = {};
	ret = io_uring_register_buffers_update_tag(&ring, 0, &iovec, NULL, 1);
	assert(ret >= 0);

	io_uring_queue_exit(&ring);
	return 0;
}

[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux