Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@xxxxxxxxxx>
---
fs/io_uring.c | 139 +++++++++++++++++++++++++++++++++++++++++-
include/uapi/linux/io_uring.h | 8 +--
2 files changed, 140 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 71f6d5c..6020fd2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1006,6 +1006,9 @@ struct io_op_def {
.work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
},
+ [IORING_OP_BUFFERS_UPDATE] = {
+ .work_flags = IO_WQ_WORK_MM,
+ },
};
enum io_mem_account {
@@ -1025,6 +1028,9 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update *ip,
unsigned nr_args);
+static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
+ struct io_uring_rsrc_update *up,
+ unsigned nr_args);
static void __io_clean_op(struct io_kiocb *req);
static struct file *io_file_get(struct io_submit_state *state,
struct io_kiocb *req, int fd, bool fixed);
@@ -5939,6 +5945,19 @@ static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node)
percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
}
+
+/*
+ * Prep handler for IORING_OP_BUFFERS_UPDATE (dispatched from io_req_prep()).
+ * All of the work is delegated to the shared io_rsrc_update_prep() helper,
+ * whose return value is passed straight back to the caller.
+ */
+static int io_buffers_update_prep(struct io_kiocb *req,
+				  const struct io_uring_sqe *sqe)
+{
+	return io_rsrc_update_prep(req, sqe);
+}
+
+/*
+ * Issue handler for IORING_OP_BUFFERS_UPDATE (dispatched from
+ * io_issue_sqe()). Hands the request to the shared io_rsrc_update() helper
+ * with __io_sqe_buffers_update() as the resource-specific update routine
+ * and returns whatever that helper returns.
+ */
+static int io_buffers_update(struct io_kiocb *req, bool force_nonblock,
+			     struct io_comp_state *cs)
+{
+	return io_rsrc_update(req, force_nonblock, cs,
+			      __io_sqe_buffers_update);
+}
+
static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
switch (req->opcode) {
@@ -6010,11 +6029,13 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_renameat_prep(req, sqe);
case IORING_OP_UNLINKAT:
return io_unlinkat_prep(req, sqe);
+ case IORING_OP_BUFFERS_UPDATE:
+ return io_buffers_update_prep(req, sqe);
}
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
- return-EINVAL;
+ return -EINVAL;
}
static int io_req_defer_prep(struct io_kiocb *req,
@@ -6268,6 +6289,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
case IORING_OP_UNLINKAT:
ret = io_unlinkat(req, force_nonblock);
break;
+ case IORING_OP_BUFFERS_UPDATE:
+ ret = io_buffers_update(req, force_nonblock, cs);
+ break;
default:
ret = -EINVAL;
break;
@@ -8224,6 +8248,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
if (imu->acct_pages)
io_unaccount_mem(ctx, imu->nr_bvecs, ACCT_PINNED);
kvfree(imu->bvec);
+ imu->bvec = NULL;
imu->nr_bvecs = 0;
}
@@ -8441,6 +8466,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
if (pret > 0)
unpin_user_pages(pages, pret);
kvfree(imu->bvec);
+ imu->bvec = NULL;
goto done;
}
@@ -8602,6 +8628,8 @@ static void io_buf_data_ref_zero(struct percpu_ref *ref)
static void io_ring_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
io_buffer_unmap(ctx, prsrc->buf);
+ /*
+  * io_buffer_unmap() has dropped the bvec array; now free the
+  * io_mapped_ubuf structure itself and clear the stale pointer.
+  * NOTE(review): this assumes every prsrc->buf reaching here is a
+  * standalone allocation (as the kmemdup() copy queued by
+  * __io_sqe_buffers_update() is) and never an element embedded in a
+  * buffer table - confirm for any other producer of io_rsrc_put.
+  */
+ kvfree(prsrc->buf);
+ prsrc->buf = NULL;
}
static struct fixed_rsrc_ref_node *alloc_fixed_buf_ref_node(
@@ -8684,6 +8712,111 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
return 0;
}
+/*
+ * Typed front end for io_queue_rsrc_removal(): queue the mapped buffer
+ * @imu for removal on @data. Returns the helper's result unchanged.
+ */
+static inline int io_queue_buffer_removal(struct fixed_rsrc_data *data,
+					  struct io_mapped_ubuf *imu)
+{
+	void *rsrc = (void *)imu;
+
+	return io_queue_rsrc_removal(data, rsrc);
+}
+
+/*
+ * Buffer-flavoured name for destroy_fixed_rsrc_ref_node(); tears down a
+ * ref node obtained from alloc_fixed_buf_ref_node().
+ */
+static void destroy_fixed_buf_ref_node(struct fixed_rsrc_ref_node *ref_node)
+{
+	destroy_fixed_rsrc_ref_node(ref_node);
+}
+
+/*
+ * Update a contiguous range of slots in the ring's fixed-buffer table.
+ *
+ * @ctx:     ring whose buffer table is being updated
+ * @up:      update descriptor; up->offset is the first slot to touch and
+ *           up->iovs is a user pointer to an array of struct iovec.
+ *           up->offset is advanced by one for every slot processed.
+ * @nr_args: number of iovec entries (slots) to process
+ *
+ * For each slot, a populated entry is duplicated and the copy is queued for
+ * removal via io_queue_buffer_removal(), after which the slot is cleared.
+ * A replacement buffer is then registered in the slot only if
+ * io_buffer_validate() returns 0 for the user-supplied iovec.
+ *
+ * Returns the number of slots processed when at least one was handled;
+ * otherwise 0 (nothing to do) or a negative errno: -EOVERFLOW/-EINVAL for a
+ * bad range, -EFAULT if an iovec cannot be copied in, -ENOMEM on allocation
+ * failure, or the error reported by one of the helpers.
+ */
+static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
+				   struct io_uring_rsrc_update *up,
+				   unsigned nr_args)
+{
+	struct fixed_rsrc_data *data = ctx->buf_data;
+	struct fixed_rsrc_ref_node *ref_node;
+	struct io_mapped_ubuf *imu;
+	struct iovec iov;
+	struct iovec __user *iovs;
+	struct page *last_hpage = NULL;
+	__u32 done;
+	/* Initialised so the final return is defined even if nr_args is 0. */
+	int i, err = 0;
+	bool needs_switch = false;
+
+	/* Reject ranges that wrap or run past the end of the table. */
+	if (check_add_overflow(up->offset, nr_args, &done))
+		return -EOVERFLOW;
+	if (done > ctx->nr_user_bufs)
+		return -EINVAL;
+
+	/* Allocated up front; only installed if some buffer was removed. */
+	ref_node = alloc_fixed_buf_ref_node(ctx);
+	if (IS_ERR(ref_node))
+		return PTR_ERR(ref_node);
+
+	done = 0;
+	iovs = u64_to_user_ptr(up->iovs);
+	while (nr_args) {
+		struct fixed_rsrc_table *table;
+		unsigned index;
+
+		if (copy_from_user(&iov, &iovs[done], sizeof(iov))) {
+			err = -EFAULT;
+			break;
+		}
+
+		i = array_index_nospec(up->offset, ctx->nr_user_bufs);
+		table = &data->table[i >> IORING_BUF_TABLE_SHIFT];
+		index = i & IORING_BUF_TABLE_MASK;
+		imu = &table->bufs[index];
+
+		if (imu->ubuf) {
+			struct io_mapped_ubuf *dup;
+
+			/*
+			 * The slot is reused in place, so the old mapping is
+			 * handed to the removal queue as a private copy.
+			 */
+			dup = kmemdup(imu, sizeof(*imu), GFP_KERNEL);
+			if (!dup) {
+				err = -ENOMEM;
+				break;
+			}
+			err = io_queue_buffer_removal(data, dup);
+			if (err) {
+				/*
+				 * The copy was not queued, so nobody else
+				 * will free it; the slot itself is left
+				 * untouched and still owns the mapping.
+				 * NOTE(review): assumes the removal helper
+				 * does not take ownership on failure -
+				 * confirm against io_queue_rsrc_removal().
+				 */
+				kfree(dup);
+				break;
+			}
+			memset(imu, 0, sizeof(*imu));
+			needs_switch = true;
+		}
+
+		/*
+		 * An iovec that fails validation is not an error here: the
+		 * slot simply stays empty and still counts as processed.
+		 * NOTE(review): presumably this is how a slot is cleared
+		 * without a replacement - confirm that is intended.
+		 */
+		if (!io_buffer_validate(&iov)) {
+			err = io_sqe_buffer_register(ctx, &iov, imu,
+						     &last_hpage);
+			if (err) {
+				memset(imu, 0, sizeof(*imu));
+				break;
+			}
+		}
+
+		nr_args--;
+		done++;
+		up->offset++;
+	}
+
+	if (needs_switch) {
+		/* Retire the current ref node and install the new one. */
+		percpu_ref_kill(&data->node->refs);
+		spin_lock(&data->lock);
+		list_add(&ref_node->node, &data->ref_list);
+		data->node = ref_node;
+		spin_unlock(&data->lock);
+		percpu_ref_get(&data->refs);
+	} else {
+		destroy_fixed_buf_ref_node(ref_node);
+	}
+
+	return done ? done : err;
+}
+
+/*
+ * IORING_REGISTER_BUFFERS_UPDATE entry point (called from
+ * __io_uring_register()).
+ *
+ * Copies a struct io_uring_rsrc_update from @arg and forwards it, together
+ * with @nr_args, to __io_sqe_buffers_update().
+ *
+ * Returns -ENXIO if the ring has no buffer data, -EINVAL if @nr_args is 0
+ * or the reserved field is set, -EFAULT if @arg cannot be copied in,
+ * otherwise the result of __io_sqe_buffers_update().
+ */
+static int io_sqe_buffers_update(struct io_ring_ctx *ctx, void __user *arg,
+				 unsigned nr_args)
+{
+	struct io_uring_rsrc_update update;
+
+	if (ctx->buf_data == NULL)
+		return -ENXIO;
+	if (nr_args == 0)
+		return -EINVAL;
+	if (copy_from_user(&update, arg, sizeof(update)) != 0)
+		return -EFAULT;
+	if (update.resv != 0)
+		return -EINVAL;
+
+	return __io_sqe_buffers_update(ctx, &update, nr_args);
+}
+
static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg)
{
__s32 __user *fds = arg;
@@ -9961,6 +10094,7 @@ static bool io_register_op_must_quiesce(int op)
switch (op) {
case IORING_UNREGISTER_FILES:
case IORING_REGISTER_FILES_UPDATE:
+ case IORING_REGISTER_BUFFERS_UPDATE:
case IORING_REGISTER_PROBE:
case IORING_REGISTER_PERSONALITY:
case IORING_UNREGISTER_PERSONALITY:
@@ -10036,6 +10170,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_sqe_buffers_unregister(ctx);
break;
+ case IORING_REGISTER_BUFFERS_UPDATE:
+ ret = io_sqe_buffers_update(ctx, arg, nr_args);
+ break;
case IORING_REGISTER_FILES:
ret = io_sqe_files_register(ctx, arg, nr_args);
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 87f0f56..17682b5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -137,6 +137,7 @@ enum {
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
+ IORING_OP_BUFFERS_UPDATE,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -279,17 +280,12 @@ enum {
IORING_UNREGISTER_PERSONALITY = 10,
IORING_REGISTER_RESTRICTIONS = 11,
IORING_REGISTER_ENABLE_RINGS = 12,
+ IORING_REGISTER_BUFFERS_UPDATE = 13,
/* this goes last */
IORING_REGISTER_LAST
};
-struct io_uring_files_update {
- __u32 offset;
- __u32 resv;
- __aligned_u64 /* __s32 * */ fds;
-};
-
struct io_uring_rsrc_update {
__u32 offset;
__u32 resv;