TODO: don't rob all bits from params, use pointer to a struct

Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
---
 fs/io_uring.c                 | 89 +++++++++++++++++++++++++++--------
 include/uapi/linux/io_uring.h |  3 +-
 2 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f05592ae5f41..067cfb3a6e4a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -91,6 +91,7 @@
 
 #define IORING_MAX_CQ_ENTRIES	(2 * IORING_MAX_ENTRIES)
 #define IO_DEFAULT_CQ		0
+#define IO_MAX_CQRINGS		1024
 
 /*
  * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
@@ -417,7 +418,7 @@ struct io_ring_ctx {
 		unsigned long		cq_check_overflow;
 		unsigned		cq_extra;
 		struct wait_queue_head	cq_wait;
-		struct io_cqring	cqs[1];
+		struct io_cqring	*cqs;
 		unsigned int		cq_nr;
 
 	struct {
@@ -1166,6 +1167,9 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx)
 		return NULL;
 
+	ctx->cqs = kmalloc_array(p->nr_cq + 1, sizeof(ctx->cqs[0]), GFP_KERNEL);
+	if (!ctx->cqs)
+		goto err;
 	/*
 	 * Use 5 bits less than the max cq entries, that should give us around
 	 * 32 entries per hash list if totally full and uniformly spread.
@@ -8634,6 +8638,8 @@ static bool io_wait_rsrc_data(struct io_rsrc_data *data)
 
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
+	unsigned int i;
+
 	io_sq_thread_finish(ctx);
 
 	if (ctx->mm_account) {
@@ -8673,6 +8679,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 
 	io_mem_free(ctx->rings);
 	io_mem_free(ctx->sq_sqes);
+	for (i = 1; i < ctx->cq_nr; i++)
+		io_mem_free(ctx->cqs[i].rings);
+	kfree(ctx->cqs);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
@@ -9524,11 +9533,39 @@ static const struct file_operations io_uring_fops = {
 #endif
 };
 
+static void __io_init_cqring(struct io_cqring *cq, struct io_rings *rings,
+			     unsigned int entries)
+{
+	WRITE_ONCE(rings->cq_ring_entries, entries);
+	WRITE_ONCE(rings->cq_ring_mask, entries - 1);
+
+	cq->cached_tail = 0;
+	cq->rings = rings;
+	cq->entries = entries;
+}
+
+static int io_init_cqring(struct io_cqring *cq, unsigned int entries)
+{
+	struct io_rings *rings;
+	size_t size;
+
+	size = rings_size(0, entries, NULL);
+	if (size == SIZE_MAX)
+		return -EOVERFLOW;
+	rings = io_mem_alloc(size);
+	if (!rings)
+		return -ENOMEM;
+	__io_init_cqring(cq, rings, entries);
+	return 0;
+}
+
 static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 				  struct io_uring_params *p)
 {
+	u32 __user *cq_sizes = u64_to_user_ptr(p->cq_sizes);
 	struct io_rings *rings;
 	size_t size, sq_array_offset;
+	int i, ret;
 
 	/* make sure these are sane, as we already accounted them */
 	ctx->sq_entries = p->sq_entries;
@@ -9544,30 +9581,43 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 	ctx->rings = rings;
 	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
-	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
-	rings->cq_ring_entries = p->cq_entries;
 
-	ctx->cqs[0].cached_tail = 0;
-	ctx->cqs[0].rings = rings;
-	ctx->cqs[0].entries = p->cq_entries;
+	__io_init_cqring(&ctx->cqs[0], rings, p->cq_entries);
 	ctx->cq_nr = 1;
 
 	size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
-	if (size == SIZE_MAX) {
-		io_mem_free(ctx->rings);
-		ctx->rings = NULL;
-		return -EOVERFLOW;
-	}
+	ret = -EOVERFLOW;
+	if (unlikely(size == SIZE_MAX))
+		goto err;
 
 	ctx->sq_sqes = io_mem_alloc(size);
-	if (!ctx->sq_sqes) {
-		io_mem_free(ctx->rings);
-		ctx->rings = NULL;
-		return -ENOMEM;
+	ret = -ENOMEM;
+	if (unlikely(!ctx->sq_sqes))
+		goto err;
+
+	for (i = 0; i < p->nr_cq; i++, ctx->cq_nr++) {
+		u32 sz;
+		long entries;
+
+		ret = -EFAULT;
+		if (copy_from_user(&sz, &cq_sizes[i], sizeof(sz)))
+			goto err;
+		entries = io_get_cqring_size(p, sz);
+		if (entries < 0) {
+			ret = entries;
+			goto err;
+		}
+		ret = io_init_cqring(&ctx->cqs[i + 1], entries);
+		if (ret)
+			goto err;
 	}
 
 	return 0;
+err:
+	io_mem_free(ctx->rings);
+	ctx->rings = NULL;
+	return ret;
 }
 
 static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
@@ -9653,6 +9703,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	} else {
 		p->cq_entries = 2 * p->sq_entries;
 	}
+	if (p->nr_cq > IO_MAX_CQRINGS)
+		return -EINVAL;
+	if (!p->nr_cq != !p->cq_sizes)
+		return -EINVAL;
 
 	ctx = io_ring_ctx_alloc(p);
 	if (!ctx)
@@ -9744,14 +9798,9 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 {
 	struct io_uring_params p;
-	int i;
 
 	if (copy_from_user(&p, params, sizeof(p)))
 		return -EFAULT;
-	for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
-		if (p.resv[i])
-			return -EINVAL;
-	}
 
 	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
 			IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index c2dfb179360a..92b61ca09ea5 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -263,7 +263,8 @@ struct io_uring_params {
 	__u32 sq_thread_idle;
 	__u32 features;
 	__u32 wq_fd;
-	__u32 resv[3];
+	__u32 nr_cq;
+	__u64 cq_sizes;
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };
-- 
2.31.1
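
For reviewers, a minimal userspace sketch of how the new nr_cq/cq_sizes
fields might be used. This is an illustration only, not part of the patch:
the io_uring_setup() wrapper is hand-rolled (liburing knows nothing about
extra CQ rings), and the per-ring sizes assume io_get_cqring_size() applies
the same power-of-two validation that IORING_SETUP_CQSIZE applies to
cq_entries.

	/* Hypothetical usage sketch, not part of this patch. */
	#include <linux/io_uring.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Raw syscall wrapper; liburing has no support for nr_cq/cq_sizes. */
	static int sys_io_uring_setup(unsigned int entries,
				      struct io_uring_params *p)
	{
		return (int)syscall(__NR_io_uring_setup, entries, p);
	}

	int main(void)
	{
		/* Sizes of the extra CQ rings, assumed power-of-two. */
		static __u32 cq_sizes[2] = { 128, 4096 };
		struct io_uring_params p;
		int fd;

		memset(&p, 0, sizeof(p));
		p.nr_cq = 2;	/* two rings besides the default CQ */
		p.cq_sizes = (__u64)(uintptr_t)cq_sizes;

		/* 64 SQ entries; default CQ stays at 2 * sq_entries. */
		fd = sys_io_uring_setup(64, &p);
		/* fd < 0 on failure, e.g. -EINVAL when nr_cq > IO_MAX_CQRINGS
		 * or when exactly one of nr_cq/cq_sizes is zero. */
		return fd < 0;
	}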