There's been a few requests for supporting more fixed files than 1024. This isn't really tricky to do, we just need to split up the file table into multiple tables and index appropriately. This patch adds support for up to 64K files, which should be enough for everyone. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- Complete with updated test case in liburing, test/file-register now has a test case that registers 8K files (sparse), then does a file set update of a random file late in the set and performs some IO to it to ensure it works. diff --git a/fs/io_uring.c b/fs/io_uring.c index 4402485f0879..2df92134e939 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -80,7 +80,11 @@ #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) -#define IORING_MAX_FIXED_FILES 1024 + +#define IORING_FILE_TABLE_SHIFT 10 +#define IORING_MAX_FILES_TABLE (1U << IORING_FILE_TABLE_SHIFT) +#define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1) +#define IORING_MAX_FIXED_FILES (64 * IORING_MAX_FILES_TABLE) struct io_uring { u32 head ____cacheline_aligned_in_smp; @@ -165,6 +169,10 @@ struct io_mapped_ubuf { unsigned int nr_bvecs; }; +struct fixed_file_table { + struct file **files; +}; + struct io_ring_ctx { struct { struct percpu_ref refs; @@ -225,7 +233,7 @@ struct io_ring_ctx { * readers must ensure that ->refs is alive as long as the file* is * used. Only updated through io_uring_register(2). */ - struct file **user_files; + struct fixed_file_table *file_table; unsigned nr_user_files; /* if used, fixed mapped user buffers */ @@ -2295,6 +2303,15 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe) } } +static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, + int index) +{ + struct fixed_file_table *table; + + table = &ctx->file_table[index >> IORING_FILE_TABLE_SHIFT]; + return table->files[index & IORING_FILE_TABLE_MASK]; +} + static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, struct io_submit_state *state, struct io_kiocb *req) { @@ -2317,12 +2334,15 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, return 0; if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || + struct file *file; + + if (unlikely(!ctx->file_table || (unsigned) fd >= ctx->nr_user_files)) return -EBADF; - if (!ctx->user_files[fd]) + file = io_file_from_index(ctx, fd); + if (!file) return -EBADF; - req->file = ctx->user_files[fd]; + req->file = file; req->flags |= REQ_F_FIXED_FILE; } else { if (s->needs_fixed_file) @@ -2974,20 +2994,29 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) #else int i; - for (i = 0; i < ctx->nr_user_files; i++) - if (ctx->user_files[i]) - fput(ctx->user_files[i]); + for (i = 0; i < ctx->nr_user_files; i++) { + struct file *file; + + file = io_file_from_index(ctx, i); + if (file) + fput(file); #endif } static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { - if (!ctx->user_files) + unsigned nr_tables, i; + + if (!ctx->file_table) return -ENXIO; __io_sqe_files_unregister(ctx); - kfree(ctx->user_files); - ctx->user_files = NULL; + nr_tables = (ctx->nr_user_files + IORING_MAX_FILES_TABLE - 1) / + IORING_MAX_FILES_TABLE; + for (i = 0; i < nr_tables; i++) + kfree(ctx->file_table[i].files); + kfree(ctx->file_table); + ctx->file_table = NULL; ctx->nr_user_files = 0; return 0; } @@ -3062,9 +3091,11 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) nr_files = 0; fpl->user = get_uid(ctx->user); for (i = 0; i < nr; i++) { - if (!ctx->user_files[i + offset]) + struct file *file = io_file_from_index(ctx, i + offset); + + if (!file) continue; - fpl->fp[nr_files] = get_file(ctx->user_files[i + offset]); + fpl->fp[nr_files] = get_file(file); unix_inflight(fpl->user, fpl->fp[nr_files]); nr_files++; } @@ -3113,8 +3144,10 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx) return 0; while (total < ctx->nr_user_files) { - if (ctx->user_files[total]) - fput(ctx->user_files[total]); + struct file *file = io_file_from_index(ctx, total); + + if (file) + fput(file); total++; } @@ -3127,25 +3160,66 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx) } #endif +static int io_sqe_alloc_file_tables(struct io_ring_ctx *ctx, unsigned nr_tables, + unsigned nr_files) +{ + int i; + + for (i = 0; i < nr_tables; i++) { + struct fixed_file_table *table = &ctx->file_table[i]; + unsigned this_files; + + this_files = nr_files; + if (this_files > IORING_MAX_FILES_TABLE) + this_files = IORING_MAX_FILES_TABLE; + + table->files = kcalloc(this_files, sizeof(struct file *), + GFP_KERNEL); + if (!table->files) + break; + } + + if (i == nr_tables) + return 0; + + for (i = 0; i < nr_tables; i++) { + struct fixed_file_table *table = &ctx->file_table[i]; + kfree(table->files); + } + return 1; +} + static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { __s32 __user *fds = (__s32 __user *) arg; + unsigned nr_tables; int fd, ret = 0; unsigned i; - if (ctx->user_files) + if (ctx->file_table) return -EBUSY; if (!nr_args) return -EINVAL; if (nr_args > IORING_MAX_FIXED_FILES) return -EMFILE; - ctx->user_files = kcalloc(nr_args, sizeof(struct file *), GFP_KERNEL); - if (!ctx->user_files) + nr_tables = (nr_args + IORING_MAX_FILES_TABLE - 1) / + IORING_MAX_FILES_TABLE; + ctx->file_table = kcalloc(nr_tables, sizeof(struct fixed_file_table), + GFP_KERNEL); + if (!ctx->file_table) return -ENOMEM; + if (io_sqe_alloc_file_tables(ctx, nr_tables, nr_args)) { + kfree(ctx->file_table); + return -ENOMEM; + } + for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { + struct fixed_file_table *table; + unsigned table_index; + ret = -EFAULT; if (copy_from_user(&fd, &fds[i], sizeof(fd))) break; @@ -3155,10 +3229,12 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, continue; } - ctx->user_files[i] = fget(fd); + table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT]; + table_index = i & IORING_FILE_TABLE_MASK; + table->files[table_index] = fget(fd); ret = -EBADF; - if (!ctx->user_files[i]) + if (!table->files[table_index]) break; /* * Don't allow io_uring instances to be registered. If UNIX @@ -3167,20 +3243,28 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, * handle it just fine, but there's still no point in allowing * a ring fd as it doesn't support regular read/write anyway. */ - if (ctx->user_files[i]->f_op == &io_uring_fops) { - fput(ctx->user_files[i]); + if (table->files[table_index]->f_op == &io_uring_fops) { + fput(table->files[table_index]); break; } ret = 0; } if (ret) { - for (i = 0; i < ctx->nr_user_files; i++) - if (ctx->user_files[i]) - fput(ctx->user_files[i]); + for (i = 0; i < ctx->nr_user_files; i++) { + struct fixed_file_table *table; + unsigned index; + + table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT]; + index = i & IORING_FILE_TABLE_MASK; + if (table->files[index]) + fput(table->files[index]); + } + for (i = 0; i < nr_tables; i++) + kfree(ctx->file_table[i].files); - kfree(ctx->user_files); - ctx->user_files = NULL; + kfree(ctx->file_table); + ctx->file_table = NULL; ctx->nr_user_files = 0; return ret; } @@ -3195,7 +3279,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index) { #if defined(CONFIG_UNIX) - struct file *file = ctx->user_files[index]; + struct file *file = io_file_from_index(ctx, index); struct sock *sock = ctx->ring_sock->sk; struct sk_buff_head list, *head = &sock->sk_receive_queue; struct sk_buff *skb; @@ -3251,7 +3335,7 @@ static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index) spin_unlock_irq(&head->lock); } #else - fput(ctx->user_files[index]); + fput(io_file_from_index(ctx, index)); #endif } @@ -3306,7 +3390,7 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, int fd, i, err; __u32 done; - if (!ctx->user_files) + if (!ctx->file_table) return -ENXIO; if (!nr_args) return -EINVAL; @@ -3320,15 +3404,20 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, done = 0; fds = (__s32 __user *) up.fds; while (nr_args) { + struct fixed_file_table *table; + unsigned index; + err = 0; if (copy_from_user(&fd, &fds[done], sizeof(fd))) { err = -EFAULT; break; } i = array_index_nospec(up.offset, ctx->nr_user_files); - if (ctx->user_files[i]) { + table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT]; + index = i & IORING_FILE_TABLE_MASK; + if (table->files[index]) { io_sqe_file_unregister(ctx, i); - ctx->user_files[i] = NULL; + table->files[index] = NULL; } if (fd != -1) { struct file *file; @@ -3351,7 +3440,7 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, err = -EBADF; break; } - ctx->user_files[i] = file; + table->files[index] = file; err = io_sqe_file_register(ctx, file, i); if (err) break; -- Jens Axboe