On 4/1/22 10:21 AM, Jens Axboe wrote:
> On 4/1/22 10:02 AM, Miklos Szeredi wrote:
>> On Fri, 1 Apr 2022 at 17:36, Jens Axboe <axboe@xxxxxxxxx> wrote:
>>
>>> I take it you're continually reusing those slots?
>>
>> Yes.
>>
>>> If you have a test
>>> case that'd be ideal. Agree that it sounds like we just need an
>>> appropriate breather to allow fput/task_work to run. Or it could be the
>>> deferral free of the fixed slot.
>>
>> Adding a breather could make the worst case latency be large. I think
>> doing the fput synchronously would be better in general.
>
> fput() isn't sync, it'll just offload to task_work. There are some
> dependencies there that would need to be checked. But we'll find a way
> to deal with it.
>
>> I test this on a VM with 8G of memory and run the following:
>>
>> ./forkbomb 14 &
>> # wait till 16k processes are forked
>> for i in `seq 1 100`; do ./procreads u; done
>>
>> You can compare performance with plain reads (./procreads p), the
>> other tests don't work on public kernels.
>
> OK, I'll check up on this, but probably won't have time to do so before
> early next week.

Can you try with this patch? It's not complete yet; there's actually a
bunch of things we can do to improve the direct descriptor case. But this
one is easy enough to pull off, and I think it'll fix your OOM case. Not a
proposed patch, but it'll prove the theory.

diff --git a/fs/io_uring.c b/fs/io_uring.c index 0e199040f151..d52cd9c98d6d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,7 +231,7 @@ struct io_rsrc_put { u64 tag; union { void *rsrc; - struct file *file; + unsigned long file_ptr; struct io_mapped_ubuf *buf; }; }; @@ -1601,7 +1601,12 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq) #define FFS_NOWAIT 0x1UL #define FFS_ISREG 0x2UL -#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG) +#if defined(CONFIG_64BIT) +#define FFS_DIRECT 0x4UL +#else +#define FFS_DIRECT 0x0UL +#endif +#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG|FFS_DIRECT) static inline bool io_req_ffs_set(struct io_kiocb *req) { @@ -7443,12 +7448,19 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, return (struct file *) (slot->file_ptr & FFS_MASK); } -static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) +static bool io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file, + bool direct_descriptor) { unsigned long file_ptr = (unsigned long) file; + bool ret = false; file_ptr |= io_file_get_flags(file); + if (direct_descriptor) { + file_ptr |= FFS_DIRECT; + ret = true; + } file_slot->file_ptr = file_ptr; + return ret; } static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, @@ -8917,7 +8929,7 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx) static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { - struct file *file = prsrc->file; + struct file *file = (struct file *) (prsrc->file_ptr & FFS_MASK); #if defined(CONFIG_UNIX) struct sock *sock = ctx->ring_sock->sk; struct sk_buff_head list, *head = &sock->sk_receive_queue; @@ -9083,7 +9095,8 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, fput(file); goto out_fput; } - io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file); + io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file, + false); } ret = io_sqe_files_scm(ctx); @@ -9166,6 +9179,20 @@ 
static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, return 0; } +static int io_queue_file_removal(struct io_rsrc_data *data, unsigned idx, + struct io_rsrc_node *node, + unsigned long file_ptr) +{ + struct file *file = (struct file *) (file_ptr & FFS_MASK); + + if (file_ptr & FFS_DIRECT) { + fput(file); + return 0; + } + + return io_queue_rsrc_removal(data, idx, node, file); +} + static int io_install_fixed_file(struct io_kiocb *req, struct file *file, unsigned int issue_flags, u32 slot_index) { @@ -9189,15 +9216,13 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); if (file_slot->file_ptr) { - struct file *old_file; - ret = io_rsrc_node_switch_start(ctx); if (ret) goto err; - old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, slot_index, - ctx->rsrc_node, old_file); + ret = io_queue_file_removal(ctx->file_data, slot_index, + ctx->rsrc_node, + file_slot->file_ptr); if (ret) goto err; file_slot->file_ptr = 0; @@ -9205,13 +9230,13 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file, } *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - ret = io_sqe_file_register(ctx, file, slot_index); - if (ret) { - file_slot->file_ptr = 0; - goto err; + if (!io_fixed_file_set(file_slot, file, true)) { + ret = io_sqe_file_register(ctx, file, slot_index); + if (ret) { + file_slot->file_ptr = 0; + goto err; + } } - ret = 0; err: if (needs_switch) @@ -9228,7 +9253,6 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) struct io_ring_ctx *ctx = req->ctx; bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; struct io_fixed_file *file_slot; - struct file *file; int ret, i; io_ring_submit_lock(ctx, needs_lock); @@ -9248,8 +9272,8 @@ static int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) if (!file_slot->file_ptr) goto out; - 
file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); + ret = io_queue_file_removal(ctx->file_data, offset, + ctx->rsrc_node, file_slot->file_ptr); if (ret) goto out; @@ -9298,9 +9322,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, file_slot = io_fixed_file_slot(&ctx->file_table, i); if (file_slot->file_ptr) { - file = (struct file *)(file_slot->file_ptr & FFS_MASK); - err = io_queue_rsrc_removal(data, up->offset + done, - ctx->rsrc_node, file); + err = io_queue_file_removal(data, up->offset + done, + ctx->rsrc_node, + file_slot->file_ptr); if (err) break; file_slot->file_ptr = 0; @@ -9326,7 +9350,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, break; } *io_get_tag_slot(data, up->offset + done) = tag; - io_fixed_file_set(file_slot, file); + io_fixed_file_set(file_slot, file, false); err = io_sqe_file_register(ctx, file, i); if (err) { file_slot->file_ptr = 0; -- Jens Axboe