On 12/13/19 11:22 AM, Jens Axboe wrote: > On 12/13/19 12:51 AM, Pavel Begunkov wrote: >> There is no reliable way to submit and wait in a single syscall, as >> io_submit_sqes() may under-consume sqes (in case of an early error). >> Then it will wait for not-yet-submitted requests, deadlocking the user >> in most cases. > > Why not just cap the wait_nr? If someone does to_submit = 8, wait_nr = 8, > and we only submit 4, just wait for 4? Ala: > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index 81219a631a6d..4a76ccbb7856 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -5272,6 +5272,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, > submitted = io_submit_sqes(ctx, to_submit, f.file, fd, > &cur_mm, false); > mutex_unlock(&ctx->uring_lock); > + if (submitted <= 0) > + goto done; > + if (submitted != to_submit && min_complete > submitted) > + min_complete = submitted; > } > if (flags & IORING_ENTER_GETEVENTS) { > unsigned nr_events = 0; > @@ -5284,7 +5288,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, > ret = io_cqring_wait(ctx, min_complete, sig, sigsz); > } > } > - > +done: > percpu_ref_put(&ctx->refs); > out_fput: > fdput(f); > This is probably a bit cleaner, since it only adjusts if we're going to wait. diff --git a/fs/io_uring.c b/fs/io_uring.c index 81219a631a6d..e262549a2601 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5272,11 +5272,15 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, submitted = io_submit_sqes(ctx, to_submit, f.file, fd, &cur_mm, false); mutex_unlock(&ctx->uring_lock); + if (submitted <= 0) + goto done; } if (flags & IORING_ENTER_GETEVENTS) { unsigned nr_events = 0; min_complete = min(min_complete, ctx->cq_entries); + if (submitted != to_submit && min_complete > submitted) + min_complete = submitted; if (ctx->flags & IORING_SETUP_IOPOLL) { ret = io_iopoll_check(ctx, &nr_events, min_complete); @@ -5284,7 +5288,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, ret = io_cqring_wait(ctx, min_complete, sig, sigsz); } } - +done: percpu_ref_put(&ctx->refs); out_fput: fdput(f); -- Jens Axboe