Hi, Did a final round of digging, and found two sets of missing backports: 1) File position fixes 2) fsnotify fix (original is in stable, not the fixup) With these, I've verified that 5.10-stable and 5.15-stable both fully pass the liburing regression suite. Please queue up for 5.10-stable and 5.15-stable, thanks! -- Jens Axboe
From 971f9d875773c4057e666e9286e309fd779c6472 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Sat, 21 Jan 2023 13:38:51 -0700 Subject: [PATCH 4/4] io_uring/rw: defer fsnotify calls to task context commit b000145e9907809406d8164c3b2b8861d95aecd1 upstream. We can't call these off the kiocb completion as that might be off soft/hard irq context. Defer the calls to when we process the task_work for this request. That avoids valid complaints like: stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.0.0-rc6-syzkaller-00321-g105a36f3694e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_usage_bug kernel/locking/lockdep.c:3961 [inline] valid_state kernel/locking/lockdep.c:3973 [inline] mark_lock_irq kernel/locking/lockdep.c:4176 [inline] mark_lock.part.0.cold+0x18/0xd8 kernel/locking/lockdep.c:4632 mark_lock kernel/locking/lockdep.c:4596 [inline] mark_usage kernel/locking/lockdep.c:4527 [inline] __lock_acquire+0x11d9/0x56d0 kernel/locking/lockdep.c:5007 lock_acquire kernel/locking/lockdep.c:5666 [inline] lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631 __fs_reclaim_acquire mm/page_alloc.c:4674 [inline] fs_reclaim_acquire+0x115/0x160 mm/page_alloc.c:4688 might_alloc include/linux/sched/mm.h:271 [inline] slab_pre_alloc_hook mm/slab.h:700 [inline] slab_alloc mm/slab.c:3278 [inline] __kmem_cache_alloc_lru mm/slab.c:3471 [inline] kmem_cache_alloc+0x39/0x520 mm/slab.c:3491 fanotify_alloc_fid_event fs/notify/fanotify/fanotify.c:580 [inline] fanotify_alloc_event fs/notify/fanotify/fanotify.c:813 [inline] fanotify_handle_event+0x1130/0x3f40 fs/notify/fanotify/fanotify.c:948 send_to_group fs/notify/fsnotify.c:360 [inline] fsnotify+0xafb/0x1680 fs/notify/fsnotify.c:570 __fsnotify_parent+0x62f/0xa60 fs/notify/fsnotify.c:230 fsnotify_parent include/linux/fsnotify.h:77 [inline] fsnotify_file 
include/linux/fsnotify.h:99 [inline] fsnotify_access include/linux/fsnotify.h:309 [inline] __io_complete_rw_common+0x485/0x720 io_uring/rw.c:195 io_complete_rw+0x1a/0x1f0 io_uring/rw.c:228 iomap_dio_complete_work fs/iomap/direct-io.c:144 [inline] iomap_dio_bio_end_io+0x438/0x5e0 fs/iomap/direct-io.c:178 bio_endio+0x5f9/0x780 block/bio.c:1564 req_bio_endio block/blk-mq.c:695 [inline] blk_update_request+0x3fc/0x1300 block/blk-mq.c:825 scsi_end_request+0x7a/0x9a0 drivers/scsi/scsi_lib.c:541 scsi_io_completion+0x173/0x1f70 drivers/scsi/scsi_lib.c:971 scsi_complete+0x122/0x3b0 drivers/scsi/scsi_lib.c:1438 blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1022 __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 invoke_softirq kernel/softirq.c:445 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650 irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 common_interrupt+0xa9/0xc0 arch/x86/kernel/irq.c:240 Fixes: f63cf5192fe3 ("io_uring: ensure that fsnotify is always called") Link: https://lore.kernel.org/all/20220929135627.ykivmdks2w5vzrwg@quack3/ Reported-by: syzbot+dfcc5f4da15868df7d4d@xxxxxxxxxxxxxxxxxxxxxxxxx Reported-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/io_uring.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 73d261004c4a..78ed38d778f8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2705,12 +2705,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req) static bool __io_complete_rw_common(struct io_kiocb *req, long res) { - if (req->rw.kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); - fsnotify_modify(req->file); - } else { - fsnotify_access(req->file); - } if (res != req->result) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { @@ -2763,6 +2757,20 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), 
io_put_rw_kbuf(req)); } +static void io_req_rw_complete(struct io_kiocb *req, bool *locked) +{ + struct io_rw *rw = &req->rw; + + if (rw->kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } + + io_req_task_complete(req, locked); +} + static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); @@ -2770,7 +2778,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) if (__io_complete_rw_common(req, res)) return; req->result = io_fixup_rw_res(req, res); - req->io_task_work.func = io_req_task_complete; + req->io_task_work.func = io_req_rw_complete; io_req_task_work_add(req); } -- 2.39.0
From 7c24a63b8f98541cb0dcc0e5c3070c315fdde052 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken <dylany@xxxxxx> Date: Tue, 22 Feb 2022 02:55:03 -0800 Subject: [PATCH 3/4] io_uring: do not recalculate ppos unnecessarily commit b4aec40015953b65f2f114641e7fd7714c8df8e6 upstream. There is a slight optimisation to be had by calculating the correct pos pointer inside io_kiocb_update_pos and then using that later. It seems code size drops by a bit: 000000000000a1b0 0000000000000400 t io_read 000000000000a5b0 0000000000000319 t io_write vs 000000000000a1b0 00000000000003f6 t io_read 000000000000a5b0 0000000000000310 t io_write Signed-off-by: Dylan Yudaken <dylany@xxxxxx> Reviewed-by: Pavel Begunkov <asml.silence@xxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/io_uring.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index d9396cfaa4f3..73d261004c4a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3003,18 +3003,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } -static inline void io_kiocb_update_pos(struct io_kiocb *req) +static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) { struct kiocb *kiocb = &req->rw.kiocb; + bool is_stream = req->file->f_mode & FMODE_STREAM; if (kiocb->ki_pos == -1) { - if (!(req->file->f_mode & FMODE_STREAM)) { + if (!is_stream) { req->flags |= REQ_F_CUR_POS; kiocb->ki_pos = req->file->f_pos; + return &kiocb->ki_pos; } else { kiocb->ki_pos = 0; + return NULL; } } + return is_stream ? 
NULL : &kiocb->ki_pos; } static void kiocb_done(struct kiocb *kiocb, ssize_t ret, @@ -3540,6 +3544,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct iov_iter_state __state, *state; ssize_t ret, ret2; + loff_t *ppos; if (rw) { iter = &rw->iter; @@ -3572,9 +3577,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } - io_kiocb_update_pos(req); + ppos = io_kiocb_update_pos(req); - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); + ret = rw_verify_area(READ, req->file, ppos, req->result); if (unlikely(ret)) { kfree(iovec); return ret; @@ -3678,6 +3683,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; struct iov_iter_state __state, *state; ssize_t ret, ret2; + loff_t *ppos; if (rw) { iter = &rw->iter; @@ -3708,9 +3714,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; - io_kiocb_update_pos(req); + ppos = io_kiocb_update_pos(req); - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); + ret = rw_verify_area(WRITE, req->file, ppos, req->result); if (unlikely(ret)) goto out_free; -- 2.39.0
From f5c508d46babd67a2261954cf59c0a5f9ae996ae Mon Sep 17 00:00:00 2001 From: Dylan Yudaken <dylany@xxxxxx> Date: Tue, 22 Feb 2022 02:55:02 -0800 Subject: [PATCH 2/4] io_uring: update kiocb->ki_pos at execution time commit d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e upstream. Update kiocb->ki_pos at execution time rather than in io_prep_rw(). io_prep_rw() happens before the job is enqueued to a worker and so the offset might be read multiple times before being executed once. Ensures that the file position in a set of _linked_ SQEs will be only obtained after earlier SQEs have completed, and so will include their incremented file position. Signed-off-by: Dylan Yudaken <dylany@xxxxxx> Reviewed-by: Pavel Begunkov <asml.silence@xxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/io_uring.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 14297add8485..d9396cfaa4f3 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2922,14 +2922,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->flags |= REQ_F_ISREG; kiocb->ki_pos = READ_ONCE(sqe->off); - if (kiocb->ki_pos == -1) { - if (!(file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = file->f_pos; - } else { - kiocb->ki_pos = 0; - } - } kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); @@ -3011,6 +3003,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } +static inline void io_kiocb_update_pos(struct io_kiocb *req) +{ + struct kiocb *kiocb = &req->rw.kiocb; + + if (kiocb->ki_pos == -1) { + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + } else { + kiocb->ki_pos = 0; + } + } +} + static void kiocb_done(struct kiocb *kiocb, ssize_t ret, unsigned int 
issue_flags) { @@ -3566,6 +3572,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) return ret ?: -EAGAIN; } + io_kiocb_update_pos(req); + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) { kfree(iovec); @@ -3700,6 +3708,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) (req->flags & REQ_F_ISREG)) goto copy_iov; + io_kiocb_update_pos(req); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); if (unlikely(ret)) goto out_free; -- 2.39.0
From 63e393713b5f284a6e7ac4e0e284eed2203437a4 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken <dylany@xxxxxx> Date: Tue, 22 Feb 2022 02:55:01 -0800 Subject: [PATCH 1/4] io_uring: remove duplicated calls to io_kiocb_ppos commit af9c45ecebaf1b428306f41421f4bcffe439f735 upstream. io_kiocb_ppos is called in both branches, and it seems that the compiler does not fuse this. Fusing removes a few bytes from loop_rw_iter. Before: $ nm -S fs/io_uring.o | grep loop_rw_iter 0000000000002430 0000000000000124 t loop_rw_iter After: $ nm -S fs/io_uring.o | grep loop_rw_iter 0000000000002430 000000000000010d t loop_rw_iter Signed-off-by: Dylan Yudaken <dylany@xxxxxx> Reviewed-by: Pavel Begunkov <asml.silence@xxxxxxxxx> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 2caef6417260..14297add8485 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3303,6 +3303,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) struct kiocb *kiocb = &req->rw.kiocb; struct file *file = req->file; ssize_t ret = 0; + loff_t *ppos; /* * Don't support polled IO through this interface, and we can't @@ -3314,6 +3315,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) if (kiocb->ki_flags & IOCB_NOWAIT) return -EAGAIN; + ppos = io_kiocb_ppos(kiocb); + while (iov_iter_count(iter)) { struct iovec iovec; ssize_t nr; @@ -3327,10 +3330,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) if (rw == READ) { nr = file->f_op->read(file, iovec.iov_base, - iovec.iov_len, io_kiocb_ppos(kiocb)); + iovec.iov_len, ppos); } else { nr = file->f_op->write(file, iovec.iov_base, - iovec.iov_len, io_kiocb_ppos(kiocb)); + iovec.iov_len, ppos); } if (nr < 0) { -- 2.39.0