On 11/5/24 7:12 PM, Sasha Levin wrote: > The patch below does not apply to the v5.15-stable tree. > If someone wants it applied there, or to any other stable or longterm > tree, then please email the backport, including the original git commit > id to <stable@xxxxxxxxxxxxxxx>. Here's a tested series for 5.15-stable. I'll send the 5.10-stable series after this. -- Jens Axboe
From 88e83dcf06ec472f7bbb228dc1eda0c61abeddf3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein <amir73il@xxxxxxxxx> Date: Thu, 17 Aug 2023 17:13:31 +0300 Subject: [PATCH 1/4] io_uring: rename kiocb_end_write() local helper Commit a370167fe526123637965f60859a9f1f3e1a58b7 upstream. This helper does not take a kiocb as input and we want to create a common helper by that name that takes a kiocb as input. Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> Reviewed-by: Jan Kara <jack@xxxxxxx> Reviewed-by: Jens Axboe <axboe@xxxxxxxxx> Message-Id: <20230817141337.1025891-2-amir73il@xxxxxxxxx> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- io_uring/io_uring.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f1ab0cd98727..7afa3827bfd5 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2672,7 +2672,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) return ret; } -static void kiocb_end_write(struct io_kiocb *req) +static void io_req_end_write(struct io_kiocb *req) { /* * Tell lockdep we inherited freeze protection from submission @@ -2742,7 +2742,7 @@ static void io_req_io_end(struct io_kiocb *req) struct io_rw *rw = &req->rw; if (rw->kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); + io_req_end_write(req); fsnotify_modify(req->file); } else { fsnotify_access(req->file); @@ -2822,7 +2822,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); if (unlikely(res != req->result)) { if (res == -EAGAIN && io_rw_should_reissue(req)) { req->flags |= REQ_F_REISSUE; @@ -3822,7 +3822,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (!ret) { if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); + io_req_end_write(req); return -EAGAIN; } return ret; -- 2.45.2
From 3638702b278288b29ce24b52a94816b90088b35b Mon Sep 17 00:00:00 2001 From: Amir Goldstein <amir73il@xxxxxxxxx> Date: Thu, 17 Aug 2023 17:13:33 +0300 Subject: [PATCH 2/4] fs: create kiocb_{start,end}_write() helpers Commit ed0360bbab72b829437b67ebb2f9cfac19f59dfe upstream. aio, io_uring, cachefiles and overlayfs, all open code an ugly variant of file_{start,end}_write() to silence lockdep warnings. Create helpers for this lockdep dance so we can use the helpers in all the callers. Suggested-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> Reviewed-by: Jan Kara <jack@xxxxxxx> Reviewed-by: Jens Axboe <axboe@xxxxxxxxx> Message-Id: <20230817141337.1025891-4-amir73il@xxxxxxxxx> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- include/linux/fs.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6ff6ade229a0..2ef0e48c89ec 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3058,6 +3058,42 @@ static inline void file_end_write(struct file *file) __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); } +/** + * kiocb_start_write - get write access to a superblock for async file io + * @iocb: the io context we want to submit the write with + * + * This is a variant of sb_start_write() for async io submission. + * Should be matched with a call to kiocb_end_write(). + */ +static inline void kiocb_start_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + sb_start_write(inode->i_sb); + /* + * Fool lockdep by telling it the lock got released so that it + * doesn't complain about the held lock when we return to userspace. + */ + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); +} + +/** + * kiocb_end_write - drop write access to a superblock after async file io + * @iocb: the io context we sumbitted the write with + * + * Should be matched with a call to kiocb_start_write(). + */ +static inline void kiocb_end_write(struct kiocb *iocb) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + /* + * Tell lockdep we inherited freeze protection from submission thread. + */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + sb_end_write(inode->i_sb); +} + /* * This is used for regular files where some users -- especially the * currently executed binary in a process, previously handled via -- 2.45.2
From 40a85a3345e4918168cf26ea36dd387056c86cde Mon Sep 17 00:00:00 2001 From: Amir Goldstein <amir73il@xxxxxxxxx> Date: Thu, 17 Aug 2023 17:13:34 +0300 Subject: [PATCH 3/4] io_uring: use kiocb_{start,end}_write() helpers Commit e484fd73f4bdcb00c2188100c2d84e9f3f5c9f7d upstream. Use helpers instead of the open coded dance to silence lockdep warnings. Suggested-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx> Reviewed-by: Jan Kara <jack@xxxxxxx> Reviewed-by: Jens Axboe <axboe@xxxxxxxxx> Message-Id: <20230817141337.1025891-5-amir73il@xxxxxxxxx> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- io_uring/io_uring.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 7afa3827bfd5..718ab6a41842 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2674,15 +2674,10 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) static void io_req_end_write(struct io_kiocb *req) { - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ if (req->flags & REQ_F_ISREG) { - struct super_block *sb = file_inode(req->file)->i_sb; + struct io_rw *rw = &req->rw; - __sb_writers_acquired(sb, SB_FREEZE_WRITE); - sb_end_write(sb); + kiocb_end_write(&rw->kiocb); } } @@ -3775,18 +3770,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret)) goto out_free; - /* - * Open-code file_start_write here to grab freeze protection, - * which will be released by another thread in - * io_complete_rw(). Fool lockdep by telling it the lock got - * released so that it doesn't complain about the held lock when - * we return to userspace. - */ - if (req->flags & REQ_F_ISREG) { - sb_start_write(file_inode(req->file)->i_sb); - __sb_writers_release(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE); - } + if (req->flags & REQ_F_ISREG) + kiocb_start_write(kiocb); kiocb->ki_flags |= IOCB_WRITE; if (req->file->f_op->write_iter) -- 2.45.2
From 085b08edc11c8562b9c9688d48d44d2eaa1f0af8 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@xxxxxxxxx> Date: Thu, 31 Oct 2024 08:05:44 -0600 Subject: [PATCH 4/4] io_uring/rw: fix missing NOWAIT check for O_DIRECT start write Commit 1d60d74e852647255bd8e76f5a22dc42531e4389 upstream. When io_uring starts a write, it'll call kiocb_start_write() to bump the super block rwsem, preventing any freezes from happening while that write is in-flight. The freeze side will grab that rwsem for writing, excluding any new writers from happening and waiting for existing writes to finish. But io_uring unconditionally uses kiocb_start_write(), which will block if someone is currently attempting to freeze the mount point. This causes a deadlock where freeze is waiting for previous writes to complete, but the previous writes cannot complete, as the task that is supposed to complete them is blocked waiting on starting a new write. This results in the following stuck trace showing that dependency with the write blocked starting a new write: task:fio state:D stack:0 pid:886 tgid:886 ppid:876 Call trace: __switch_to+0x1d8/0x348 __schedule+0x8e8/0x2248 schedule+0x110/0x3f0 percpu_rwsem_wait+0x1e8/0x3f8 __percpu_down_read+0xe8/0x500 io_write+0xbb8/0xff8 io_issue_sqe+0x10c/0x1020 io_submit_sqes+0x614/0x2110 __arm64_sys_io_uring_enter+0x524/0x1038 invoke_syscall+0x74/0x268 el0_svc_common.constprop.0+0x160/0x238 do_el0_svc+0x44/0x60 el0_svc+0x44/0xb0 el0t_64_sync_handler+0x118/0x128 el0t_64_sync+0x168/0x170 INFO: task fsfreeze:7364 blocked for more than 15 seconds. Not tainted 6.12.0-rc5-00063-g76aaf945701c #7963 with the attempting freezer stuck trying to grab the rwsem: task:fsfreeze state:D stack:0 pid:7364 tgid:7364 ppid:995 Call trace: __switch_to+0x1d8/0x348 __schedule+0x8e8/0x2248 schedule+0x110/0x3f0 percpu_down_write+0x2b0/0x680 freeze_super+0x248/0x8a8 do_vfs_ioctl+0x149c/0x1b18 __arm64_sys_ioctl+0xd0/0x1a0 invoke_syscall+0x74/0x268 el0_svc_common.constprop.0+0x160/0x238 do_el0_svc+0x44/0x60 el0_svc+0x44/0xb0 el0t_64_sync_handler+0x118/0x128 el0t_64_sync+0x168/0x170 Fix this by having the io_uring side honor IOCB_NOWAIT, and only attempt a blocking grab of the super block rwsem if it isn't set. For normal issue where IOCB_NOWAIT would always be set, this returns -EAGAIN which will have io_uring core issue a blocking attempt of the write. That will in turn also get completions run, ensuring forward progress. Since freezing requires CAP_SYS_ADMIN in the first place, this isn't something that can be triggered by a regular user. Cc: stable@xxxxxxxxxxxxxxx # 5.10+ Reported-by: Peter Mann <peter.mann@xxxxx> Link: https://lore.kernel.org/io-uring/38c94aec-81c9-4f62-b44e-1d87f5597644@xxxxx Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- io_uring/io_uring.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 718ab6a41842..b53099b595cc 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3724,6 +3724,25 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_prep_rw(req, sqe, WRITE); } +static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb) +{ + struct inode *inode; + bool ret; + + if (!(req->flags & REQ_F_ISREG)) + return true; + if (!(kiocb->ki_flags & IOCB_NOWAIT)) { + kiocb_start_write(kiocb); + return true; + } + + inode = file_inode(kiocb->ki_filp); + ret = sb_start_write_trylock(inode->i_sb); + if (ret) + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + return ret; +} + static int io_write(struct io_kiocb *req, unsigned int issue_flags) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; @@ -3770,8 +3789,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret)) goto out_free; - if (req->flags & REQ_F_ISREG) - kiocb_start_write(kiocb); + if (unlikely(!io_kiocb_start_write(req, kiocb))) + goto copy_iov; kiocb->ki_flags |= IOCB_WRITE; if (req->file->f_op->write_iter) -- 2.45.2