We want to protect concurrent updates of ovl inode size and mtime
(i.e. ovl_copyattr()) from aio completion context.

Punt write aio completion to a workqueue so that we can protect
ovl_copyattr() with a spinlock.

Export sb_init_dio_done_wq(), so that overlayfs can use its own dio
workqueue to punt aio completions.

Suggested-by: Jens Axboe <axboe@xxxxxxxxx>
Link: https://lore.kernel.org/r/8620dfd3-372d-4ae0-aa3f-2fe97dda1bca@xxxxxxxxx/
Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---

Hi Jens,

I did not want to add an overlayfs-specific workqueue for those
completions because, as I'd mentioned before, I intend to move this
stacked file io infrastructure to common vfs code.

I figured it's fine for overlayfs (or any stacked filesystem) to use
its own s_dio_done_wq for its own private needs.

Please help me make sure that I got this right.

Thanks,
Amir.

 fs/overlayfs/file.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 fs/super.c          |  1 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 173cc55e47fb..b4fefd96881f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -15,10 +15,15 @@
 #include <linux/fs.h>
 #include "overlayfs.h"
 
+#include "../internal.h"	/* for sb_init_dio_done_wq */
+
 struct ovl_aio_req {
 	struct kiocb iocb;
 	refcount_t ref;
 	struct kiocb *orig_iocb;
+	/* used for aio completion */
+	struct work_struct work;
+	long res;
 };
 
 static struct kmem_cache *ovl_aio_request_cachep;
@@ -302,6 +307,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
 	orig_iocb->ki_complete(orig_iocb, res);
 }
 
+static void ovl_aio_complete_work(struct work_struct *work)
+{
+	struct ovl_aio_req *aio_req = container_of(work,
+						   struct ovl_aio_req, work);
+
+	ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
+}
+
+static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb,
+						   struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	/*
+	 * Punt to a work queue to serialize updates of mtime/size.
+	 */
+	aio_req->res = res;
+	INIT_WORK(&aio_req->work, ovl_aio_complete_work);
+	queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
+		   &aio_req->work);
+}
+
+static int ovl_init_aio_done_wq(struct super_block *sb)
+{
+	if (sb->s_dio_done_wq)
+		return 0;
+
+	return sb_init_dio_done_wq(sb);
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -402,6 +438,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	} else {
 		struct ovl_aio_req *aio_req;
 
+		ret = ovl_init_aio_done_wq(inode->i_sb);
+		if (ret)
+			goto out;
+
 		ret = -ENOMEM;
 		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
 		if (!aio_req)
@@ -411,7 +451,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		aio_req->orig_iocb = iocb;
 		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
 		aio_req->iocb.ki_flags = ifl;
-		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		aio_req->iocb.ki_complete = ovl_aio_queue_completion;
 		refcount_set(&aio_req->ref, 2);
 		kiocb_start_write(&aio_req->iocb);
 		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
diff --git a/fs/super.c b/fs/super.c
index 2d762ce67f6e..6ab6624989f2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -2139,3 +2139,4 @@ int sb_init_dio_done_wq(struct super_block *sb)
 		destroy_workqueue(wq);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(sb_init_dio_done_wq);
-- 
2.34.1
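
P.S. for readers following along: below is a minimal, self-contained sketch of the
general "punt completion to a workqueue" pattern that ovl_aio_queue_completion()
applies in the patch above. It is illustrative only and not part of the patch; all
my_* names are made up, and it queues on the generic system_wq where the patch uses
the superblock's s_dio_done_wq.

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

/* Illustrative request: carries the deferred work item and the captured result. */
struct my_async_req {
	struct work_struct work;
	long res;
	void (*complete)(long res);	/* original completion callback */
};

/* Runs later in process context, where it is safe to sleep or take locks. */
static void my_complete_work(struct work_struct *work)
{
	struct my_async_req *req = container_of(work, struct my_async_req, work);

	req->complete(req->res);
	kfree(req);
}

/*
 * Called from the (possibly atomic) completion path: record the result and
 * defer the real completion work.  The patch queues on sb->s_dio_done_wq
 * instead of system_wq.
 */
static void my_queue_completion(struct my_async_req *req, long res)
{
	req->res = res;
	INIT_WORK(&req->work, my_complete_work);
	queue_work(system_wq, &req->work);
}

The point of reusing s_dio_done_wq (allocated by sb_init_dio_done_wq(), which if I
read fs/super.c right uses WQ_MEM_RECLAIM) is that it avoids adding yet another
per-sb workqueue just for overlayfs completions.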