We want to protect concurrent updates of ovl inode size and mtime
(i.e. ovl_copyattr()) from aio completion context.

Punt write aio completion to a workqueue so that we can protect
ovl_copyattr() with a spinlock.

Export sb_init_dio_done_wq(), so that overlayfs can use its own dio
workqueue to punt aio completions.

Suggested-by: Jens Axboe <axboe@xxxxxxxxx>
Link: https://lore.kernel.org/r/8620dfd3-372d-4ae0-aa3f-2fe97dda1bca@xxxxxxxxx/
Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---

Hi Jens,

I did not want to add an overlayfs-specific workqueue for those
completions because, as I'd mentioned before, I intend to move this
stacked file io infrastructure to common vfs code.

I figured it's fine for overlayfs (or any stacked filesystem) to use
its own s_dio_done_wq for its own private needs.

Please help me make sure that I got this right.

Thanks,
Amir.

 fs/overlayfs/file.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 fs/super.c          |  1 +
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 173cc55e47fb..b4fefd96881f 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -15,10 +15,15 @@
 #include <linux/fs.h>
 #include "overlayfs.h"
 
+#include "../internal.h"	/* for sb_init_dio_done_wq */
+
 struct ovl_aio_req {
 	struct kiocb iocb;
 	refcount_t ref;
 	struct kiocb *orig_iocb;
+	/* used for aio completion */
+	struct work_struct work;
+	long res;
 };
 
 static struct kmem_cache *ovl_aio_request_cachep;
@@ -302,6 +307,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
 	orig_iocb->ki_complete(orig_iocb, res);
 }
 
+static void ovl_aio_complete_work(struct work_struct *work)
+{
+	struct ovl_aio_req *aio_req = container_of(work,
+						   struct ovl_aio_req, work);
+
+	ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
+}
+
+static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb,
+						   struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	/*
+	 * Punt to a work queue to serialize updates of mtime/size.
+	 */
+	aio_req->res = res;
+	INIT_WORK(&aio_req->work, ovl_aio_complete_work);
+	queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
+		   &aio_req->work);
+}
+
+static int ovl_init_aio_done_wq(struct super_block *sb)
+{
+	if (sb->s_dio_done_wq)
+		return 0;
+
+	return sb_init_dio_done_wq(sb);
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -402,6 +438,10 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 	} else {
 		struct ovl_aio_req *aio_req;
 
+		ret = ovl_init_aio_done_wq(inode->i_sb);
+		if (ret)
+			goto out;
+
 		ret = -ENOMEM;
 		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
 		if (!aio_req)
@@ -411,7 +451,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 		aio_req->orig_iocb = iocb;
 		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
 		aio_req->iocb.ki_flags = ifl;
-		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		aio_req->iocb.ki_complete = ovl_aio_queue_completion;
 		refcount_set(&aio_req->ref, 2);
 		kiocb_start_write(&aio_req->iocb);
 		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
diff --git a/fs/super.c b/fs/super.c
index 2d762ce67f6e..6ab6624989f2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -2139,3 +2139,4 @@ int sb_init_dio_done_wq(struct super_block *sb)
 		destroy_workqueue(wq);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(sb_init_dio_done_wq);
-- 
2.34.1
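
P.S. for readers following along: below is a minimal, self-contained sketch of the
general "punt completion to a workqueue" pattern that ovl_aio_queue_completion()
applies in the patch above. It is illustrative only and not part of the patch; all
my_* names are made up, and it queues on the generic system_wq where the patch uses
the superblock's s_dio_done_wq.

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

/* Illustrative request: carries the deferred work item and the captured result. */
struct my_async_req {
	struct work_struct work;
	long res;
	void (*complete)(long res);	/* original completion callback */
};

/* Runs later in process context, where it is safe to sleep or take locks. */
static void my_complete_work(struct work_struct *work)
{
	struct my_async_req *req = container_of(work, struct my_async_req, work);

	req->complete(req->res);
	kfree(req);
}

/*
 * Called from the (possibly atomic) completion path: record the result and
 * defer the real completion work.  The patch queues on sb->s_dio_done_wq
 * instead of system_wq.
 */
static void my_queue_completion(struct my_async_req *req, long res)
{
	req->res = res;
	INIT_WORK(&req->work, my_complete_work);
	queue_work(system_wq, &req->work);
}

The point of reusing s_dio_done_wq (allocated by sb_init_dio_done_wq(), which if I
read fs/super.c right uses WQ_MEM_RECLAIM) is that it avoids adding yet another
per-sb workqueue just for overlayfs completions.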