[PATCH 2/2] ovl: implement async IO routines

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A performance regression is observed since linux v4.19 when we do aio
test using fio with iodepth 128 on overlayfs. And we found that queue
depth of the device is always 1 which is unexpected.

After investigation, it is found that commit 16914e6fc7
(“ovl: add ovl_read_iter()”) and commit 2a92e07edc
(“ovl: add ovl_write_iter()”) use do_iter_readv_writev() to submit
requests to real filesystem. Async IOs are converted to sync IOs here
and cause performance regression.

So implement async IO for stacked reading and writing.

Signed-off-by: Jiufei Xue <jiufei.xue@xxxxxxxxxxxxxxxxx>
---
 fs/overlayfs/file.c      | 97 +++++++++++++++++++++++++++++++++++++++++-------
 fs/overlayfs/overlayfs.h |  2 +
 fs/overlayfs/super.c     | 12 +++++-
 3 files changed, 95 insertions(+), 16 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index e235a63..07d94e7 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -11,6 +11,14 @@
 #include <linux/uaccess.h>
 #include "overlayfs.h"
 
+struct ovl_aio_req {
+	struct kiocb iocb;
+	struct kiocb *orig_iocb;
+	struct fd fd;
+};
+
+static struct kmem_cache *ovl_aio_request_cachep;
+
 static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 {
 	if (realinode != ovl_inode_upper(inode))
@@ -225,6 +233,21 @@ static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
 	return flags;
 }
 
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+{
+	struct ovl_aio_req *aio_req = container_of(iocb, struct ovl_aio_req, iocb);
+	struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+	if (iocb->ki_flags & IOCB_WRITE)
+		file_end_write(iocb->ki_filp);
+
+	orig_iocb->ki_pos = iocb->ki_pos;
+	orig_iocb->ki_complete(orig_iocb, res, res2);
+
+	fdput(aio_req->fd);
+	kmem_cache_free(ovl_aio_request_cachep, aio_req);
+}
+
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -240,14 +263,28 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 		return ret;
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
-	ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
-			    ovl_iocb_to_rwf(iocb));
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
+				    ovl_iocb_to_rwf(iocb));
+		ovl_file_accessed(file);
+		fdput(real);
+	} else {
+		struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+							       GFP_NOFS);
+		aio_req->fd = real;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
+		ovl_file_accessed(file);
+		if (ret != -EIOCBQUEUED) {
+			iocb->ki_pos = aio_req->iocb.ki_pos;
+			fdput(real);
+			kmem_cache_free(ovl_aio_request_cachep, aio_req);
+		}
+	}
 	revert_creds(old_cred);
 
-	ovl_file_accessed(file);
-
-	fdput(real);
-
 	return ret;
 }
 
@@ -275,16 +312,32 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
 
 	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	file_start_write(real.file);
-	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
-			     ovl_iocb_to_rwf(iocb));
-	file_end_write(real.file);
+	if (is_sync_kiocb(iocb)) {
+		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
+				     ovl_iocb_to_rwf(iocb));
+		file_end_write(real.file);
+		/* Update size */
+		ovl_copyattr(ovl_inode_real(inode), inode);
+		fdput(real);
+	} else {
+		struct ovl_aio_req *aio_req = kmem_cache_alloc(ovl_aio_request_cachep,
+							       GFP_NOFS);
+		aio_req->fd = real;
+		aio_req->orig_iocb = iocb;
+		kiocb_clone(&aio_req->iocb, iocb, real.file);
+		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
+		/* Update size */
+		ovl_copyattr(ovl_inode_real(inode), inode);
+		if (ret != -EIOCBQUEUED) {
+			iocb->ki_pos = aio_req->iocb.ki_pos;
+			file_end_write(real.file);
+			fdput(real);
+			kmem_cache_free(ovl_aio_request_cachep, aio_req);
+		}
+	}
 	revert_creds(old_cred);
 
-	/* Update size */
-	ovl_copyattr(ovl_inode_real(inode), inode);
-
-	fdput(real);
-
 out_unlock:
 	inode_unlock(inode);
 
@@ -651,3 +704,19 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 	.copy_file_range	= ovl_copy_file_range,
 	.remap_file_range	= ovl_remap_file_range,
 };
+
+int __init ovl_init_aio_request_cache(void)
+{
+	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
+						   sizeof(struct ovl_aio_req),
+						   0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ovl_aio_request_cachep)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ovl_exit_aio_request_cache(void)
+{
+	kmem_cache_destroy(ovl_aio_request_cachep);
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf..afd1631 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -416,6 +416,8 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
 
 /* file.c */
 extern const struct file_operations ovl_file_operations;
+int __init ovl_init_aio_request_cache(void);
+void ovl_exit_aio_request_cache(void);
 
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index afbcb11..83cef1f 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1739,9 +1739,17 @@ static int __init ovl_init(void)
 	if (ovl_inode_cachep == NULL)
 		return -ENOMEM;
 
+	err = ovl_init_aio_request_cache();
+	if (err) {
+		kmem_cache_destroy(ovl_inode_cachep);
+		return -ENOMEM;
+	}
+
 	err = register_filesystem(&ovl_fs_type);
-	if (err)
+	if (err) {
 		kmem_cache_destroy(ovl_inode_cachep);
+		ovl_exit_aio_request_cache();
+	}
 
 	return err;
 }
@@ -1756,7 +1764,7 @@ static void __exit ovl_exit(void)
 	 */
 	rcu_barrier();
 	kmem_cache_destroy(ovl_inode_cachep);
-
+	ovl_exit_aio_request_cache();
 }
 
 module_init(ovl_init);
-- 
1.8.3.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux