[RFC][PATCH] ovl: lazy copy up of data on first write

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When metacopy feature is enabled, copy up only metadata when
opening a file O_RDWR and defer copy up of data until the first
write operation.

On read operations of lazy copy up file, the lower file is opened
O_RDONLY on every vfs call until the first write operation.

XXX: mmap() of a lazy opened file will map the meta inode, so this
still needs to be fixed.

Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---

Hi Miklos,

Here's a draft for lazy copy up of data.
Sending it out for your initial feedback.

When opening file for write, we always cache a real file with upper
inode, but that inode may be a meta copy.

This leaves ovl_mmap() doing the wrong thing (mapping the meta copy).

One way do deal with this would be to implement a_ops as you suggested
and copy up data on page write fault.

Do you have another idea how to make use of this RFC patch without
implementing a_aops? Or maybe there is no reason to bother?

Perhaps we could install ovl_a_aop only for files open for write
as first stage? I think that would be easy enough for me to do.
I still don't have the full picture in my head for dealing with
LOWER->LOWER_SHARED transition.

This RFC patch BTW passes unionmount and the overlay/quick xfstests
with metacopy=on/off.

The one test that does fail is the metacopy test overlay/060 because
it fails the expectation to find a data copied up file after open for
write. Changing the test as follows makes it pass:

        # Trigger data copy up and check absence of metacopy xattr.
        mount_overlay $_lowerdir
-       $XFS_IO_PROG -c "open -a $SCRATCH_MNT/$_target"
+       $XFS_IO_PROG -c "falloc 0 $_size" $SCRATCH_MNT/$_target
        echo "check properties of data copied up file"
        check_file_size_contents $SCRATCH_MNT/$_target $_size "$_data"

Thoughts?

Thanks,
Amir.


 fs/overlayfs/copy_up.c   |  5 +---
 fs/overlayfs/file.c      | 63 +++++++++++++++++++++++++++++++++-------
 fs/overlayfs/overlayfs.h | 10 ++++++-
 fs/overlayfs/util.c      |  4 +--
 4 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 48119b23375b..df495f14bce0 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -731,10 +731,7 @@ static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
 	if (!S_ISREG(mode))
 		return false;
 
-	if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
-		return false;
-
-	return true;
+	return !ovl_open_flags_need_data_copy_up(flags);
 }
 
 /* Copy up data of an inode which was copied up metadata only in the past. */
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 84dd957efa24..e15b16e386f4 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -29,10 +29,15 @@ static struct file *ovl_open_realfile(const struct file *file,
 	struct inode *inode = file_inode(file);
 	struct file *realfile;
 	const struct cred *old_cred;
+	unsigned int flags = file->f_flags | O_NOATIME;
+
+	/* Open lower file O_RDONLY if O_RDWR file data wasn't copied up yet */
+	if (S_ISREG(inode->i_mode) && realinode != ovl_inode_upper(inode))
+		flags &= ~(O_RDWR | O_WRONLY);
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
-				       realinode, current_cred());
+	realfile = open_with_fake_path(&file->f_path, flags, realinode,
+				       current_cred());
 	revert_creds(old_cred);
 
 	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
@@ -80,21 +85,41 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
 	return 0;
 }
 
+/*
+ * If file was opened for write with lazy copy of data, the first operation
+ * to call this function with @rdonly == false will trigger copy up of data.
+ * fdatasync() and fadvise(POSIX_FADV_DONTNEED) are "rdonly" ops in the sense
+ * that they do not dirty pages. If pages are already dirty then file data
+ * has already been copied up and those operations will get the upper file
+ * and write its inode's dirty pages.
+ */
 static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
-			       bool allow_meta)
+			       bool allow_meta, bool rdonly)
 {
+	struct dentry *dentry = file_dentry(file);
 	struct inode *inode = file_inode(file);
 	struct inode *realinode;
+	int err;
 
 	real->flags = 0;
 	real->file = file->private_data;
 
+	if (!rdonly) {
+		/* Maybe copy up data on first write op */
+		err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+		if (err)
+			return err;
+	}
+
 	if (allow_meta)
 		realinode = ovl_inode_real(inode);
 	else
 		realinode = ovl_inode_realdata(inode);
 
-	/* Has it been copied up since we'd opened it? */
+	/*
+	 * Has it been copied up since we'd opened it O_RDONLY?
+	 * or not yet data copied up since we'd opened it O_RDWR?
+	 */
 	if (unlikely(file_inode(real->file) != realinode)) {
 		real->flags = FDPUT_FPUT;
 		real->file = ovl_open_realfile(file, realinode);
@@ -111,23 +136,39 @@ static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
 
 static int ovl_real_fdget(const struct file *file, struct fd *real)
 {
-	return ovl_real_fdget_meta(file, real, false);
+	return ovl_real_fdget_meta(file, real, false, false);
+}
+
+static int ovl_real_fdget_rdonly(const struct file *file, struct fd *real)
+{
+	return ovl_real_fdget_meta(file, real, false, true);
 }
 
 static int ovl_open(struct inode *inode, struct file *file)
 {
 	struct dentry *dentry = file_dentry(file);
 	struct file *realfile;
+	int copyup_flags = file->f_flags;
 	int err;
 
-	err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+	/* (O_RDWR | O_WRONLY) signals that we want meta copy up */
+	if ((copyup_flags & O_ACCMODE) == O_RDWR)
+		copyup_flags |= O_WRONLY;
+
+	/* Allow to copy up meta and defer copy up data to first writable op */
+	err = ovl_open_maybe_copy_up(dentry, copyup_flags);
 	if (err)
 		return err;
 
 	/* No longer need these flags, so don't pass them on to underlying fs */
 	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
-	realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
+	/*
+	 * For lazy copy up of data, we cache the upper meta file in
+	 * anticipation for copy up of data on first write. Opening O_RDWR
+	 * and only reading is not going to be efficient in that case.
+	 */
+	realfile = ovl_open_realfile(file, ovl_inode_real(inode));
 	if (IS_ERR(realfile))
 		return PTR_ERR(realfile);
 
@@ -201,7 +242,7 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	if (!iov_iter_count(iter))
 		return 0;
 
-	ret = ovl_real_fdget(file, &real);
+	ret = ovl_real_fdget_rdonly(file, &real);
 	if (ret)
 		return ret;
 
@@ -263,7 +304,7 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	const struct cred *old_cred;
 	int ret;
 
-	ret = ovl_real_fdget_meta(file, &real, !datasync);
+	ret = ovl_real_fdget_meta(file, &real, !datasync, true);
 	if (ret)
 		return ret;
 
@@ -339,7 +380,7 @@ static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 	const struct cred *old_cred;
 	int ret;
 
-	ret = ovl_real_fdget(file, &real);
+	ret = ovl_real_fdget_rdonly(file, &real);
 	if (ret)
 		return ret;
 
@@ -447,7 +488,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	ret = ovl_real_fdget(file_in, &real_in);
+	ret = ovl_real_fdget_rdonly(file_in, &real_in);
 	if (ret) {
 		fdput(real_out);
 		return ret;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 5e45cb3630a0..d3a7d8936f21 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -195,14 +195,22 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 	return ret;
 }
 
-static inline bool ovl_open_flags_need_copy_up(int flags)
+static inline bool ovl_open_flags_need_data_copy_up(int flags)
 {
 	if (!flags)
 		return false;
 
+	/* Either O_RDWR or O_WRONLY will trigger data copy up */
 	return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
 }
 
+static inline bool ovl_open_flags_need_copy_up(int flags)
+{
+	/* O_RDWR|O_WRONLY together will trigger meta copy up */
+	return (flags & O_ACCMODE) == (O_RDWR | O_WRONLY) ||
+		ovl_open_flags_need_data_copy_up(flags);
+}
+
 /* util.c */
 int ovl_want_write(struct dentry *dentry);
 void ovl_drop_write(struct dentry *dentry);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 7c01327b1852..df6d13185f40 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -367,7 +367,7 @@ void ovl_set_upperdata(struct inode *inode)
 /* Caller should hold ovl_inode->lock */
 bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
 {
-	if (!ovl_open_flags_need_copy_up(flags))
+	if (!ovl_open_flags_need_data_copy_up(flags))
 		return false;
 
 	return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
@@ -375,7 +375,7 @@ bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
 
 bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
 {
-	if (!ovl_open_flags_need_copy_up(flags))
+	if (!ovl_open_flags_need_data_copy_up(flags))
 		return false;
 
 	return !ovl_has_upperdata(d_inode(dentry));
-- 
2.17.1




[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux