[PATCH v2 2/3] ovl: do not open/llseek lower file with upper sb_writers held

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



overlayfs file open (ovl_maybe_lookup_lowerdata) and overlay file llseek
take the ovl_inode_lock, without holding upper sb_writers.

In case of nested lower overlay that uses same upper fs as this overlay,
lockdep will warn about (possibly false positive) circular lock
dependency when doing open/llseek of lower ovl file during copy up with
our upper sb_writers held, because the locking ordering seems reverse to
the locking order in ovl_copy_up_start():

- lower ovl_inode_lock
- upper sb_writers

Take upper sb_writers only when we actually need it, so we won't hold it
during lower file open and lower file llseek to avoid the lockdep warning.

Minimizing the scope of ovl_want_write() during copy up is also needed
for fixing other possible deadlocks by following patches.

Signed-off-by: Amir Goldstein <amir73il@xxxxxxxxx>
---
 fs/overlayfs/copy_up.c | 117 +++++++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 29 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index c998dab440f8..f2a31ff790fb 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -251,8 +251,13 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 	if (IS_ERR(old_file))
 		return PTR_ERR(old_file);
 
+	error = ovl_want_write(dentry);
+	if (error)
+		goto out_fput;
+
 	/* Try to use clone_file_range to clone up within the same fs */
 	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+	ovl_drop_write(dentry);
 	if (cloned == len)
 		goto out_fput;
 	/* Couldn't clone, so now we try to copy the data */
@@ -287,8 +292,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 		 * it may not recognize all kind of holes and sometimes
 		 * only skips partial of hole area. However, it will be
 		 * enough for most of the use cases.
+		 *
+		 * We do not hold upper sb_writers throughout the loop to avert
+		 * lockdep warning with llseek of lower file in nested overlay:
+		 * - upper sb_writers
+		 * -- lower ovl_inode_lock (ovl_llseek)
 		 */
-
 		if (skip_hole && data_pos < old_pos) {
 			data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
 			if (data_pos > old_pos) {
@@ -303,9 +312,14 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
 			}
 		}
 
+		error = ovl_want_write(dentry);
+		if (error)
+			break;
+
 		bytes = do_splice_direct(old_file, &old_pos,
 					 new_file, &new_pos,
 					 this_len, SPLICE_F_MOVE);
+		ovl_drop_write(dentry);
 		if (bytes <= 0) {
 			error = bytes;
 			break;
@@ -555,14 +569,18 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 	struct inode *udir = d_inode(upperdir);
 
+	err = ovl_want_write(c->dentry);
+	if (err)
+		return err;
+
 	/* Mark parent "impure" because it may now contain non-pure upper */
 	err = ovl_set_impure(c->parent, upperdir);
 	if (err)
-		return err;
+		goto out_drop_write;
 
 	err = ovl_set_nlink_lower(c->dentry);
 	if (err)
-		return err;
+		goto out_drop_write;
 
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 	upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
@@ -581,10 +599,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 	}
 	inode_unlock(udir);
 	if (err)
-		return err;
+		goto out_drop_write;
 
 	err = ovl_set_nlink_upper(c->dentry);
 
+out_drop_write:
+	ovl_drop_write(c->dentry);
 	return err;
 }
 
@@ -710,7 +730,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 	struct path path = { .mnt = ovl_upper_mnt(ofs) };
 	struct dentry *temp, *upper;
 	struct ovl_cu_creds cc;
-	int err;
+	int err, err2;
 	struct ovl_cattr cattr = {
 		/* Can't properly set mode on creation because of the umask */
 		.mode = c->stat.mode & S_IFMT,
@@ -718,21 +738,22 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 		.link = c->link
 	};
 
-	/* workdir and destdir could be the same when copying up to indexdir */
-	err = -EIO;
-	if (lock_rename(c->workdir, c->destdir) != NULL)
-		goto unlock;
-
 	err = ovl_prep_cu_creds(c->dentry, &cc);
 	if (err)
-		goto unlock;
+		return err;
 
-	temp = ovl_create_temp(ofs, c->workdir, &cattr);
+	err = ovl_want_write(c->dentry);
+	if (!err) {
+		inode_lock(d_inode(c->workdir));
+		temp = ovl_create_temp(ofs, c->workdir, &cattr);
+		inode_unlock(d_inode(c->workdir));
+		ovl_drop_write(c->dentry);
+		if (IS_ERR(temp))
+			err = PTR_ERR(temp);
+	}
 	ovl_revert_cu_creds(&cc);
-
-	err = PTR_ERR(temp);
-	if (IS_ERR(temp))
-		goto unlock;
+	if (err)
+		return err;
 
 	/*
 	 * Copy up data first and then xattrs. Writing data after
@@ -740,6 +761,21 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 	 */
 	path.dentry = temp;
 	err = ovl_copy_up_data(c, &path);
+	/*
+	 * Request write access, lock workdir and destdir and make sure that
+	 * temp wasn't moved before copy up completion or cleanup.
+	 * workdir and destdir could be the same when copying up to indexdir.
+	 */
+	err2 = ovl_want_write(c->dentry);
+	if (err2)
+		return err ?: err2;
+
+	if (lock_rename(c->workdir, c->destdir) != NULL ||
+	    temp->d_parent != c->workdir) {
+		err = err ?: -EIO;
+		goto unlock;
+	}
+
 	if (err)
 		goto cleanup;
 
@@ -778,6 +814,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 		ovl_set_flag(OVL_WHITEOUTS, inode);
 unlock:
 	unlock_rename(c->workdir, c->destdir);
+	ovl_drop_write(c->dentry);
 
 	return err;
 
@@ -801,11 +838,16 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 	if (err)
 		return err;
 
-	tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+	err = ovl_want_write(c->dentry);
+	if (!err) {
+		tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+		ovl_drop_write(c->dentry);
+		if (IS_ERR(tmpfile))
+			err = PTR_ERR(tmpfile);
+	}
 	ovl_revert_cu_creds(&cc);
-
-	if (IS_ERR(tmpfile))
-		return PTR_ERR(tmpfile);
+	if (err)
+		return err;
 
 	temp = tmpfile->f_path.dentry;
 	if (!c->metacopy && c->stat.size) {
@@ -814,10 +856,14 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 			goto out_fput;
 	}
 
-	err = ovl_copy_up_metadata(c, temp);
+	err = ovl_want_write(c->dentry);
 	if (err)
 		goto out_fput;
 
+	err = ovl_copy_up_metadata(c, temp);
+	if (err)
+		goto out_drop_write;
+
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 
 	upper = ovl_lookup_upper(ofs, c->destname.name, c->destdir,
@@ -830,7 +876,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 	inode_unlock(udir);
 
 	if (err)
-		goto out_fput;
+		goto out_drop_write;
 
 	if (c->metacopy_digest)
 		ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
@@ -842,6 +888,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 		ovl_set_upperdata(d_inode(c->dentry));
 	ovl_inode_update(d_inode(c->dentry), dget(temp));
 
+out_drop_write:
+	ovl_drop_write(c->dentry);
 out_fput:
 	fput(tmpfile);
 	return err;
@@ -892,7 +940,12 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		 * Mark parent "impure" because it may now contain non-pure
 		 * upper
 		 */
+		err = ovl_want_write(c->dentry);
+		if (err)
+			return err;
+
 		err = ovl_set_impure(c->parent, c->destdir);
+		ovl_drop_write(c->dentry);
 		if (err)
 			return err;
 	}
@@ -908,6 +961,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 	if (c->indexed)
 		ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
 
+	err = ovl_want_write(c->dentry);
+	if (err)
+		goto out;
+
 	if (to_index) {
 		/* Initialize nlink for copy up of disconnected dentry */
 		err = ovl_set_nlink_upper(c->dentry);
@@ -923,6 +980,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 		ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
 	}
 
+	ovl_drop_write(c->dentry);
 out:
 	if (to_index)
 		kfree(c->destname.name);
@@ -1006,6 +1064,10 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 	if (err)
 		goto out_free;
 
+	err = ovl_want_write(c->dentry);
+	if (err)
+		goto out_free;
+
 	/*
 	 * Writing to upper file will clear security.capability xattr. We
 	 * don't want that to happen for normal copy-up operation.
@@ -1014,17 +1076,19 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
 		err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
 				      capability, cap_size, 0);
 		if (err)
-			goto out_free;
+			goto out_drop_write;
 	}
 
 
 	err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
 	if (err)
-		goto out_free;
+		goto out_drop_write;
 
 	ovl_clear_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
 	ovl_clear_flag(OVL_VERIFIED_DIGEST, d_inode(c->dentry));
 	ovl_set_upperdata(d_inode(c->dentry));
+out_drop_write:
+	ovl_drop_write(c->dentry);
 out_free:
 	kfree(capability);
 out:
@@ -1088,17 +1152,12 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
 		goto out;
 	}
 
-	err = ovl_want_write(dentry);
-	if (err)
-		goto out;
-
 	if (!ovl_dentry_upper(dentry))
 		err = ovl_do_copy_up(&ctx);
 	if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
 		err = ovl_link_up(&ctx);
 	if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
 		err = ovl_copy_up_meta_inode_data(&ctx);
-	ovl_drop_write(dentry);
 	ovl_copy_up_end(dentry);
 out:
 	do_delayed_call(&done);
-- 
2.34.1




[Index of Archives]     [Linux Filesystems Devel]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux