[PATCH 30/32] vfs: Allow cloning of a mount tree with open(O_PATH|O_CLONE_MOUNT) [ver #8]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Make it possible to clone a mount tree with a new pair of open flags that
are used in conjunction with O_PATH:

 (1) O_CLONE_MOUNT - Clone the mount or mount tree at the path.

 (2) O_NON_RECURSIVE - Don't clone recursively.

Note that it's not a good idea to reuse other flags (such as O_CREAT)
because the open routine for O_PATH does not give an error if any other
flags are used in conjunction with O_PATH, but rather just masks off any it
doesn't use.

The resultant file struct is marked FMODE_NEED_UNMOUNT to as it pins an
extra reference for the mount.  This will be cleared by the upcoming
move_mount() syscall when it successfully moves a cloned mount into the
filesystem tree.

Note that care needs to be taken with the error handling in do_o_path() in
the case that vfs_open() fails as the path may or may not have been
attached to the file struct and FMODE_NEED_UNMOUNT may or may not be set.
Note that O_DIRECT | O_PATH could be a problem with error handling too.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

 fs/fcntl.c                       |    2 +-
 fs/internal.h                    |    1 +
 fs/namei.c                       |   26 ++++++++++++++++++----
 fs/namespace.c                   |   44 ++++++++++++++++++++++++++++++++++++++
 fs/open.c                        |    7 +++++-
 include/linux/fcntl.h            |    3 ++-
 include/uapi/asm-generic/fcntl.h |    8 +++++++
 7 files changed, 83 insertions(+), 8 deletions(-)

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 60bc5bf2f4cf..42a53cf03737 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -1028,7 +1028,7 @@ static int __init fcntl_init(void)
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ !=
+	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
 		     HWEIGHT32(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/internal.h b/fs/internal.h
index c29552e0522f..e3460a2e6b59 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -75,6 +75,7 @@ extern struct vfsmount *lookup_mnt(const struct path *);
 extern int finish_automount(struct vfsmount *, struct path *);
 
 extern int sb_prepare_remount_readonly(struct super_block *);
+extern int copy_mount_for_o_path(struct path *, struct path *, bool);
 
 extern void __init mnt_init(void);
 
diff --git a/fs/namei.c b/fs/namei.c
index 5cbd980b4031..acb8e27d4288 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3458,13 +3458,29 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
 
 static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
 {
-	struct path path;
-	int error = path_lookupat(nd, flags, &path);
-	if (!error) {
-		audit_inode(nd->name, path.dentry, 0);
-		error = vfs_open(&path, file, current_cred());
+	struct path path, tmp;
+	int error;
+
+	error = path_lookupat(nd, flags, &path);
+	if (error)
+		return error;
+
+	if (file->f_flags & O_CLONE_MOUNT) {
+		error = copy_mount_for_o_path(
+			&path, &tmp, !(file->f_flags & O_NON_RECURSIVE));
 		path_put(&path);
+		if (error < 0)
+			return error;
+		path = tmp;
 	}
+
+	audit_inode(nd->name, path.dentry, 0);
+	error = vfs_open(&path, file, current_cred());
+	if (error < 0 &&
+	    (flags & O_CLONE_MOUNT) &&
+	    !(file->f_mode & FMODE_NEED_UNMOUNT))
+		__detach_mounts(path.dentry);
+	path_put(&path);
 	return error;
 }
 
diff --git a/fs/namespace.c b/fs/namespace.c
index dba680aa1ea4..e73cfcdfb3d1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2218,6 +2218,50 @@ static int do_loopback(struct path *path, const char *old_name,
 	return err;
 }
 
+/*
+ * Copy the mount or mount subtree at the specified path for
+ * open(O_PATH|O_CLONE_MOUNT).
+ */
+int copy_mount_for_o_path(struct path *from, struct path *to, bool recurse)
+{
+	struct mountpoint *mp;
+	struct mount *mnt = NULL, *f = real_mount(from->mnt);
+	int ret;
+
+	mp = lock_mount(from);
+	if (IS_ERR(mp))
+		return PTR_ERR(mp);
+
+	ret = -EINVAL;
+	if (IS_MNT_UNBINDABLE(f))
+		goto out_unlock;
+
+	if (!check_mnt(f) && from->dentry->d_op != &ns_dentry_operations)
+		goto out_unlock;
+
+	if (!recurse && has_locked_children(f, from->dentry))
+		goto out_unlock;
+
+	if (recurse)
+		mnt = copy_tree(f, from->dentry, CL_COPY_MNT_NS_FILE);
+	else
+		mnt = clone_mnt(f, from->dentry, 0);
+	if (IS_ERR(mnt)) {
+		ret = PTR_ERR(mnt);
+		goto out_unlock;
+	}
+
+	mnt->mnt.mnt_flags &= ~MNT_LOCKED;
+
+	to->mnt = &mnt->mnt;
+	to->dentry = dget(from->dentry);
+	ret = 0;
+
+out_unlock:
+	unlock_mount(mp);
+	return ret;
+}
+
 static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
 {
 	int error = 0;
diff --git a/fs/open.c b/fs/open.c
index 79a8a1bd740d..27ce9c60345a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -748,6 +748,8 @@ static int do_dentry_open(struct file *f,
 
 	if (unlikely(f->f_flags & O_PATH)) {
 		f->f_mode |= FMODE_PATH;
+		if (f->f_flags & O_CLONE_MOUNT)
+			f->f_mode |= FMODE_NEED_UNMOUNT;
 		f->f_op = &empty_fops;
 		goto done;
 	}
@@ -977,8 +979,11 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
 		 * If we have O_PATH in the open flag. Then we
 		 * cannot have anything other than the below set of flags
 		 */
-		flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+		flags &= (O_DIRECTORY | O_NOFOLLOW | O_PATH |
+			  O_CLONE_MOUNT | O_NON_RECURSIVE);
 		acc_mode = 0;
+	} else if (flags & (O_CLONE_MOUNT | O_NON_RECURSIVE)) {
+		return -EINVAL;
 	}
 
 	op->open_flag = flags;
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 27dc7a60693e..8f60e2244740 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -9,7 +9,8 @@
 	(O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \
 	 O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \
 	 FASYNC	| O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \
-	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
+	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | \
+	 O_CLONE_MOUNT | O_NON_RECURSIVE)
 
 #ifndef force_o_largefile
 #define force_o_largefile() (BITS_PER_LONG != 32)
diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h
index 0b1c7e35090c..f533e35ea19b 100644
--- a/include/uapi/asm-generic/fcntl.h
+++ b/include/uapi/asm-generic/fcntl.h
@@ -88,6 +88,14 @@
 #define __O_TMPFILE	020000000
 #endif
 
+#ifndef O_CLONE_MOUNT
+#define O_CLONE_MOUNT	040000000	/* Used with O_PATH to clone the mount subtree at path */
+#endif
+
+#ifndef O_NON_RECURSIVE
+#define O_NON_RECURSIVE	0100000000	/* Used with O_CLONE_MOUNT to only clone one mount */
+#endif
+
 /* a horrid kludge trying to make sure that this will fail on old kernels */
 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
 #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)      




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux