Re: [PATCH vfs/for-next 00/18] fs_context fixes

David Howells <dhowells@xxxxxxxxxx> · Mon, 09 Jul 2018 16:31:59 +0100

Eric Biggers <ebiggers3@xxxxxxxxx> wrote:

Okay, I've folded in all your changes to my mount-context branch, apart from
patch 07 (the legacy wrapper double free fix) which I think needed doing
differently.

> Also, mount(..., MS_REMOUNT|MS_BIND, ...) now validates the mount options
> string, which breaks systemd unit files with ProtectControlGroups=yes (e.g.
> systemd-networkd.service) when systemd does the following to change a cgroup
> (v1) mount to read-only:
> 
>     mount(NULL, "/run/systemd/unit-root/sys/fs/cgroup/systemd", NULL, MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_REMOUNT|MS_BIND, NULL)
> 
> ... when the kernel has CONFIG_CGROUPS=y but no cgroup subsystems enabled, since
> in that case the error "cgroup1: Need name or subsystem set" is hit when the
> mount options string is empty.
> 
> Probably it doesn't make sense to validate the mount options string at all in
> the MS_REMOUNT|MS_BIND case, though maybe you had something else in mind.

How about the attached?  I need to separate it out anyway if I want to
implement a mount_setattr() syscall.

David
---
commit f24ee400978ad41e48e679f1d422277fc353521c
Author: David Howells <dhowells@xxxxxxxxxx>
Date:   Mon Jul 9 16:24:27 2018 +0100

    vfs: Separate changing mount flags full remount
    
    Separate just the changing of mount flags (MS_REMOUNT|MS_BIND) from full
    remount because the mount data now gets parsed with the new fs_context
    stuff prior to doing a remount - and this causes the syscall under some
    circumstances.

diff --git a/fs/namespace.c b/fs/namespace.c
index 58c1ace07bb6..39e9744beab9 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -275,13 +275,13 @@ static struct mount *alloc_vfsmnt(const char *name)
  * mnt_want/drop_write() will _keep_ the filesystem
  * r/w.
  */
-int __mnt_is_readonly(struct vfsmount *mnt)
+bool __mnt_is_readonly(struct vfsmount *mnt)
 {
 	if (mnt->mnt_flags & MNT_READONLY)
-		return 1;
+		return true;
 	if (sb_rdonly(mnt->mnt_sb))
-		return 1;
-	return 0;
+		return true;
+	return false;
 }
 EXPORT_SYMBOL_GPL(__mnt_is_readonly);
 
@@ -596,11 +596,12 @@ static int mnt_make_readonly(struct mount *mnt)
 	return ret;
 }
 
-static void __mnt_unmake_readonly(struct mount *mnt)
+static int __mnt_unmake_readonly(struct mount *mnt)
 {
 	lock_mount_hash();
 	mnt->mnt.mnt_flags &= ~MNT_READONLY;
 	unlock_mount_hash();
+	return 0;
 }
 
 int sb_prepare_remount_readonly(struct super_block *sb)
@@ -2307,21 +2308,85 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
 	return error;
 }
 
-static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+/*
+ * Don't allow locked mount flags to be cleared.
+ *
+ * No locks need to be held here while testing the various MNT_LOCK
+ * flags because those flags can never be cleared once they are set.
+ */
+static bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
 {
-	int error = 0;
-	int readonly_request = 0;
+	unsigned int fl = mnt->mnt.mnt_flags;
 
-	if (ms_flags & MS_RDONLY)
+	if ((fl & MNT_LOCK_READONLY) &&
+	    !(mnt_flags & MNT_READONLY))
+		return false;
+
+	if ((fl & MNT_LOCK_NODEV) &&
+	    !(mnt_flags & MNT_NODEV))
+		return false;
+
+	if ((fl & MNT_LOCK_NOSUID) &&
+	    !(mnt_flags & MNT_NOSUID))
+		return false;
+
+	if ((fl & MNT_LOCK_NOEXEC) &&
+	    !(mnt_flags & MNT_NOEXEC))
+		return false;
+
+	if ((fl & MNT_LOCK_ATIME) &&
+	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
+		return false;
+
+	return true;
+}
+
+static int change_mount_ro_state(struct vfsmount *mnt, unsigned int mnt_flags)
+{
+	bool readonly_request = true;
+
+	if (mnt_flags & MNT_READONLY)
 		readonly_request = 1;
 	if (readonly_request == __mnt_is_readonly(mnt))
 		return 0;
 
-	if (readonly_request)
-		error = mnt_make_readonly(real_mount(mnt));
-	else
-		__mnt_unmake_readonly(real_mount(mnt));
-	return error;
+	if (!readonly_request)
+		return mnt_make_readonly(real_mount(mnt));
+
+	return __mnt_unmake_readonly(real_mount(mnt));
+}
+
+/*
+ * Handle reconfiguration of the mountpoint only without alteration of the
+ * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
+ * to mount(2).
+ */
+static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
+{
+	struct super_block *sb = path->mnt->mnt_sb;
+	struct mount *mnt = real_mount(path->mnt);
+	int ret;
+
+	if (!check_mnt(mnt))
+		return -EINVAL;
+
+	if (path->dentry != path->mnt->mnt_root)
+		return -EINVAL;
+
+	if (!can_change_locked_flags(mnt, mnt_flags))
+		return -EPERM;
+
+	down_write(&sb->s_umount);
+	ret = change_mount_ro_state(path->mnt, mnt_flags);
+	if (ret == 0) {
+		lock_mount_hash();
+		mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
+		mnt->mnt.mnt_flags = mnt_flags;
+		touch_mnt_namespace(mnt->mnt_ns);
+		unlock_mount_hash();
+	}
+	up_write(&sb->s_umount);
+	return ret;
 }
 
 /*
@@ -2358,32 +2423,8 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	if (path->dentry != path->mnt->mnt_root)
 		return -EINVAL;
 
-	/* Don't allow changing of locked mnt flags.
-	 *
-	 * No locks need to be held here while testing the various
-	 * MNT_LOCK flags because those flags can never be cleared
-	 * once they are set.
-	 */
-	if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
-	    !(mnt_flags & MNT_READONLY)) {
-		return -EPERM;
-	}
-	if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
-	    !(mnt_flags & MNT_NODEV)) {
-		return -EPERM;
-	}
-	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
-	    !(mnt_flags & MNT_NOSUID)) {
-		return -EPERM;
-	}
-	if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
-	    !(mnt_flags & MNT_NOEXEC)) {
-		return -EPERM;
-	}
-	if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
-	    ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+	if (!can_change_locked_flags(mnt, mnt_flags))
 		return -EPERM;
-	}
 
 	if (type->init_fs_context) {
 		fc = vfs_sb_reconfig(path, sb_flags);
@@ -2410,9 +2451,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	}
 
 	down_write(&sb->s_umount);
-	if (ms_flags & MS_BIND)
-		err = change_mount_flags(path->mnt, ms_flags);
-	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		err = -EPERM;
 	else
 		err = do_remount_sb(sb, sb_flags, data, data_size, 0, fc);
@@ -2961,7 +3000,9 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 			    SB_LAZYTIME |
 			    SB_I_VERSION);
 
-	if (flags & MS_REMOUNT)
+	if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
+		retval = do_reconfigure_mnt(&path, mnt_flags);
+	else if (flags & MS_REMOUNT)
 		retval = do_remount(&path, flags, sb_flags, mnt_flags,
 				    data_page, data_size);
 	else if (flags & MS_BIND)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index ee5af77afc06..41b6b080ffd0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -82,7 +82,7 @@ extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
 extern struct vfsmount *mntget(struct vfsmount *mnt);
 extern struct vfsmount *mnt_clone_internal(const struct path *path);
-extern int __mnt_is_readonly(struct vfsmount *mnt);
+extern bool __mnt_is_readonly(struct vfsmount *mnt);
 extern bool mnt_may_suid(struct vfsmount *mnt);
 
 struct path;