We support locking certain mount attributes in the kernel. This API isn't directly exposed to users. Right now, users can lock mount attributes by going through the process of creating a new user namespaces, and when the mounts are copied to the "lower privilege" domain, they're locked. The mount can be reopened, and passed around as a "locked mount". Locked mounts are useful, for example, in container execution without user namespaces, where you may want to expose some host data as read only without allowing the container to remount the mount as mutable. The API currently requires that the given privilege is taken away while or before locking the flag in the less privileged position. This could be relaxed in the future, where the user is allowed to remount the mount as read only, but once they do, they cannot make it read only again. Right now, this allows for all flags that are lockable via the userns unshare trick to be locked, other than the atime related ones. This is because the semantics of what the "less privileged" position is around the atime flags is unclear. Signed-off-by: Sargun Dhillon <sargun@xxxxxxxxx> --- fs/namespace.c | 40 +++++++++++++++++++++++++++++++++++--- include/uapi/linux/mount.h | 2 ++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 54847db5b819..5396e544ac84 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -78,6 +78,7 @@ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ struct mount_kattr { unsigned int attr_set; unsigned int attr_clr; + unsigned int attr_lock; unsigned int propagation; unsigned int lookup_flags; bool recurse; @@ -3608,6 +3609,9 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, #define MOUNT_SETATTR_PROPAGATION_FLAGS \ (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED) +#define MOUNT_SETATTR_VALID_LOCK_FLAGS \ + (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \ + MOUNT_ATTR_NOEXEC) static unsigned int attr_flags_to_mnt_flags(u64 attr_flags) { @@ -3629,6 +3633,22 @@ static unsigned int attr_flags_to_mnt_flags(u64 attr_flags) return mnt_flags; } +static unsigned int attr_flags_to_mnt_lock_flags(u64 attr_flags) +{ + unsigned int mnt_flags = 0; + + if (attr_flags & MOUNT_ATTR_RDONLY) + mnt_flags |= MNT_LOCK_READONLY; + if (attr_flags & MOUNT_ATTR_NOSUID) + mnt_flags |= MNT_LOCK_NOSUID; + if (attr_flags & MOUNT_ATTR_NODEV) + mnt_flags |= MNT_LOCK_NODEV; + if (attr_flags & MOUNT_ATTR_NOEXEC) + mnt_flags |= MNT_LOCK_NOEXEC; + + return mnt_flags; +} + /* * Create a kernel mount representation for a new, prepared superblock * (specified by fs_fd) and attach to an open_tree-like file descriptor. @@ -4037,11 +4057,18 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) int err; for (m = mnt; m; m = next_mnt(m, mnt)) { - if (!can_change_locked_flags(m, recalc_flags(kattr, m))) { + int new_mount_flags = recalc_flags(kattr, m); + + if (!can_change_locked_flags(m, new_mount_flags)) { err = -EPERM; break; } + if ((new_mount_flags & kattr->attr_lock) != kattr->attr_lock) { + err = -EINVAL; + break; + } + err = can_idmap_mount(kattr, m); if (err) break; @@ -4278,8 +4305,14 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize, if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) return -EINVAL; + if (attr->attr_lock & ~MOUNT_SETATTR_VALID_LOCK_FLAGS) + return -EINVAL; + kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); + kattr->attr_lock = attr_flags_to_mnt_flags(attr->attr_lock); + kattr->attr_set |= attr_flags_to_mnt_lock_flags(attr->attr_lock); + /* * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap, @@ -4337,7 +4370,7 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, struct mount_attr attr; struct mount_kattr kattr; - BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0); + BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER1); if (flags & ~(AT_EMPTY_PATH | AT_RECURSIVE | @@ -4360,7 +4393,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, /* Don't bother walking through the mounts if this is a nop. */ if (attr.attr_set == 0 && attr.attr_clr == 0 && - attr.propagation == 0) + attr.propagation == 0 && + attr.attr_lock == 0) return 0; err = build_mount_kattr(&attr, usize, &kattr, flags); diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 4d93967f8aea..de667c4f852d 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -131,9 +131,11 @@ struct mount_attr { __u64 attr_clr; __u64 propagation; __u64 userns_fd; + __u64 attr_lock; }; /* List of all mount_attr versions. */ #define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ +#define MOUNT_ATTR_SIZE_VER1 40 #endif /* _UAPI_LINUX_MOUNT_H */ -- 2.39.3