Searching for "rename bind mount exdev" shows that failure with EXDEV seems to be somewhat unintuitive behaviour. Allow users to bypass this restriction with the "-o noexdev" flag if the source of the operation is on such a mount. Keep the old semantics as the default so that "mount --bind /tmp /tmp" works. "mount --bind" will inherit the "noexdev" flag from the parent mount, but it can be cleared with mount(MS_REMOUNT), so it is possible to create an exclave with regular mount point crossing rules inside a mount with relaxed mount point rules. Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx> --- fs/namei.c | 4 ++-- fs/namespace.c | 2 ++ fs/proc_namespace.c | 1 + include/linux/mount.h | 15 ++++++++++++++- include/uapi/linux/fs.h | 1 + 5 files changed, 20 insertions(+), 3 deletions(-) --- a/fs/namei.c +++ b/fs/namei.c @@ -4118,7 +4118,7 @@ retry: goto out; error = -EXDEV; - if (old_path.mnt != new_path.mnt) + if (!mnt_can_cross(old_path.mnt, new_path.mnt)) goto out_dput; error = may_linkat(&old_path); if (unlikely(error)) @@ -4379,7 +4379,7 @@ retry: } error = -EXDEV; - if (old_path.mnt != new_path.mnt) + if (!mnt_can_cross(old_path.mnt, new_path.mnt)) goto exit2; error = -EBUSY; --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2708,6 +2708,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; + if (flags & MS_NOEXDEV) + mnt_flags |= MNT_NOEXDEV; /* The default atime for remount is preservation */ if ((flags & MS_REMOUNT) && --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -67,6 +67,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) { MNT_NOATIME, ",noatime" }, { MNT_NODIRATIME, ",nodiratime" }, { MNT_RELATIME, ",relatime" }, + { MNT_NOEXDEV, ",noexdev" }, { 0, NULL } }; const struct proc_fs_info *fs_infop; --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -28,6 +28,7 @@ struct mnt_namespace; #define MNT_NODIRATIME 0x10 #define MNT_RELATIME 0x20 #define MNT_READONLY 
0x40 /* does the user want this to be r/o? */ +#define MNT_NOEXDEV 0x80 /* allow link(), rename() to cross mount point */ #define MNT_SHRINKABLE 0x100 #define MNT_WRITE_HOLD 0x200 @@ -44,7 +45,7 @@ struct mnt_namespace; #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY) + | MNT_READONLY | MNT_NOEXDEV) #define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) #define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ @@ -95,4 +96,16 @@ extern void mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(const char *name); +/* + * Can operation be done in mnt1 => mnt2 direction? + * Not symmetric relation! + */ +static inline bool mnt_can_cross(struct vfsmount *mnt1, struct vfsmount *mnt2) +{ + if (mnt1 == mnt2) + return true; + if ((mnt1->mnt_flags & MNT_NOEXDEV) && mnt1->mnt_sb == mnt2->mnt_sb) + return true; + return false; +} #endif /* _LINUX_MOUNT_H */ --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -130,6 +130,7 @@ struct inodes_stat_t { #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ +#define MS_NOEXDEV (1<<26) /* Allow link(), rename() to cross mount point */ /* These sb flags are internal to the kernel */ #define MS_NOSEC (1<<28) -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html