When bind mounts are in use, and there is another path to the filesystem, it is possible to rename files or directories from a path underneath the root of the bind mount to a path that is not underneath the root of the bind mount. When a directory is moved out from under the root of a bind mount, path name lookups that go up the directory tree potentially allow accessing the entire dentry tree of the filesystem. This is not expected, not what is desired, and winds up being a security problem for userspace. Augment d_move and d_exchange to call d_common_ancestor and handle_possible_mount_escapee to mark any mount points that directories escape from. A few notes on the implementation: - d_splice_alias does not need to be touched, as the only case that can result in a directory escaping calls d_move. - Only directory escapes are recorded, as only those are relevant to new pathname lookup. Escaped files are handled in prepend_path. - A lock, either namespace_sem or mount_lock, needs to be held across the duration of renames where a directory could be escaping, to ensure that a mount is not added, escaped, and missed during the rename. - The mount_lock is used as it does not sleep. I have audited all of the callers of d_move and d_exchange and in every instance it appears safe for d_move and d_exchange to start sleeping. But there is no point in adding sleeping behavior if that is unnecessary. - The locking order must be mount_lock outside of rename_lock, as prepend_path already takes the locks in this order. Signed-off-by: "Eric W. 
Biederman" <ebiederm@xxxxxxxxxxxx> --- fs/dcache.c | 33 +++++++++++++++++++++++++++++++++ fs/mount.h | 2 ++ fs/namespace.c | 32 ++++++++++++++++++++++++++++++++ include/linux/mount.h | 1 + 4 files changed, 68 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index 1f2f51055515..7927c1fbdb93 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2704,9 +2704,23 @@ static void __d_move(struct dentry *dentry, struct dentry *target, */ void d_move(struct dentry *dentry, struct dentry *target) { + bool unlock = false; + + if (d_is_dir(dentry) && (dentry->d_parent != target->d_parent)) { + const struct dentry *ancestor; + + ancestor = d_common_ancestor(dentry, target); + read_seqlock_excl(&mount_lock); + unlock = true; + handle_possible_mount_escapee(ancestor, dentry); + } + write_seqlock(&rename_lock); __d_move(dentry, target, false); write_sequnlock(&rename_lock); + if (unlock) + read_sequnlock_excl(&mount_lock); + } EXPORT_SYMBOL(d_move); @@ -2717,6 +2731,23 @@ EXPORT_SYMBOL(d_move); */ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) { + bool d1_is_dir = d_is_dir(dentry1); + bool d2_is_dir = d_is_dir(dentry2); + bool unlock = false; + + if ((d1_is_dir || d2_is_dir) && + (dentry1->d_parent != dentry2->d_parent)) { + const struct dentry *ancestor; + + ancestor = d_common_ancestor(dentry1, dentry2); + read_seqlock_excl(&mount_lock); + unlock = true; + if (d1_is_dir) + handle_possible_mount_escapee(ancestor, dentry1); + if (d2_is_dir) + handle_possible_mount_escapee(ancestor, dentry2); + } + write_seqlock(&rename_lock); WARN_ON(!dentry1->d_inode); @@ -2727,6 +2758,8 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2) __d_move(dentry1, dentry2, true); write_sequnlock(&rename_lock); + if (unlock) + read_sequnlock_excl(&mount_lock); } /** diff --git a/fs/mount.h b/fs/mount.h index e8f22970fe59..ad91963c83ac 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -107,6 +107,8 @@ static inline void detach_mounts(struct dentry *dentry) __detach_mounts(dentry); } 
+extern void handle_possible_mount_escapee(const struct dentry *, struct dentry *); + static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); diff --git a/fs/namespace.c b/fs/namespace.c index af6abf476394..ddcd0b61a448 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1657,6 +1657,38 @@ out_unlock: namespace_unlock(); } +static void mark_escaped_mounts(struct dentry *root) +{ + /* Must be called with mount_lock held */ + struct mountroot *mr; + struct mount *mnt; + + mr = lookup_mountroot(root); + if (mr) { + /* Mark each mount from which a directory is escaping. + */ + hlist_for_each_entry(mnt, &mr->r_list, mnt_mr_list) + mnt->mnt.mnt_flags |= MNT_DIR_ESCAPED; + } +} + +void handle_possible_mount_escapee(const struct dentry *ancestor, + struct dentry *escapee) +{ + struct dentry *dentry; + + for (dentry = escapee->d_parent; dentry != ancestor; + dentry = dentry->d_parent) { + + if (d_mountroot(dentry)) + mark_escaped_mounts(dentry); + + /* In case there is no common ancestor */ + if (IS_ROOT(dentry)) + break; + } +} + /* * Is the caller allowed to modify his namespace? */ diff --git a/include/linux/mount.h b/include/linux/mount.h index f822c3c11377..e58bc12b19aa 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -62,6 +62,7 @@ struct mnt_namespace; #define MNT_SYNC_UMOUNT 0x2000000 #define MNT_MARKED 0x4000000 #define MNT_UMOUNT 0x8000000 +#define MNT_DIR_ESCAPED 0x10000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- 2.2.1 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html