Create and tear down union mount structures on mount. Check requirements for union mounts. Thanks to Felix Fietkau <nbd@xxxxxxxxxxx> for a bug fix. Signed-off-by: Jan Blunck <jblunck@xxxxxxx> Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx> --- fs/namespace.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/union.c | 56 ++++++++++++++++++++++ include/linux/union.h | 5 ++ 3 files changed, 183 insertions(+), 1 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index fc56bf7..c994173 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -29,6 +29,7 @@ #include <linux/log2.h> #include <linux/idr.h> #include <linux/fs_struct.h> +#include <linux/union.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include "pnode.h" @@ -157,6 +158,9 @@ struct vfsmount *alloc_vfsmnt(const char *name) #else mnt->mnt_writers = 0; #endif +#ifdef CONFIG_UNION_MOUNT + INIT_LIST_HEAD(&mnt->mnt_unions); +#endif } return mnt; @@ -492,6 +496,7 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { + detach_mnt_union(mnt); old_path->dentry = mnt->mnt_mountpoint; old_path->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; @@ -515,6 +520,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(path->mnt, path->dentry)); list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); + attach_mnt_union(mnt, path->mnt, path->dentry); } /* @@ -537,6 +543,7 @@ static void commit_tree(struct vfsmount *mnt) list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + attach_mnt_union(mnt, mnt->mnt_parent, mnt->mnt_mountpoint); touch_mnt_namespace(n); } @@ -1025,6 +1032,7 @@ void release_mounts(struct list_head *head) struct dentry *dentry; struct vfsmount *m; spin_lock(&vfsmount_lock); + detach_mnt_union(mnt); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; @@ -1143,6 +1151,13 @@ static int do_umount(struct vfsmount *mnt, int flags) spin_unlock(&vfsmount_lock); if (retval) security_sb_umount_busy(mnt); + /* + * If this was a union mount, we are no longer a read-only + * user on the underlying mount. + */ + if (mnt->mnt_flags & MNT_UNION) + dec_hard_readonly_users(mnt->mnt_parent); + up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1483,6 +1498,17 @@ static int do_change_type(struct path *path, int flag) return -EINVAL; down_write(&namespace_sem); + + /* + * Mounts of file systems with read-only users can't deal with + * mount/umount propagation events - it's the moral equivalent + * of rm -rf dir/ or the like. + */ + if (sb_is_hard_readonly(mnt->mnt_sb)) { + err = -EROFS; + goto out_unlock; + } + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -1500,6 +1526,77 @@ static int do_change_type(struct path *path, int flag) } /* + * Mount-time check of upper and lower layer file systems to see if we + * can union mount one on the other. + * + * Note on union mounts and mount event propagation: The lower + * layer(s) of a union mount must not have any changes to its + * namespace. Therefore, it must not be part of any mount event + * propagation group - i.e., shared or slave. MNT_SHARED and + * MNT_SLAVE are not set at mount, but in do_change_type(), which + * prevents setting these flags on file systems with read-only users, + * which includes the lower layer(s) of a union mount. + */ + +static int +check_union_mnt(struct path *mntpnt, struct vfsmount *topmost_mnt, int mnt_flags) +{ + struct vfsmount *lower_mnt = mntpnt->mnt; + + if (!(mnt_flags & MNT_UNION)) + return 0; + +#ifndef CONFIG_UNION_MOUNT + return -EINVAL; +#endif + /* + * We can't deal with namespace changes in the lower layers of + * a union, so the lower layer must be read-only. Note that + * we could possibly convert a read-write unioned mount into a + * read-only mount here, which would give us a way to union + * more than one layer with separate mount commands. But + * first we have to solve the locking order problems with more + * than two layers of union. + */ + if (!(lower_mnt->mnt_sb->s_flags & MS_RDONLY)) + return -EBUSY; + + /* + * WRITEME: For simplicity, the lower layer can't have + * submounts. If there's a good reason, we could recursively + * check the whole subtree for read-only-ness, etc. and it + * would probably work fine. + */ + if (!list_empty(&lower_mnt->mnt_mounts)) + return -EBUSY; + + /* + * Only permit unioning of file systems at their root + * directories. This allows us to mark entire mounts as + * unioned. Otherwise we must slowly and expensively work our + * way up a path looking for a unioned directory before we + * know if a path is from a unioned lower layer. + */ + + if (!IS_ROOT(mntpnt->dentry)) + return -EINVAL; + + /* + * Topmost layer must be writable to support our readdir() + * solution of copying up all lower level entries to the + * topmost layer. + */ + if (mnt_flags & MNT_READONLY) + return -EROFS; + + /* Topmost file system must support whiteouts and fallthrus. */ + if (!(topmost_mnt->mnt_sb->s_flags & MS_WHITEOUT)) + return -EINVAL; + + return 0; +} + +/* * do loopback mount. */ static int do_loopback(struct path *path, char *old_name, @@ -1520,6 +1617,9 @@ static int do_loopback(struct path *path, char *old_name, err = -EINVAL; if (IS_MNT_UNBINDABLE(old_path.mnt)) goto out; + /* Mount part of a union mount elsewhere? The mind boggles. */ + if (IS_MNT_UNION(old_path.mnt)) + goto out; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out; @@ -1541,7 +1641,6 @@ static int do_loopback(struct path *path, char *old_name, spin_unlock(&vfsmount_lock); release_mounts(&umount_list); } - out: up_write(&namespace_sem); path_put(&old_path); @@ -1582,6 +1681,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags, if (!check_mnt(path->mnt)) return -EINVAL; + if (mnt_flags & MNT_UNION) + return -EINVAL; + if (path->dentry != path->mnt->mnt_root) return -EINVAL; @@ -1634,6 +1736,9 @@ static int do_move_mount(struct path *path, char *old_name) while (d_mountpoint(path->dentry) && follow_down(path)) ; + /* Get the lowest layer of a union mount to move the whole stack */ + while (union_down_one(&old_path.mnt, &old_path.dentry)) + ; err = -EINVAL; if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out; @@ -1746,10 +1851,18 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) goto unlock; + err = check_union_mnt(path, newmnt, mnt_flags); + if (err) + goto unlock; + newmnt->mnt_flags = mnt_flags; if ((err = graft_tree(newmnt, path))) goto unlock; + /* Union mounts require the lower layer to always be read-only */ + if (mnt_flags & MNT_UNION) + inc_hard_readonly_users(newmnt->mnt_parent); + if (fslist) /* add to the specified expiration list */ list_add_tail(&newmnt->mnt_expire, fslist); @@ -2260,6 +2373,14 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, if (d_unlinked(old.dentry)) goto out2; error = -EBUSY; + /* + * We want the bottom-most layer of a union mount here - if we + * move that around, all the layers on top move with it. + */ + while (union_down_one(&new.mnt, &new.dentry)) + ; + while (union_down_one(&root.mnt, &root.dentry)) + ; if (new.mnt == root.mnt || old.mnt == root.mnt) goto out2; /* loop, on the same file system */ diff --git a/fs/union.c b/fs/union.c index 6823081..ed852e5 100644 --- a/fs/union.c +++ b/fs/union.c @@ -114,6 +114,7 @@ struct union_mount *union_alloc(struct dentry *this, struct vfsmount *this_mnt, atomic_set(&um->u_count, 1); INIT_LIST_HEAD(&um->u_unions); + INIT_LIST_HEAD(&um->u_list); INIT_HLIST_NODE(&um->u_hash); INIT_HLIST_NODE(&um->u_rhash); @@ -275,6 +276,7 @@ int append_to_union(struct vfsmount *upper_mnt, struct dentry *upper_dentry, union_put(new); return 0; } + list_add(&new->u_list, &upper_mnt->mnt_unions); list_add(&new->u_unions, &upper_dentry->d_unions); lower_dentry->d_unionized++; __union_hash(new); @@ -374,6 +376,7 @@ repeat: list_for_each_entry_safe(this, next, &dentry->d_unions, u_unions) { BUG_ON(!hlist_unhashed(&this->u_hash)); BUG_ON(!hlist_unhashed(&this->u_rhash)); + list_del(&this->u_list); list_del(&this->u_unions); this->u_next.dentry->d_unionized--; spin_unlock(&union_lock); @@ -384,6 +387,59 @@ repeat: } /* + * Remove all union_mounts structures belonging to this vfsmount from the + * union lookup hashtable and so on ... + */ +void shrink_mnt_unions(struct vfsmount *mnt) +{ + struct union_mount *this, *next; + +repeat: + spin_lock(&union_lock); + list_for_each_entry_safe(this, next, &mnt->mnt_unions, u_list) { + if (this->u_this.dentry == mnt->mnt_root) + continue; + __union_unhash(this); + list_del(&this->u_list); + list_del(&this->u_unions); + this->u_next.dentry->d_unionized--; + spin_unlock(&union_lock); + union_put(this); + goto repeat; + } + spin_unlock(&union_lock); +} + +int attach_mnt_union(struct vfsmount *mnt, struct vfsmount *dest_mnt, + struct dentry *dest_dentry) +{ + if (!IS_MNT_UNION(mnt)) + return 0; + + return append_to_union(mnt, mnt->mnt_root, dest_mnt, dest_dentry); +} + +void detach_mnt_union(struct vfsmount *mnt) +{ + struct union_mount *um; + + if (!IS_MNT_UNION(mnt)) + return; + + shrink_mnt_unions(mnt); + + spin_lock(&union_lock); + um = union_lookup(mnt->mnt_root, mnt); + __union_unhash(um); + list_del(&um->u_list); + list_del(&um->u_unions); + um->u_next.dentry->d_unionized--; + spin_unlock(&union_lock); + union_put(um); + return; +} + +/* * union_create_topmost_dir - Create a matching dir in the topmost file system */ diff --git a/include/linux/union.h b/include/linux/union.h index 938b15a..6eaeae8 100644 --- a/include/linux/union.h +++ b/include/linux/union.h @@ -50,6 +50,9 @@ extern void __d_drop_unions(struct dentry *); extern void shrink_d_unions(struct dentry *); extern struct dentry * union_create_topmost_dir(struct path *, struct qstr *, struct path *); +extern int attach_mnt_union(struct vfsmount *, struct vfsmount *, + struct dentry *); +extern void detach_mnt_union(struct vfsmount *); #else /* CONFIG_UNION_MOUNT */ @@ -61,6 +64,8 @@ extern struct dentry * union_create_topmost_dir(struct path *, struct qstr *, #define __d_drop_unions(x) do { } while (0) #define shrink_d_unions(x) do { } while (0) #define union_create_topmost_dir(x, y, z) ({ BUG(); (NULL); }) +#define attach_mnt_union(x, y, z) do { } while (0) +#define detach_mnt_union(x) do { } while (0) #endif /* CONFIG_UNION_MOUNT */ #endif /* __KERNEL__ */ -- 1.5.6.5 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html