> > If you get down to it, the thing is about delegating control over part > > of namespace to somebody, without letting them control, see, etc. the > > rest of it. So I'd rather be very conservative about extra information > > we allow to piggyback on that. I don't know... perhaps with stable peer > > group IDs it would be OK to show peer group ID by (our) vfsmount + peer > > group ID of master + peer group ID of nearest dominating group that has > > intersection with our namespace. Then we don't leak information (AFAICS), > > get full propagation information between our vfsmounts and cooperating > > tasks in different namespaces can figure the things out as much as possible > > without leaking 3rd-party information to either. > Here's a patch against current -mm implementing this (with some cleanups thrown in). I did some testing on it as well; it wasn't entirely trivial to figure out a setup where propagation goes out of the namespace first, then comes back in: mount --bind /mnt1 /mnt1 mount --make-shared /mnt1 mount --bind /mnt2 /mnt2 mount --make-shared /mnt2 newns mount --make-slave /mnt1 old ns: mount --make-slave /mnt2 mount --bind /mnt1/tmp /mnt1/tmp new ns: mount --make-shared /mnt1/tmp mount --bind /mnt1/tmp /mnt2/tmp Voila. 
Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx> --- Index: linux/fs/pnode.c =================================================================== --- linux.orig/fs/pnode.c 2008-02-22 15:27:23.000000000 +0100 +++ linux/fs/pnode.c 2008-02-22 15:27:26.000000000 +0100 @@ -9,8 +9,12 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/fs.h> +#include <linux/idr.h> #include "pnode.h" +static DEFINE_SPINLOCK(mnt_pgid_lock); +static DEFINE_IDA(mnt_pgid_ida); + /* return the next shared peer mount of @p */ static inline struct vfsmount *next_peer(struct vfsmount *p) { @@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); } -static int __peer_group_id(struct vfsmount *mnt) +static void __set_mnt_shared(struct vfsmount *mnt) { - struct vfsmount *m; - int id = mnt->mnt_id; + mnt->mnt_flags &= ~MNT_PNODE_MASK; + mnt->mnt_flags |= MNT_SHARED; +} + +void set_mnt_shared(struct vfsmount *mnt) +{ + int res; - for (m = next_peer(mnt); m != mnt; m = next_peer(m)) - id = min(id, m->mnt_id); + retry: + spin_lock(&mnt_pgid_lock); + if (IS_MNT_SHARED(mnt)) { + spin_unlock(&mnt_pgid_lock); + return; + } - return id; + res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid); + spin_unlock(&mnt_pgid_lock); + if (res == -EAGAIN) { + if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL)) + goto retry; + } + __set_mnt_shared(mnt); +} + +void clear_mnt_shared(struct vfsmount *mnt) +{ + if (IS_MNT_SHARED(mnt)) { + mnt->mnt_flags &= ~MNT_SHARED; + mnt->mnt_pgid = -1; + } +} + +void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt) +{ + mnt->mnt_pgid = old->mnt_pgid; + list_add(&mnt->mnt_share, &old->mnt_share); + __set_mnt_shared(mnt); } -/* return the smallest ID within the peer group */ int get_peer_group_id(struct vfsmount *mnt) { + return mnt->mnt_pgid; +} + +int get_master_id(struct vfsmount *mnt) +{ int id; spin_lock(&vfsmount_lock); - id = __peer_group_id(mnt); + id = get_peer_group_id(mnt->mnt_master); 
spin_unlock(&vfsmount_lock); return id; } -/* return the smallest ID within the master's peer group */ -int get_master_id(struct vfsmount *mnt) +static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt, + struct mnt_namespace *ns) { - int id; + struct vfsmount *m = mnt; + + do { + if (m->mnt_ns == ns) + return m; + m = next_peer(m); + } while (m != mnt); + + return NULL; +} + +int get_dominator_id_same_ns(struct vfsmount *mnt) +{ + int id = -1; + struct vfsmount *m; spin_lock(&vfsmount_lock); - id = __peer_group_id(mnt->mnt_master); + for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { + struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns); + if (d) { + id = d->mnt_pgid; + break; + } + } spin_unlock(&vfsmount_lock); return id; @@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount if (peer_mnt == mnt) peer_mnt = NULL; } - list_del_init(&mnt->mnt_share); + if (!list_empty(&mnt->mnt_share)) + list_del_init(&mnt->mnt_share); + else if (IS_MNT_SHARED(mnt)) { + spin_lock(&mnt_pgid_lock); + ida_remove(&mnt_pgid_ida, mnt->mnt_pgid); + spin_unlock(&mnt_pgid_lock); + } if (peer_mnt) master = peer_mnt; @@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) slave_mnt->mnt_master = master; list_move(&mnt->mnt_slave, &master->mnt_slave_list); - list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); - INIT_LIST_HEAD(&mnt->mnt_slave_list); + list_splice_init(&mnt->mnt_slave_list, + master->mnt_slave_list.prev); } else { - struct list_head *p = &mnt->mnt_slave_list; - while (!list_empty(p)) { - slave_mnt = list_first_entry(p, + while (!list_empty(&mnt->mnt_slave_list)) { + slave_mnt = list_first_entry(&mnt->mnt_slave_list, struct vfsmount, mnt_slave); list_del_init(&slave_mnt->mnt_slave); slave_mnt->mnt_master = NULL; } } mnt->mnt_master = master; - CLEAR_MNT_SHARED(mnt); - INIT_LIST_HEAD(&mnt->mnt_slave_list); + clear_mnt_shared(mnt); return 0; } Index: linux/fs/namespace.c 
=================================================================== --- linux.orig/fs/namespace.c 2008-02-22 15:27:23.000000000 +0100 +++ linux/fs/namespace.c 2008-02-22 15:27:26.000000000 +0100 @@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char return NULL; } + mnt->mnt_pgid = -1; atomic_set(&mnt->mnt_count, 1); INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); @@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); mnt->mnt_master = old; - CLEAR_MNT_SHARED(mnt); + clear_mnt_shared(mnt); } else if (!(flag & CL_PRIVATE)) { - if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old)) - list_add(&mnt->mnt_share, &old->mnt_share); + if (flag & CL_PROPAGATION) + set_mnt_shared(old); + if (IS_MNT_SHARED(old)) + make_mnt_peer(old, mnt); if (IS_MNT_SLAVE(old)) list_add(&mnt->mnt_slave, &old->mnt_slave); mnt->mnt_master = old->mnt_master; @@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil show_sb_opts(m, sb); if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt); - if (IS_MNT_SHARED(mnt)) { - seq_printf(m, " shared:%i", get_peer_group_id(mnt)); - if (IS_MNT_SLAVE(mnt)) - seq_printf(m, ",slave:%i", get_master_id(mnt)); - } else if (IS_MNT_SLAVE(mnt)) { - seq_printf(m, " slave:%i", get_master_id(mnt)); + seq_putc(m, ' '); + if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) { + if (IS_MNT_SHARED(mnt)) + seq_printf(m, "shared:%i", get_peer_group_id(mnt)); + if (IS_MNT_SLAVE(mnt)) { + int dominator_id = get_dominator_id_same_ns(mnt); + + if (IS_MNT_SHARED(mnt)) + seq_putc(m, ','); + + seq_printf(m, "slave:%i", get_master_id(mnt)); + if (dominator_id != -1) + seq_printf(m, ":%i", dominator_id); + } } else if (IS_MNT_UNBINDABLE(mnt)) { - seq_printf(m, " unbindable"); + seq_printf(m, "unbindable"); } else { - seq_printf(m, " private"); + seq_printf(m, "private"); } seq_putc(m, '\n'); return err; Index: linux/fs/pnode.h 
=================================================================== --- linux.orig/fs/pnode.h 2008-02-22 15:27:23.000000000 +0100 +++ linux/fs/pnode.h 2008-02-22 15:27:26.000000000 +0100 @@ -14,7 +14,6 @@ #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(mnt) (mnt->mnt_master) #define IS_MNT_NEW(mnt) (!mnt->mnt_ns) -#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE) #define CL_EXPIRE 0x01 @@ -24,12 +23,9 @@ #define CL_PROPAGATION 0x10 #define CL_PRIVATE 0x20 -static inline void set_mnt_shared(struct vfsmount *mnt) -{ - mnt->mnt_flags &= ~MNT_PNODE_MASK; - mnt->mnt_flags |= MNT_SHARED; -} - +void set_mnt_shared(struct vfsmount *); +void clear_mnt_shared(struct vfsmount *); +void make_mnt_peer(struct vfsmount *, struct vfsmount *); void change_mnt_propagation(struct vfsmount *, int); int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, struct list_head *); @@ -37,4 +33,5 @@ int propagate_umount(struct list_head *) int propagate_mount_busy(struct vfsmount *, int); int get_peer_group_id(struct vfsmount *); int get_master_id(struct vfsmount *); +int get_dominator_id_same_ns(struct vfsmount *); #endif /* _LINUX_PNODE_H */ Index: linux/include/linux/mount.h =================================================================== --- linux.orig/include/linux/mount.h 2008-02-22 15:27:23.000000000 +0100 +++ linux/include/linux/mount.h 2008-02-22 15:27:26.000000000 +0100 @@ -57,6 +57,7 @@ struct vfsmount { struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ + int mnt_pgid; /* peer group identifier */ /* * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount * to let these frequently modified fields in a separate cache line Index: linux/Documentation/filesystems/proc.txt =================================================================== 
--- linux.orig/Documentation/filesystems/proc.txt 2008-02-22 15:27:23.000000000 +0100 +++ linux/Documentation/filesystems/proc.txt 2008-02-22 15:27:26.000000000 +0100 @@ -2367,21 +2367,20 @@ MNTOPTS: per mount options SBOPTS: per super block options PROPAGATION: propagation type -propagation type: <propagation_flag>[:<mntid>][,...] - note: 'shared' flag is followed by the mntid of its peer mount - 'slave' flag is followed by the mntid of its master mount +propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...] + note: 'shared' flag is followed by the id of this mount's peer group + 'slave' flag is followed by the peer group id of its master mount, + optionally followed by the id of the closest dominant(*) + peer group in the same namespace, if one exists. 'private' flag stands by itself 'unbindable' flag stands by itself -The 'mntid' used in the propagation type is a canonical ID of the peer -group (currently the smallest ID within the group is used for this -purpose, but this should not be relied on). Since mounts can be added -or removed from the peer group, this ID only guaranteed to stay the -same on a static propagation tree. +(*) A dominant peer group is an ancestor of this mount in the +propagation tree, in other words, this mount receives propagation from +the dominant peer group, but not the other way round. For more information see: Documentation/filesystems/sharedsubtree.txt - ------------------------------------------------------------------------------ - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html