Quoting Miklos Szeredi (miklos@xxxxxxxxxx): > > > If you get down to it, the thing is about delegating control over part > > > of namespace to somebody, without letting them control, see, etc. the > > > rest of it. So I'd rather be very conservative about extra information > > > we allow to piggyback on that. I don't know... perhaps with stable peer > > > group IDs it would be OK to show peer group ID by (our) vfsmount + peer > > > group ID of master + peer group ID of nearest dominating group that has > > > intersection with our namespace. Then we don't leak information (AFAICS), > > > get full propagation information between our vfsmounts and cooperating > > > tasks in different namespaces can figure the things out as much as possible > > > without leaking 3rd-party information to either. > > > > Here's a patch against current -mm implementing this (with some > cleanups thrown in). Done some testing on it as well, it wasn't > entirely trivial to figure out a setup, where propagation goes out of > the namespace first, then comes back in: Looks nice, and a bit of testing/playing around showed no problem on my end. This definitely would be a nice feature to have, and heck, it might greatly simplify an LTP testcase for mounts propagation which is long overdue. thanks, -serge > mount --bind /mnt1 /mnt1 > mount --make-shared /mnt1 > mount --bind /mnt2 /mnt2 > mount --make-shared /mnt2 > newns > mount --make-slave /mnt1 > > old ns: > mount --make-slave /mnt2 > mount --bind /mnt1/tmp /mnt1/tmp > > new ns: > mount --make-shared /mnt1/tmp > mount --bind /mnt1/tmp /mnt2/tmp > > Voila. 
> > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx> > --- > > Index: linux/fs/pnode.c > =================================================================== > --- linux.orig/fs/pnode.c 2008-02-22 15:27:23.000000000 +0100 > +++ linux/fs/pnode.c 2008-02-22 15:27:26.000000000 +0100 > @@ -9,8 +9,12 @@ > #include <linux/mnt_namespace.h> > #include <linux/mount.h> > #include <linux/fs.h> > +#include <linux/idr.h> > #include "pnode.h" > > +static DEFINE_SPINLOCK(mnt_pgid_lock); > +static DEFINE_IDA(mnt_pgid_ida); > + > /* return the next shared peer mount of @p */ > static inline struct vfsmount *next_peer(struct vfsmount *p) > { > @@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav > return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); > } > > -static int __peer_group_id(struct vfsmount *mnt) > +static void __set_mnt_shared(struct vfsmount *mnt) > { > - struct vfsmount *m; > - int id = mnt->mnt_id; > + mnt->mnt_flags &= ~MNT_PNODE_MASK; > + mnt->mnt_flags |= MNT_SHARED; > +} > + > +void set_mnt_shared(struct vfsmount *mnt) > +{ > + int res; > > - for (m = next_peer(mnt); m != mnt; m = next_peer(m)) > - id = min(id, m->mnt_id); > + retry: > + spin_lock(&mnt_pgid_lock); > + if (IS_MNT_SHARED(mnt)) { > + spin_unlock(&mnt_pgid_lock); > + return; > + } > > - return id; > + res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid); > + spin_unlock(&mnt_pgid_lock); > + if (res == -EAGAIN) { > + if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL)) > + goto retry; > + } > + __set_mnt_shared(mnt); > +} > + > +void clear_mnt_shared(struct vfsmount *mnt) > +{ > + if (IS_MNT_SHARED(mnt)) { > + mnt->mnt_flags &= ~MNT_SHARED; > + mnt->mnt_pgid = -1; > + } > +} > + > +void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt) > +{ > + mnt->mnt_pgid = old->mnt_pgid; > + list_add(&mnt->mnt_share, &old->mnt_share); > + __set_mnt_shared(mnt); > } > > -/* return the smallest ID within the peer group */ > int get_peer_group_id(struct vfsmount *mnt) > { > + return mnt->mnt_pgid; > +} 
> + > +int get_master_id(struct vfsmount *mnt) > +{ > int id; > > spin_lock(&vfsmount_lock); > - id = __peer_group_id(mnt); > + id = get_peer_group_id(mnt->mnt_master); > spin_unlock(&vfsmount_lock); > > return id; > } > > -/* return the smallest ID within the master's peer group */ > -int get_master_id(struct vfsmount *mnt) > +static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt, > + struct mnt_namespace *ns) > { > - int id; > + struct vfsmount *m = mnt; > + > + do { > + if (m->mnt_ns == ns) > + return m; > + m = next_peer(m); > + } while (m != mnt); > + > + return NULL; > +} > + > +int get_dominator_id_same_ns(struct vfsmount *mnt) > +{ > + int id = -1; > + struct vfsmount *m; > > spin_lock(&vfsmount_lock); > - id = __peer_group_id(mnt->mnt_master); > + for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { > + struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns); > + if (d) { > + id = d->mnt_pgid; > + break; > + } > + } > spin_unlock(&vfsmount_lock); > > return id; > @@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount > if (peer_mnt == mnt) > peer_mnt = NULL; > } > - list_del_init(&mnt->mnt_share); > + if (!list_empty(&mnt->mnt_share)) > + list_del_init(&mnt->mnt_share); > + else if (IS_MNT_SHARED(mnt)) { > + spin_lock(&mnt_pgid_lock); > + ida_remove(&mnt_pgid_ida, mnt->mnt_pgid); > + spin_unlock(&mnt_pgid_lock); > + } > > if (peer_mnt) > master = peer_mnt; > @@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount > list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave) > slave_mnt->mnt_master = master; > list_move(&mnt->mnt_slave, &master->mnt_slave_list); > - list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev); > - INIT_LIST_HEAD(&mnt->mnt_slave_list); > + list_splice_init(&mnt->mnt_slave_list, > + master->mnt_slave_list.prev); > } else { > - struct list_head *p = &mnt->mnt_slave_list; > - while (!list_empty(p)) { > - slave_mnt = list_first_entry(p, > + while (!list_empty(&mnt->mnt_slave_list)) { > + slave_mnt = 
list_first_entry(&mnt->mnt_slave_list, > struct vfsmount, mnt_slave); > list_del_init(&slave_mnt->mnt_slave); > slave_mnt->mnt_master = NULL; > } > } > mnt->mnt_master = master; > - CLEAR_MNT_SHARED(mnt); > - INIT_LIST_HEAD(&mnt->mnt_slave_list); > + clear_mnt_shared(mnt); > return 0; > } > > Index: linux/fs/namespace.c > =================================================================== > --- linux.orig/fs/namespace.c 2008-02-22 15:27:23.000000000 +0100 > +++ linux/fs/namespace.c 2008-02-22 15:27:26.000000000 +0100 > @@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char > return NULL; > } > > + mnt->mnt_pgid = -1; > atomic_set(&mnt->mnt_count, 1); > INIT_LIST_HEAD(&mnt->mnt_hash); > INIT_LIST_HEAD(&mnt->mnt_child); > @@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct > if (flag & CL_SLAVE) { > list_add(&mnt->mnt_slave, &old->mnt_slave_list); > mnt->mnt_master = old; > - CLEAR_MNT_SHARED(mnt); > + clear_mnt_shared(mnt); > } else if (!(flag & CL_PRIVATE)) { > - if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old)) > - list_add(&mnt->mnt_share, &old->mnt_share); > + if (flag & CL_PROPAGATION) > + set_mnt_shared(old); > + if (IS_MNT_SHARED(old)) > + make_mnt_peer(old, mnt); > if (IS_MNT_SLAVE(old)) > list_add(&mnt->mnt_slave, &old->mnt_slave); > mnt->mnt_master = old->mnt_master; > @@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil > show_sb_opts(m, sb); > if (sb->s_op->show_options) > err = sb->s_op->show_options(m, mnt); > - if (IS_MNT_SHARED(mnt)) { > - seq_printf(m, " shared:%i", get_peer_group_id(mnt)); > - if (IS_MNT_SLAVE(mnt)) > - seq_printf(m, ",slave:%i", get_master_id(mnt)); > - } else if (IS_MNT_SLAVE(mnt)) { > - seq_printf(m, " slave:%i", get_master_id(mnt)); > + seq_putc(m, ' '); > + if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) { > + if (IS_MNT_SHARED(mnt)) > + seq_printf(m, "shared:%i", get_peer_group_id(mnt)); > + if (IS_MNT_SLAVE(mnt)) { > + int dominator_id = get_dominator_id_same_ns(mnt); > + > + if (IS_MNT_SHARED(mnt)) > 
+ seq_putc(m, ','); > + > + seq_printf(m, "slave:%i", get_master_id(mnt)); > + if (dominator_id != -1) > + seq_printf(m, ":%i", dominator_id); > + } > } else if (IS_MNT_UNBINDABLE(mnt)) { > - seq_printf(m, " unbindable"); > + seq_printf(m, "unbindable"); > } else { > - seq_printf(m, " private"); > + seq_printf(m, "private"); > } > seq_putc(m, '\n'); > return err; > Index: linux/fs/pnode.h > =================================================================== > --- linux.orig/fs/pnode.h 2008-02-22 15:27:23.000000000 +0100 > +++ linux/fs/pnode.h 2008-02-22 15:27:26.000000000 +0100 > @@ -14,7 +14,6 @@ > #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) > #define IS_MNT_SLAVE(mnt) (mnt->mnt_master) > #define IS_MNT_NEW(mnt) (!mnt->mnt_ns) > -#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) > #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE) > > #define CL_EXPIRE 0x01 > @@ -24,12 +23,9 @@ > #define CL_PROPAGATION 0x10 > #define CL_PRIVATE 0x20 > > -static inline void set_mnt_shared(struct vfsmount *mnt) > -{ > - mnt->mnt_flags &= ~MNT_PNODE_MASK; > - mnt->mnt_flags |= MNT_SHARED; > -} > - > +void set_mnt_shared(struct vfsmount *); > +void clear_mnt_shared(struct vfsmount *); > +void make_mnt_peer(struct vfsmount *, struct vfsmount *); > void change_mnt_propagation(struct vfsmount *, int); > int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, > struct list_head *); > @@ -37,4 +33,5 @@ int propagate_umount(struct list_head *) > int propagate_mount_busy(struct vfsmount *, int); > int get_peer_group_id(struct vfsmount *); > int get_master_id(struct vfsmount *); > +int get_dominator_id_same_ns(struct vfsmount *); > #endif /* _LINUX_PNODE_H */ > Index: linux/include/linux/mount.h > =================================================================== > --- linux.orig/include/linux/mount.h 2008-02-22 15:27:23.000000000 +0100 > +++ linux/include/linux/mount.h 2008-02-22 15:27:26.000000000 +0100 > @@ -57,6 +57,7 @@ struct 
vfsmount { > struct vfsmount *mnt_master; /* slave is on master->mnt_slave_list */ > struct mnt_namespace *mnt_ns; /* containing namespace */ > int mnt_id; /* mount identifier */ > + int mnt_pgid; /* peer group identifier */ > /* > * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount > * to let these frequently modified fields in a separate cache line > Index: linux/Documentation/filesystems/proc.txt > =================================================================== > --- linux.orig/Documentation/filesystems/proc.txt 2008-02-22 15:27:23.000000000 +0100 > +++ linux/Documentation/filesystems/proc.txt 2008-02-22 15:27:26.000000000 +0100 > @@ -2367,21 +2367,20 @@ MNTOPTS: per mount options > SBOPTS: per super block options > PROPAGATION: propagation type > > -propagation type: <propagation_flag>[:<mntid>][,...] > - note: 'shared' flag is followed by the mntid of its peer mount > - 'slave' flag is followed by the mntid of its master mount > +propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...] > + note: 'shared' flag is followed by the id of this mount's peer group > + 'slave' flag is followed by the peer group id of its master mount, > + optionally followed by the id of the closest dominant(*) > + peer group in the same namespace, if one exists. > 'private' flag stands by itself > 'unbindable' flag stands by itself > > -The 'mntid' used in the propagation type is a canonical ID of the peer > -group (currently the smallest ID within the group is used for this > -purpose, but this should not be relied on). Since mounts can be added > -or removed from the peer group, this ID only guaranteed to stay the > -same on a static propagation tree. > +(*) A dominant peer group is an ancestor of this mount in the > +propagation tree, in other words, this mount receives propagation from > +the dominant peer group, but not the other way round. 
> > For more information see: > > Documentation/filesystems/sharedsubtree.txt > > - > ------------------------------------------------------------------------------ > - > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html