Re: [rfc patch] how to show propagation state for mounts

"Serge E. Hallyn" <serue@xxxxxxxxxx> · Wed, 5 Mar 2008 13:25:59 -0600

Quoting Miklos Szeredi (miklos@xxxxxxxxxx):
> > > If you get down to it, the thing is about delegating control over part
> > > of namespace to somebody, without letting them control, see, etc. the
> > > rest of it.  So I'd rather be very conservative about extra information
> > > we allow to piggyback on that.  I don't know... perhaps with stable peer
> > > group IDs it would be OK to show peer group ID by (our) vfsmount + peer
> > > group ID of master + peer group ID of nearest dominating group that has
> > > intersection with our namespace.  Then we don't leak information (AFAICS),
> > > get full propagation information between our vfsmounts and cooperating
> > > tasks in different namespaces can figure the things out as much as possible
> > > without leaking 3rd-party information to either.
> > 
> 
> Here's a patch against current -mm implementing this (with some
> cleanups thrown in).  Done some testing on it as well, it wasn't
> entirey trivial to figure out a setup, where propagation goes out of
> the namespace first, then comes back in:

Looks nice, and a bit of testing/playing around showed no problem on my
end.

This definately would be a nice feature to have, and heck, it might
greatly simplify an LTP testcase for mounts propagation which is long
overdue.

thanks,
-serge

>   mount --bind /mnt1 /mnt1
>   mount --make-shared /mnt1
>   mount --bind /mnt2 /mnt2
>   mount --make-shared /mnt2
>   newns
>   mount --make-slave /mnt1
> 
> old ns:
>   mount --make-slave /mnt2
>   mount --bind /mnt1/tmp /mnt1/tmp
> 
> new ns:
>   mount --make-shared /mnt1/tmp
>   mount --bind /mnt1/tmp /mnt2/tmp
> 
> Voila.
> 
> 
> Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
> ---
> 
> Index: linux/fs/pnode.c
> ===================================================================
> --- linux.orig/fs/pnode.c	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/pnode.c	2008-02-22 15:27:26.000000000 +0100
> @@ -9,8 +9,12 @@
>  #include <linux/mnt_namespace.h>
>  #include <linux/mount.h>
>  #include <linux/fs.h>
> +#include <linux/idr.h>
>  #include "pnode.h"
> 
> +static DEFINE_SPINLOCK(mnt_pgid_lock);
> +static DEFINE_IDA(mnt_pgid_ida);
> +
>  /* return the next shared peer mount of @p */
>  static inline struct vfsmount *next_peer(struct vfsmount *p)
>  {
> @@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav
>  	return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
>  }
> 
> -static int __peer_group_id(struct vfsmount *mnt)
> +static void __set_mnt_shared(struct vfsmount *mnt)
>  {
> -	struct vfsmount *m;
> -	int id = mnt->mnt_id;
> +	mnt->mnt_flags &= ~MNT_PNODE_MASK;
> +	mnt->mnt_flags |= MNT_SHARED;
> +}
> +
> +void set_mnt_shared(struct vfsmount *mnt)
> +{
> +	int res;
> 
> -	for (m = next_peer(mnt); m != mnt; m = next_peer(m))
> -		id = min(id, m->mnt_id);
> + retry:
> +	spin_lock(&mnt_pgid_lock);
> +	if (IS_MNT_SHARED(mnt)) {
> +		spin_unlock(&mnt_pgid_lock);
> +		return;
> +	}
> 
> -	return id;
> +	res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid);
> +	spin_unlock(&mnt_pgid_lock);
> +	if (res == -EAGAIN) {
> +		if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL))
> +			goto retry;
> +	}
> +	__set_mnt_shared(mnt);
> +}
> +
> +void clear_mnt_shared(struct vfsmount *mnt)
> +{
> +	if (IS_MNT_SHARED(mnt)) {
> +		mnt->mnt_flags &= ~MNT_SHARED;
> +		mnt->mnt_pgid = -1;
> +	}
> +}
> +
> +void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt)
> +{
> +	mnt->mnt_pgid = old->mnt_pgid;
> +	list_add(&mnt->mnt_share, &old->mnt_share);
> +	__set_mnt_shared(mnt);
>  }
> 
> -/* return the smallest ID within the peer group */
>  int get_peer_group_id(struct vfsmount *mnt)
>  {
> +	return mnt->mnt_pgid;
> +}
> +
> +int get_master_id(struct vfsmount *mnt)
> +{
>  	int id;
> 
>  	spin_lock(&vfsmount_lock);
> -	id = __peer_group_id(mnt);
> +	id = get_peer_group_id(mnt->mnt_master);
>  	spin_unlock(&vfsmount_lock);
> 
>  	return id;
>  }
> 
> -/* return the smallest ID within the master's peer group */
> -int get_master_id(struct vfsmount *mnt)
> +static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt,
> +				       struct mnt_namespace *ns)
>  {
> -	int id;
> +	struct vfsmount *m = mnt;
> +
> +	do {
> +		if (m->mnt_ns == ns)
> +			return m;
> +		m = next_peer(m);
> +	} while (m != mnt);
> +
> +	return NULL;
> +}
> +
> +int get_dominator_id_same_ns(struct vfsmount *mnt)
> +{
> +	int id = -1;
> +	struct vfsmount *m;
> 
>  	spin_lock(&vfsmount_lock);
> -	id = __peer_group_id(mnt->mnt_master);
> +	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
> +		struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns);
> +		if (d) {
> +			id = d->mnt_pgid;
> +			break;
> +		}
> +	}
>  	spin_unlock(&vfsmount_lock);
> 
>  	return id;
> @@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount
>  		if (peer_mnt == mnt)
>  			peer_mnt = NULL;
>  	}
> -	list_del_init(&mnt->mnt_share);
> +	if (!list_empty(&mnt->mnt_share))
> +		list_del_init(&mnt->mnt_share);
> +	else if (IS_MNT_SHARED(mnt)) {
> +		spin_lock(&mnt_pgid_lock);
> +		ida_remove(&mnt_pgid_ida, mnt->mnt_pgid);
> +		spin_unlock(&mnt_pgid_lock);
> +	}
> 
>  	if (peer_mnt)
>  		master = peer_mnt;
> @@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount
>  		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
>  			slave_mnt->mnt_master = master;
>  		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
> -		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
> -		INIT_LIST_HEAD(&mnt->mnt_slave_list);
> +		list_splice_init(&mnt->mnt_slave_list,
> +				 master->mnt_slave_list.prev);
>  	} else {
> -		struct list_head *p = &mnt->mnt_slave_list;
> -		while (!list_empty(p)) {
> -                        slave_mnt = list_first_entry(p,
> +		while (!list_empty(&mnt->mnt_slave_list)) {
> +			slave_mnt = list_first_entry(&mnt->mnt_slave_list,
>  					struct vfsmount, mnt_slave);
>  			list_del_init(&slave_mnt->mnt_slave);
>  			slave_mnt->mnt_master = NULL;
>  		}
>  	}
>  	mnt->mnt_master = master;
> -	CLEAR_MNT_SHARED(mnt);
> -	INIT_LIST_HEAD(&mnt->mnt_slave_list);
> +	clear_mnt_shared(mnt);
>  	return 0;
>  }
> 
> Index: linux/fs/namespace.c
> ===================================================================
> --- linux.orig/fs/namespace.c	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/namespace.c	2008-02-22 15:27:26.000000000 +0100
> @@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char
>  			return NULL;
>  		}
> 
> +		mnt->mnt_pgid = -1;
>  		atomic_set(&mnt->mnt_count, 1);
>  		INIT_LIST_HEAD(&mnt->mnt_hash);
>  		INIT_LIST_HEAD(&mnt->mnt_child);
> @@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct
>  		if (flag & CL_SLAVE) {
>  			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
>  			mnt->mnt_master = old;
> -			CLEAR_MNT_SHARED(mnt);
> +			clear_mnt_shared(mnt);
>  		} else if (!(flag & CL_PRIVATE)) {
> -			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
> -				list_add(&mnt->mnt_share, &old->mnt_share);
> +			if (flag & CL_PROPAGATION)
> +				set_mnt_shared(old);
> +			if (IS_MNT_SHARED(old))
> +				make_mnt_peer(old, mnt);
>  			if (IS_MNT_SLAVE(old))
>  				list_add(&mnt->mnt_slave, &old->mnt_slave);
>  			mnt->mnt_master = old->mnt_master;
> @@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil
>  	show_sb_opts(m, sb);
>  	if (sb->s_op->show_options)
>  		err = sb->s_op->show_options(m, mnt);
> -	if (IS_MNT_SHARED(mnt)) {
> -		seq_printf(m, " shared:%i", get_peer_group_id(mnt));
> -		if (IS_MNT_SLAVE(mnt))
> -			seq_printf(m, ",slave:%i", get_master_id(mnt));
> -	} else if (IS_MNT_SLAVE(mnt)) {
> -		seq_printf(m, " slave:%i", get_master_id(mnt));
> +	seq_putc(m, ' ');
> +	if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
> +		if (IS_MNT_SHARED(mnt))
> +			seq_printf(m, "shared:%i", get_peer_group_id(mnt));
> +		if (IS_MNT_SLAVE(mnt)) {
> +			int dominator_id = get_dominator_id_same_ns(mnt);
> +
> +			if (IS_MNT_SHARED(mnt))
> +				seq_putc(m, ',');
> +
> +			seq_printf(m, "slave:%i", get_master_id(mnt));
> +			if (dominator_id != -1)
> +				seq_printf(m, ":%i", dominator_id);
> +		}
>  	} else if (IS_MNT_UNBINDABLE(mnt)) {
> -		seq_printf(m, " unbindable");
> +		seq_printf(m, "unbindable");
>  	} else {
> -		seq_printf(m, " private");
> +		seq_printf(m, "private");
>  	}
>  	seq_putc(m, '\n');
>  	return err;
> Index: linux/fs/pnode.h
> ===================================================================
> --- linux.orig/fs/pnode.h	2008-02-22 15:27:23.000000000 +0100
> +++ linux/fs/pnode.h	2008-02-22 15:27:26.000000000 +0100
> @@ -14,7 +14,6 @@
>  #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
>  #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
>  #define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
> -#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
>  #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
> 
>  #define CL_EXPIRE    		0x01
> @@ -24,12 +23,9 @@
>  #define CL_PROPAGATION 		0x10
>  #define CL_PRIVATE 		0x20
> 
> -static inline void set_mnt_shared(struct vfsmount *mnt)
> -{
> -	mnt->mnt_flags &= ~MNT_PNODE_MASK;
> -	mnt->mnt_flags |= MNT_SHARED;
> -}
> -
> +void set_mnt_shared(struct vfsmount *);
> +void clear_mnt_shared(struct vfsmount *);
> +void make_mnt_peer(struct vfsmount *, struct vfsmount *);
>  void change_mnt_propagation(struct vfsmount *, int);
>  int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
>  		struct list_head *);
> @@ -37,4 +33,5 @@ int propagate_umount(struct list_head *)
>  int propagate_mount_busy(struct vfsmount *, int);
>  int get_peer_group_id(struct vfsmount *);
>  int get_master_id(struct vfsmount *);
> +int get_dominator_id_same_ns(struct vfsmount *);
>  #endif /* _LINUX_PNODE_H */
> Index: linux/include/linux/mount.h
> ===================================================================
> --- linux.orig/include/linux/mount.h	2008-02-22 15:27:23.000000000 +0100
> +++ linux/include/linux/mount.h	2008-02-22 15:27:26.000000000 +0100
> @@ -57,6 +57,7 @@ struct vfsmount {
>  	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
>  	struct mnt_namespace *mnt_ns;	/* containing namespace */
>  	int mnt_id;			/* mount identifier */
> +	int mnt_pgid;			/* peer group identifier */
>  	/*
>  	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
>  	 * to let these frequently modified fields in a separate cache line
> Index: linux/Documentation/filesystems/proc.txt
> ===================================================================
> --- linux.orig/Documentation/filesystems/proc.txt	2008-02-22 15:27:23.000000000 +0100
> +++ linux/Documentation/filesystems/proc.txt	2008-02-22 15:27:26.000000000 +0100
> @@ -2367,21 +2367,20 @@ MNTOPTS: per mount options
>  SBOPTS: per super block options
>  PROPAGATION: propagation type
> 
> -propagation type: <propagation_flag>[:<mntid>][,...]
> -	note: 'shared' flag is followed by the mntid of its peer mount
> -	      'slave' flag is followed by the mntid of its master mount
> +propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...]
> +	note: 'shared' flag is followed by the id of this mount's peer group
> +	      'slave' flag is followed by the peer group id of its master mount,
> +	      	      optionally followed by the id of the closest dominant(*)
> +		      peer group in the same namespace, if one exists.
>  	      'private' flag stands by itself
>  	      'unbindable' flag stands by itself
> 
> -The 'mntid' used in the propagation type is a canonical ID of the peer
> -group (currently the smallest ID within the group is used for this
> -purpose, but this should not be relied on).  Since mounts can be added
> -or removed from the peer group, this ID only guaranteed to stay the
> -same on a static propagation tree.
> +(*) A dominant peer group is an ancestor of this mount in the
> +propagation tree, in other words, this mount receives propagation from
> +the dominant peer group, but not the other way round.
> 
>  For more information see:
> 
>    Documentation/filesystems/sharedsubtree.txt
> 
> -
>  ------------------------------------------------------------------------------
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html