[rfc patch] how to show propagation state for mounts

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> > If you get down to it, the thing is about delegating control over part
> > of namespace to somebody, without letting them control, see, etc. the
> > rest of it.  So I'd rather be very conservative about extra information
> > we allow to piggyback on that.  I don't know... perhaps with stable peer
> > group IDs it would be OK to show peer group ID by (our) vfsmount + peer
> > group ID of master + peer group ID of nearest dominating group that has
> > intersection with our namespace.  Then we don't leak information (AFAICS),
> > get full propagation information between our vfsmounts and cooperating
> > tasks in different namespaces can figure the things out as much as possible
> > without leaking 3rd-party information to either.
> 

Here's a patch against current -mm implementing this (with some
cleanups thrown in).  I've done some testing on it as well; it wasn't
entirely trivial to figure out a setup where propagation goes out of
the namespace first, then comes back in:

  mount --bind /mnt1 /mnt1
  mount --make-shared /mnt1
  mount --bind /mnt2 /mnt2
  mount --make-shared /mnt2
  newns
  mount --make-slave /mnt1

old ns:
  mount --make-slave /mnt2
  mount --bind /mnt1/tmp /mnt1/tmp

new ns:
  mount --make-shared /mnt1/tmp
  mount --bind /mnt1/tmp /mnt2/tmp

Voila.


Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxx>
---

Index: linux/fs/pnode.c
===================================================================
--- linux.orig/fs/pnode.c	2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.c	2008-02-22 15:27:26.000000000 +0100
@@ -9,8 +9,12 @@
 #include <linux/mnt_namespace.h>
 #include <linux/mount.h>
 #include <linux/fs.h>
+#include <linux/idr.h>
 #include "pnode.h"
 
+static DEFINE_SPINLOCK(mnt_pgid_lock);
+static DEFINE_IDA(mnt_pgid_ida);
+
 /* return the next shared peer mount of @p */
 static inline struct vfsmount *next_peer(struct vfsmount *p)
 {
@@ -27,36 +31,90 @@ static inline struct vfsmount *next_slav
 	return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave);
 }
 
-static int __peer_group_id(struct vfsmount *mnt)
+static void __set_mnt_shared(struct vfsmount *mnt)
 {
-	struct vfsmount *m;
-	int id = mnt->mnt_id;
+	mnt->mnt_flags &= ~MNT_PNODE_MASK;
+	mnt->mnt_flags |= MNT_SHARED;
+}
+
+void set_mnt_shared(struct vfsmount *mnt)
+{
+	int res;
 
-	for (m = next_peer(mnt); m != mnt; m = next_peer(m))
-		id = min(id, m->mnt_id);
+ retry:
+	spin_lock(&mnt_pgid_lock);
+	if (IS_MNT_SHARED(mnt)) {
+		spin_unlock(&mnt_pgid_lock);
+		return;
+	}
 
-	return id;
+	res = ida_get_new(&mnt_pgid_ida, &mnt->mnt_pgid);
+	spin_unlock(&mnt_pgid_lock);
+	if (res == -EAGAIN) {
+		if (ida_pre_get(&mnt_pgid_ida, GFP_KERNEL))
+			goto retry;
+	}
+	__set_mnt_shared(mnt);
+}
+
+void clear_mnt_shared(struct vfsmount *mnt)
+{
+	if (IS_MNT_SHARED(mnt)) {
+		mnt->mnt_flags &= ~MNT_SHARED;
+		mnt->mnt_pgid = -1;
+	}
+}
+
+void make_mnt_peer(struct vfsmount *old, struct vfsmount *mnt)
+{
+	mnt->mnt_pgid = old->mnt_pgid;
+	list_add(&mnt->mnt_share, &old->mnt_share);
+	__set_mnt_shared(mnt);
 }
 
-/* return the smallest ID within the peer group */
 int get_peer_group_id(struct vfsmount *mnt)
 {
+	return mnt->mnt_pgid;
+}
+
+int get_master_id(struct vfsmount *mnt)
+{
 	int id;
 
 	spin_lock(&vfsmount_lock);
-	id = __peer_group_id(mnt);
+	id = get_peer_group_id(mnt->mnt_master);
 	spin_unlock(&vfsmount_lock);
 
 	return id;
 }
 
-/* return the smallest ID within the master's peer group */
-int get_master_id(struct vfsmount *mnt)
+static struct vfsmount *get_peer_in_ns(struct vfsmount *mnt,
+				       struct mnt_namespace *ns)
 {
-	int id;
+	struct vfsmount *m = mnt;
+
+	do {
+		if (m->mnt_ns == ns)
+			return m;
+		m = next_peer(m);
+	} while (m != mnt);
+
+	return NULL;
+}
+
+int get_dominator_id_same_ns(struct vfsmount *mnt)
+{
+	int id = -1;
+	struct vfsmount *m;
 
 	spin_lock(&vfsmount_lock);
-	id = __peer_group_id(mnt->mnt_master);
+	for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
+		struct vfsmount *d = get_peer_in_ns(m, mnt->mnt_ns);
+		if (d) {
+			id = d->mnt_pgid;
+			break;
+		}
+	}
 	spin_unlock(&vfsmount_lock);
 
 	return id;
@@ -80,7 +138,13 @@ static int do_make_slave(struct vfsmount
 		if (peer_mnt == mnt)
 			peer_mnt = NULL;
 	}
-	list_del_init(&mnt->mnt_share);
+	if (!list_empty(&mnt->mnt_share))
+		list_del_init(&mnt->mnt_share);
+	else if (IS_MNT_SHARED(mnt)) {
+		spin_lock(&mnt_pgid_lock);
+		ida_remove(&mnt_pgid_ida, mnt->mnt_pgid);
+		spin_unlock(&mnt_pgid_lock);
+	}
 
 	if (peer_mnt)
 		master = peer_mnt;
@@ -89,20 +153,18 @@ static int do_make_slave(struct vfsmount
 		list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
 			slave_mnt->mnt_master = master;
 		list_move(&mnt->mnt_slave, &master->mnt_slave_list);
-		list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
-		INIT_LIST_HEAD(&mnt->mnt_slave_list);
+		list_splice_init(&mnt->mnt_slave_list,
+				 master->mnt_slave_list.prev);
 	} else {
-		struct list_head *p = &mnt->mnt_slave_list;
-		while (!list_empty(p)) {
-                        slave_mnt = list_first_entry(p,
+		while (!list_empty(&mnt->mnt_slave_list)) {
+			slave_mnt = list_first_entry(&mnt->mnt_slave_list,
 					struct vfsmount, mnt_slave);
 			list_del_init(&slave_mnt->mnt_slave);
 			slave_mnt->mnt_master = NULL;
 		}
 	}
 	mnt->mnt_master = master;
-	CLEAR_MNT_SHARED(mnt);
-	INIT_LIST_HEAD(&mnt->mnt_slave_list);
+	clear_mnt_shared(mnt);
 	return 0;
 }
 
Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c	2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/namespace.c	2008-02-22 15:27:26.000000000 +0100
@@ -95,6 +95,7 @@ struct vfsmount *alloc_vfsmnt(const char
 			return NULL;
 		}
 
+		mnt->mnt_pgid = -1;
 		atomic_set(&mnt->mnt_count, 1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
@@ -537,10 +538,12 @@ static struct vfsmount *clone_mnt(struct
 		if (flag & CL_SLAVE) {
 			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
 			mnt->mnt_master = old;
-			CLEAR_MNT_SHARED(mnt);
+			clear_mnt_shared(mnt);
 		} else if (!(flag & CL_PRIVATE)) {
-			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
-				list_add(&mnt->mnt_share, &old->mnt_share);
+			if (flag & CL_PROPAGATION)
+				set_mnt_shared(old);
+			if (IS_MNT_SHARED(old))
+				make_mnt_peer(old, mnt);
 			if (IS_MNT_SLAVE(old))
 				list_add(&mnt->mnt_slave, &old->mnt_slave);
 			mnt->mnt_master = old->mnt_master;
@@ -795,16 +798,24 @@ static int show_mountinfo(struct seq_fil
 	show_sb_opts(m, sb);
 	if (sb->s_op->show_options)
 		err = sb->s_op->show_options(m, mnt);
-	if (IS_MNT_SHARED(mnt)) {
-		seq_printf(m, " shared:%i", get_peer_group_id(mnt));
-		if (IS_MNT_SLAVE(mnt))
-			seq_printf(m, ",slave:%i", get_master_id(mnt));
-	} else if (IS_MNT_SLAVE(mnt)) {
-		seq_printf(m, " slave:%i", get_master_id(mnt));
+	seq_putc(m, ' ');
+	if (IS_MNT_SHARED(mnt) || IS_MNT_SLAVE(mnt)) {
+		if (IS_MNT_SHARED(mnt))
+			seq_printf(m, "shared:%i", get_peer_group_id(mnt));
+		if (IS_MNT_SLAVE(mnt)) {
+			int dominator_id = get_dominator_id_same_ns(mnt);
+
+			if (IS_MNT_SHARED(mnt))
+				seq_putc(m, ',');
+
+			seq_printf(m, "slave:%i", get_master_id(mnt));
+			if (dominator_id != -1)
+				seq_printf(m, ":%i", dominator_id);
+		}
 	} else if (IS_MNT_UNBINDABLE(mnt)) {
-		seq_printf(m, " unbindable");
+		seq_printf(m, "unbindable");
 	} else {
-		seq_printf(m, " private");
+		seq_printf(m, "private");
 	}
 	seq_putc(m, '\n');
 	return err;
Index: linux/fs/pnode.h
===================================================================
--- linux.orig/fs/pnode.h	2008-02-22 15:27:23.000000000 +0100
+++ linux/fs/pnode.h	2008-02-22 15:27:26.000000000 +0100
@@ -14,7 +14,6 @@
 #define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED)
 #define IS_MNT_SLAVE(mnt) (mnt->mnt_master)
 #define IS_MNT_NEW(mnt)  (!mnt->mnt_ns)
-#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE)
 
 #define CL_EXPIRE    		0x01
@@ -24,12 +23,9 @@
 #define CL_PROPAGATION 		0x10
 #define CL_PRIVATE 		0x20
 
-static inline void set_mnt_shared(struct vfsmount *mnt)
-{
-	mnt->mnt_flags &= ~MNT_PNODE_MASK;
-	mnt->mnt_flags |= MNT_SHARED;
-}
-
+void set_mnt_shared(struct vfsmount *);
+void clear_mnt_shared(struct vfsmount *);
+void make_mnt_peer(struct vfsmount *, struct vfsmount *);
 void change_mnt_propagation(struct vfsmount *, int);
 int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *,
 		struct list_head *);
@@ -37,4 +33,5 @@ int propagate_umount(struct list_head *)
 int propagate_mount_busy(struct vfsmount *, int);
 int get_peer_group_id(struct vfsmount *);
 int get_master_id(struct vfsmount *);
+int get_dominator_id_same_ns(struct vfsmount *);
 #endif /* _LINUX_PNODE_H */
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h	2008-02-22 15:27:23.000000000 +0100
+++ linux/include/linux/mount.h	2008-02-22 15:27:26.000000000 +0100
@@ -57,6 +57,7 @@ struct vfsmount {
 	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
+	int mnt_pgid;			/* peer group identifier */
 	/*
 	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
 	 * to let these frequently modified fields in a separate cache line
Index: linux/Documentation/filesystems/proc.txt
===================================================================
--- linux.orig/Documentation/filesystems/proc.txt	2008-02-22 15:27:23.000000000 +0100
+++ linux/Documentation/filesystems/proc.txt	2008-02-22 15:27:26.000000000 +0100
@@ -2367,21 +2367,20 @@ MNTOPTS: per mount options
 SBOPTS: per super block options
 PROPAGATION: propagation type
 
-propagation type: <propagation_flag>[:<mntid>][,...]
-	note: 'shared' flag is followed by the mntid of its peer mount
-	      'slave' flag is followed by the mntid of its master mount
+propagation type: <propagation_flag>[:<peergrpid>[:<domgrpid>]][,...]
+	note: 'shared' flag is followed by the id of this mount's peer group
+	      'slave' flag is followed by the peer group id of its master mount,
+	      	      optionally followed by the id of the closest dominant(*)
+		      peer group in the same namespace, if one exists.
 	      'private' flag stands by itself
 	      'unbindable' flag stands by itself
 
-The 'mntid' used in the propagation type is a canonical ID of the peer
-group (currently the smallest ID within the group is used for this
-purpose, but this should not be relied on).  Since mounts can be added
-or removed from the peer group, this ID only guaranteed to stay the
-same on a static propagation tree.
+(*) A dominant peer group is an ancestor of this mount in the
+propagation tree; in other words, this mount receives propagation from
+the dominant peer group, but not the other way round.
 
 For more information see:
 
   Documentation/filesystems/sharedsubtree.txt
 
-
 ------------------------------------------------------------------------------
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux