[patch 05/33] fs: scale mntget/mntput

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Improve scalability of mntget/mntput by using per-cpu counters protected
by the reader side of the brlock vfsmount_lock. mnt_mounted keeps track of
whether the vfsmount is actually attached to the tree so we can shortcut
expensive checks in mntput.
---
 fs/libfs.c            |    1 
 fs/namespace.c        |  122 +++++++++++++++++++++++++++++++++++++++++++-------
 fs/pnode.c            |    2 
 include/linux/mount.h |   33 ++++---------
 4 files changed, 121 insertions(+), 37 deletions(-)

Index: linux-2.6/fs/namespace.c
===================================================================
--- linux-2.6.orig/fs/namespace.c
+++ linux-2.6/fs/namespace.c
@@ -177,6 +177,49 @@ void mnt_release_group_id(struct vfsmoun
 	mnt->mnt_group_id = 0;
 }
 
+static inline void add_mnt_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n;
+#else
+	mnt->mnt_count += n;
+#endif
+}
+
+static inline void inc_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++;
+#else
+	mnt->mnt_count++;
+#endif
+}
+
+static inline void dec_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	(*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--;
+#else
+	mnt->mnt_count--;
+#endif
+}
+
+unsigned int count_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+	unsigned int count = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		count += *per_cpu_ptr(mnt->mnt_count, cpu);
+	}
+
+	return count;
+#else
+	return mnt->mnt_count;
+#endif
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
 	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -193,7 +236,13 @@ struct vfsmount *alloc_vfsmnt(const char
 				goto out_free_id;
 		}
 
-		atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+		mnt->mnt_count = alloc_percpu(int);
+		if (!mnt->mnt_count)
+			goto out_free_devname;
+#else
+		mnt->mnt_count = 0;
+#endif
 		INIT_LIST_HEAD(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -205,14 +254,19 @@ struct vfsmount *alloc_vfsmnt(const char
 #ifdef CONFIG_SMP
 		mnt->mnt_writers = alloc_percpu(int);
 		if (!mnt->mnt_writers)
-			goto out_free_devname;
+			goto out_free_mntcount;
 #else
 		mnt->mnt_writers = 0;
 #endif
+		preempt_disable();
+		inc_mnt_count(mnt);
+		preempt_enable();
 	}
 	return mnt;
 
 #ifdef CONFIG_SMP
+out_free_mntcount:
+	free_percpu(mnt->mnt_count);
 out_free_devname:
 	kfree(mnt->mnt_devname);
 #endif
@@ -526,9 +580,11 @@ static void detach_mnt(struct vfsmount *
 	old_path->mnt = mnt->mnt_parent;
 	mnt->mnt_parent = mnt;
 	mnt->mnt_mountpoint = mnt->mnt_root;
-	list_del_init(&mnt->mnt_child);
 	list_del_init(&mnt->mnt_hash);
+	list_del_init(&mnt->mnt_child);
 	old_path->dentry->d_mounted--;
+	WARN_ON(mnt->mnt_mounted != 1);
+	mnt->mnt_mounted--;
 }
 
 void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
@@ -545,6 +601,8 @@ static void attach_mnt(struct vfsmount *
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 			hash(path->mnt, path->dentry));
 	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
+	WARN_ON(mnt->mnt_mounted != 0);
+	mnt->mnt_mounted++;
 }
 
 /*
@@ -567,6 +625,8 @@ static void commit_tree(struct vfsmount
 	list_add_tail(&mnt->mnt_hash, mount_hashtable +
 				hash(parent, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+	WARN_ON(mnt->mnt_mounted != 0);
+	mnt->mnt_mounted++;
 	touch_mnt_namespace(n);
 }
 
@@ -670,50 +730,80 @@ static inline void __mntput(struct vfsmo
 
 void mntput_no_expire(struct vfsmount *mnt)
 {
-repeat:
-	/* open-code atomic_dec_and_lock for the vfsmount lock */
-	if (atomic_add_unless(&mnt->mnt_count, -1, 1))
+	if (likely(mnt->mnt_mounted)) {
+		vfsmount_read_lock();
+		if (unlikely(!mnt->mnt_mounted)) {
+			vfsmount_read_unlock();
+			goto repeat;
+		}
+		dec_mnt_count(mnt);
+		BUG_ON(count_mnt_count(mnt) == 0);
+		vfsmount_read_unlock();
+
 		return;
+	}
+
+repeat:
 	vfsmount_write_lock();
-	if (!atomic_dec_and_test(&mnt->mnt_count)) {
+	BUG_ON(mnt->mnt_mounted);
+	dec_mnt_count(mnt);
+	if (count_mnt_count(mnt)) {
 		vfsmount_write_unlock();
 		return;
 	}
-
 	if (likely(!mnt->mnt_pinned)) {
 		vfsmount_write_unlock();
 		__mntput(mnt);
 		return;
 	}
-	atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
+	add_mnt_count(mnt, mnt->mnt_pinned + 1);
 	mnt->mnt_pinned = 0;
 	vfsmount_write_unlock();
 	acct_auto_close_mnt(mnt);
 	security_sb_umount_close(mnt);
 	goto repeat;
 }
-
 EXPORT_SYMBOL(mntput_no_expire);
 
+void mntput(struct vfsmount *mnt)
+{
+	if (mnt) {
+		/* avoid cacheline pingpong */
+		if (unlikely(mnt->mnt_expiry_mark))
+			mnt->mnt_expiry_mark = 0;
+		mntput_no_expire(mnt);
+	}
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+	if (mnt) {
+		preempt_disable();
+		inc_mnt_count(mnt);
+		preempt_enable();
+	}
+	return mnt;
+}
+EXPORT_SYMBOL(mntget);
+
 void mnt_pin(struct vfsmount *mnt)
 {
 	vfsmount_write_lock();
 	mnt->mnt_pinned++;
 	vfsmount_write_unlock();
 }
-
 EXPORT_SYMBOL(mnt_pin);
 
 void mnt_unpin(struct vfsmount *mnt)
 {
 	vfsmount_write_lock();
 	if (mnt->mnt_pinned) {
-		atomic_inc(&mnt->mnt_count);
+		inc_mnt_count(mnt);
 		mnt->mnt_pinned--;
 	}
 	vfsmount_write_unlock();
 }
-
 EXPORT_SYMBOL(mnt_unpin);
 
 static inline void mangle(struct seq_file *m, const char *s)
@@ -996,7 +1086,7 @@ int may_umount_tree(struct vfsmount *mnt
 
 	vfsmount_read_lock();
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
-		actual_refs += atomic_read(&p->mnt_count);
+		actual_refs += count_mnt_count(p);
 		minimum_refs += 2;
 	}
 	vfsmount_read_unlock();
@@ -1076,6 +1166,8 @@ void umount_tree(struct vfsmount *mnt, i
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
 		list_del_init(&p->mnt_child);
+		WARN_ON(p->mnt_mounted != 1);
+		p->mnt_mounted--;
 		if (p->mnt_parent != p) {
 			p->mnt_parent->mnt_ghosts++;
 			p->mnt_mountpoint->d_mounted--;
@@ -1107,7 +1199,7 @@ static int do_umount(struct vfsmount *mn
 		    flags & (MNT_FORCE | MNT_DETACH))
 			return -EINVAL;
 
-		if (atomic_read(&mnt->mnt_count) != 2)
+		if (count_mnt_count(mnt) != 2)
 			return -EBUSY;
 
 		if (!xchg(&mnt->mnt_expiry_mark, 1))
Index: linux-2.6/include/linux/mount.h
===================================================================
--- linux-2.6.orig/include/linux/mount.h
+++ linux-2.6/include/linux/mount.h
@@ -56,20 +56,20 @@ struct vfsmount {
 	struct mnt_namespace *mnt_ns;	/* containing namespace */
 	int mnt_id;			/* mount identifier */
 	int mnt_group_id;		/* peer group identifier */
-	/*
-	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
-	 * to let these frequently modified fields in a separate cache line
-	 * (so that reads of mnt_flags wont ping-pong on SMP machines)
-	 */
-	atomic_t mnt_count;
 	int mnt_expiry_mark;		/* true if marked for expiry */
 	int mnt_pinned;
 	int mnt_ghosts;
+	int mnt_mounted;
 #ifdef CONFIG_SMP
 	int *mnt_writers;
 #else
 	int mnt_writers;
 #endif
+#ifdef CONFIG_SMP
+	int *mnt_count;
+#else
+	int mnt_count;
+#endif
 };
 
 static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
@@ -81,13 +81,6 @@ static inline int *get_mnt_writers_ptr(s
 #endif
 }
 
-static inline struct vfsmount *mntget(struct vfsmount *mnt)
-{
-	if (mnt)
-		atomic_inc(&mnt->mnt_count);
-	return mnt;
-}
-
 struct file; /* forward dec */
 
 extern void vfsmount_read_lock(void);
@@ -95,23 +88,21 @@ extern void vfsmount_read_unlock(void);
 extern void vfsmount_write_lock(void);
 extern void vfsmount_write_unlock(void);
 
+extern unsigned int count_mnt_count(struct vfsmount *mnt);
+
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
 extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
+
 extern void mntput_no_expire(struct vfsmount *mnt);
+extern struct vfsmount *mntget(struct vfsmount *mnt);
+extern void mntput(struct vfsmount *mnt);
+
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
 extern int __mnt_is_readonly(struct vfsmount *mnt);
 
-static inline void mntput(struct vfsmount *mnt)
-{
-	if (mnt) {
-		mnt->mnt_expiry_mark = 0;
-		mntput_no_expire(mnt);
-	}
-}
-
 extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
 				      const char *name, void *data);
 
Index: linux-2.6/fs/pnode.c
===================================================================
--- linux-2.6.orig/fs/pnode.c
+++ linux-2.6/fs/pnode.c
@@ -279,7 +279,7 @@ out:
  */
 static inline int do_refcount_check(struct vfsmount *mnt, int count)
 {
-	int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
+	int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts;
 	return (mycount > count);
 }
 
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -244,6 +244,7 @@ int get_sb_pseudo(struct file_system_typ
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_mounted++; /* never unmounted, shortcut mntget (XXX: OK?) */
 	simple_set_mnt(mnt, s);
 	return 0;
 


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux