[PATCHES] improve sysfs performance, make it O(log n)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi

Here are 5 patches to make sysfs O(log n) and improve performance on 
operations with many block devices.

Patch 1 changes synchronize_rcu() to synchronize_rcu_expedited(), it 
causes massive speedup when deleting many block devices.

Patch 2 fixes inefficient i_nlink calculation in sysfs.

Patch 3 makes name lookups use rb-tree.

Patch 4 removes s_sibling hack (it has no effect on performance).

Patch 5 makes inode number lookups use rb-tree --- thus making all types 
of sysfs directory operations O(log n).


Performance testing --- 10000 dm devices created with dmsetup:

The numbers are time in seconds to perform these operations:
1. create 10000 devices
2. ls -la /sys/block
3. ls -al /sys/block after a cache flush (echo 3 >/proc/sys/vm/drop_caches)
4. dmsetup remove_all

no patches	109	9.8	12	1624
patch 1		109	9.1	12	15
patches 1,2	120	0.19	3.0	16
patches 1,2,3	68	0.17	0.47	12
patches 1,2,3,4	68	0.15	0.48	11
all patches	37	0.16	0.46	3.3

Mikulas
backing-dev: use synchronize_rcu_expedited instead of synchronize_rcu

synchronize_rcu sleeps several timer ticks.
When removing thousands block devices, this sleeping
causes a severe delay.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 mm/backing-dev.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux-3.0-rc7-fast/mm/backing-dev.c
===================================================================
--- linux-3.0-rc7-fast.orig/mm/backing-dev.c	2011-07-19 18:01:00.000000000 +0200
+++ linux-3.0-rc7-fast/mm/backing-dev.c	2011-07-19 18:01:07.000000000 +0200
@@ -505,7 +505,7 @@ static void bdi_remove_from_list(struct 
 	list_del_rcu(&bdi->bdi_list);
 	spin_unlock_bh(&bdi_lock);
 
-	synchronize_rcu();
+	synchronize_rcu_expedited();
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
sysfs: count subdirectories

This patch introduces a subdirectory counter for each sysfs directory.

Without the patch, sysfs_refresh_inode would walk all entries of the directory
to calculate the number of subdirectories.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 fs/sysfs/dir.c   |    6 ++++++
 fs/sysfs/inode.c |   14 +-------------
 fs/sysfs/sysfs.h |    2 ++
 3 files changed, 9 insertions(+), 13 deletions(-)

Index: linux-3.0-rc7-fast/fs/sysfs/dir.c
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/dir.c	2011-07-18 19:43:05.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/dir.c	2011-07-18 19:45:06.000000000 +0200
@@ -47,6 +47,9 @@ static void sysfs_link_sibling(struct sy
 
 	BUG_ON(sd->s_sibling);
 
+	if (sysfs_type(sd) == SYSFS_DIR)
+		parent_sd->s_dir.subdirs++;
+
 	/* Store directory entries in order by ino.  This allows
 	 * readdir to properly restart without having to add a
 	 * cursor into the s_dir.children list.
@@ -73,6 +76,9 @@ static void sysfs_unlink_sibling(struct 
 {
 	struct sysfs_dirent **pos;
 
+	if (sysfs_type(sd) == SYSFS_DIR)
+		sd->s_parent->s_dir.subdirs--;
+
 	for (pos = &sd->s_parent->s_dir.children; *pos;
 	     pos = &(*pos)->s_sibling) {
 		if (*pos == sd) {
Index: linux-3.0-rc7-fast/fs/sysfs/inode.c
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/inode.c	2011-07-18 19:43:33.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/inode.c	2011-07-18 19:45:50.000000000 +0200
@@ -202,18 +202,6 @@ static inline void set_inode_attr(struct
 	inode->i_ctime = iattr->ia_ctime;
 }
 
-static int sysfs_count_nlink(struct sysfs_dirent *sd)
-{
-	struct sysfs_dirent *child;
-	int nr = 0;
-
-	for (child = sd->s_dir.children; child; child = child->s_sibling)
-		if (sysfs_type(child) == SYSFS_DIR)
-			nr++;
-
-	return nr + 2;
-}
-
 static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
 	struct sysfs_inode_attrs *iattrs = sd->s_iattr;
@@ -230,7 +218,7 @@ static void sysfs_refresh_inode(struct s
 	}
 
 	if (sysfs_type(sd) == SYSFS_DIR)
-		inode->i_nlink = sysfs_count_nlink(sd);
+		inode->i_nlink = sd->s_dir.subdirs + 2;
 }
 
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
Index: linux-3.0-rc7-fast/fs/sysfs/sysfs.h
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/sysfs.h	2011-07-18 19:42:42.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/sysfs.h	2011-07-18 19:44:28.000000000 +0200
@@ -19,6 +19,8 @@ struct sysfs_elem_dir {
 	struct kobject		*kobj;
 	/* children list starts here and goes through sd->s_sibling */
 	struct sysfs_dirent	*children;
+
+	unsigned long		subdirs;
 };
 
 struct sysfs_elem_symlink {
sysfs: use rb-tree for name lookups

Use red-black tree for name lookups.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 fs/sysfs/dir.c   |   45 +++++++++++++++++++++++++++++++++++++++------
 fs/sysfs/sysfs.h |    5 +++++
 2 files changed, 44 insertions(+), 6 deletions(-)

Index: linux-3.0-rc7-fast/fs/sysfs/sysfs.h
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/sysfs.h	2011-07-18 20:06:24.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/sysfs.h	2011-07-18 20:16:32.000000000 +0200
@@ -11,6 +11,7 @@
 #include <linux/lockdep.h>
 #include <linux/kobject_ns.h>
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 
 struct sysfs_open_dirent;
 
@@ -21,6 +22,8 @@ struct sysfs_elem_dir {
 	struct sysfs_dirent	*children;
 
 	unsigned long		subdirs;
+
+	struct rb_root		name_tree;
 };
 
 struct sysfs_elem_symlink {
@@ -61,6 +64,8 @@ struct sysfs_dirent {
 	struct sysfs_dirent	*s_sibling;
 	const char		*s_name;
 
+	struct rb_node		name_node;
+
 	const void		*s_ns; /* namespace tag */
 	union {
 		struct sysfs_elem_dir		s_dir;
Index: linux-3.0-rc7-fast/fs/sysfs/dir.c
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/dir.c	2011-07-18 20:06:24.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/dir.c	2011-07-18 20:16:02.000000000 +0200
@@ -45,6 +45,9 @@ static void sysfs_link_sibling(struct sy
 	struct sysfs_dirent *parent_sd = sd->s_parent;
 	struct sysfs_dirent **pos;
 
+	struct rb_node **p;
+	struct rb_node *parent;
+
 	BUG_ON(sd->s_sibling);
 
 	if (sysfs_type(sd) == SYSFS_DIR)
@@ -60,6 +63,26 @@ static void sysfs_link_sibling(struct sy
 	}
 	sd->s_sibling = *pos;
 	*pos = sd;
+
+	p = &parent_sd->s_dir.name_tree.rb_node;
+	parent = NULL;
+	while (*p) {
+		int c;
+		parent = *p;
+#define node	rb_entry(parent, struct sysfs_dirent, name_node)
+		c = strcmp(sd->s_name, node->s_name);
+		if (c < 0) {
+			p = &node->name_node.rb_left;
+		} else if (c > 0) {
+			p = &node->name_node.rb_right;
+		} else {
+			printk(KERN_CRIT "sysfs: inserting duplicate filename '%s'\n", sd->s_name);
+			BUG();
+		}
+#undef node
+	}
+	rb_link_node(&sd->name_node, parent, p);
+	rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree);
 }
 
 /**
@@ -87,6 +110,8 @@ static void sysfs_unlink_sibling(struct 
 			break;
 		}
 	}
+
+	rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
 }
 
 /**
@@ -546,14 +571,22 @@ struct sysfs_dirent *sysfs_find_dirent(s
 				       const void *ns,
 				       const unsigned char *name)
 {
-	struct sysfs_dirent *sd;
+	struct rb_node *p = parent_sd->s_dir.name_tree.rb_node;
 
-	for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
-		if (ns && sd->s_ns && (sd->s_ns != ns))
-			continue;
-		if (!strcmp(sd->s_name, name))
-			return sd;
+	while (p) {
+		int c;
+#define node	rb_entry(p, struct sysfs_dirent, name_node)
+		c = strcmp(name, node->s_name);
+		if (c < 0) {
+			p = node->name_node.rb_left;
+		} else if (c > 0) {
+			p = node->name_node.rb_right;
+		} else {
+			return node;
+		}
+#undef node
 	}
+
 	return NULL;
 }
 
sysfs: remove s_sibling hacks

s_sibling was used for three different purposes:
1) as a linked list of entries in the directory
2) as a linked list of entries to be deleted
3) as a pointer to "struct completion"

This patch removes the hack and introduces new union u which
holds pointers for cases 2) and 3).

This change is needed for the following patch that removes s_sibling at all
and replaces it with a rb tree.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 fs/sysfs/dir.c   |   19 +++++++------------
 fs/sysfs/sysfs.h |    5 +++++
 2 files changed, 12 insertions(+), 12 deletions(-)

Index: linux-3.0-rc7-fast/fs/sysfs/dir.c
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/dir.c	2011-07-20 20:42:25.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/dir.c	2011-07-20 20:55:15.000000000 +0200
@@ -157,7 +157,6 @@ struct sysfs_dirent *sysfs_get_active(st
  */
 void sysfs_put_active(struct sysfs_dirent *sd)
 {
-	struct completion *cmpl;
 	int v;
 
 	if (unlikely(!sd))
@@ -169,10 +168,9 @@ void sysfs_put_active(struct sysfs_diren
 		return;
 
 	/* atomic_dec_return() is a mb(), we'll always see the updated
-	 * sd->s_sibling.
+	 * sd->u.completion.
 	 */
-	cmpl = (void *)sd->s_sibling;
-	complete(cmpl);
+	complete(sd->u.completion);
 }
 
 /**
@@ -186,16 +184,16 @@ static void sysfs_deactivate(struct sysf
 	DECLARE_COMPLETION_ONSTACK(wait);
 	int v;
 
-	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
+	BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
 
 	if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
 		return;
 
-	sd->s_sibling = (void *)&wait;
+	sd->u.completion = (void *)&wait;
 
 	rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
 	/* atomic_add_return() is a mb(), put_active() will always see
-	 * the updated sd->s_sibling.
+	 * the updated sd->u.completion.
 	 */
 	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
 
@@ -204,8 +202,6 @@ static void sysfs_deactivate(struct sysf
 		wait_for_completion(&wait);
 	}
 
-	sd->s_sibling = NULL;
-
 	lock_acquired(&sd->dep_map, _RET_IP_);
 	rwsem_release(&sd->dep_map, 1, _RET_IP_);
 }
@@ -521,7 +517,7 @@ void sysfs_remove_one(struct sysfs_addrm
 	}
 
 	sd->s_flags |= SYSFS_FLAG_REMOVED;
-	sd->s_sibling = acxt->removed;
+	sd->u.removed_list = acxt->removed;
 	acxt->removed = sd;
 }
 
@@ -545,8 +541,7 @@ void sysfs_addrm_finish(struct sysfs_add
 	while (acxt->removed) {
 		struct sysfs_dirent *sd = acxt->removed;
 
-		acxt->removed = sd->s_sibling;
-		sd->s_sibling = NULL;
+		acxt->removed = sd->u.removed_list;
 
 		sysfs_deactivate(sd);
 		unmap_bin_file(sd);
Index: linux-3.0-rc7-fast/fs/sysfs/sysfs.h
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/sysfs.h	2011-07-20 20:42:12.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/sysfs.h	2011-07-20 20:46:50.000000000 +0200
@@ -66,6 +66,11 @@ struct sysfs_dirent {
 
 	struct rb_node		name_node;
 
+	union {
+		struct completion	*completion;
+		struct sysfs_dirent	*removed_list;
+	} u;
+
 	const void		*s_ns; /* namespace tag */
 	union {
 		struct sysfs_elem_dir		s_dir;
sysfs: use rb-tree for inode number lookup

This patch makes sysfs use red-black tree for inode number lookup.
Together with a previous patch to use red-black tree for name lookup,
this patch makes all sysfs lookups to have O(log n) complexity.

Signed-off-by: Mikulas Patocka <mpatocka@xxxxxxxxxx>

---
 fs/sysfs/dir.c   |   89 ++++++++++++++++++++++++++++++-------------------------
 fs/sysfs/sysfs.h |    5 +--
 2 files changed, 52 insertions(+), 42 deletions(-)

Index: linux-3.0-rc7-fast/fs/sysfs/dir.c
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/dir.c	2011-07-20 20:55:15.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/dir.c	2011-07-20 23:30:47.000000000 +0200
@@ -43,26 +43,30 @@ static DEFINE_IDA(sysfs_ino_ida);
 static void sysfs_link_sibling(struct sysfs_dirent *sd)
 {
 	struct sysfs_dirent *parent_sd = sd->s_parent;
-	struct sysfs_dirent **pos;
 
 	struct rb_node **p;
 	struct rb_node *parent;
 
-	BUG_ON(sd->s_sibling);
-
 	if (sysfs_type(sd) == SYSFS_DIR)
 		parent_sd->s_dir.subdirs++;
 
-	/* Store directory entries in order by ino.  This allows
-	 * readdir to properly restart without having to add a
-	 * cursor into the s_dir.children list.
-	 */
-	for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) {
-		if (sd->s_ino < (*pos)->s_ino)
-			break;
+	p = &parent_sd->s_dir.inode_tree.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+#define node	rb_entry(parent, struct sysfs_dirent, inode_node)
+		if (sd->s_ino < node->s_ino) {
+			p = &node->inode_node.rb_left;
+		} else if (sd->s_ino > node->s_ino) {
+			p = &node->inode_node.rb_right;
+		} else {
+			printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n", sd->s_ino);
+			BUG();
+		}
+#undef node
 	}
-	sd->s_sibling = *pos;
-	*pos = sd;
+	rb_link_node(&sd->inode_node, parent, p);
+	rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
 
 	p = &parent_sd->s_dir.name_tree.rb_node;
 	parent = NULL;
@@ -97,20 +101,10 @@ static void sysfs_link_sibling(struct sy
  */
 static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent **pos;
-
 	if (sysfs_type(sd) == SYSFS_DIR)
 		sd->s_parent->s_dir.subdirs--;
 
-	for (pos = &sd->s_parent->s_dir.children; *pos;
-	     pos = &(*pos)->s_sibling) {
-		if (*pos == sd) {
-			*pos = sd->s_sibling;
-			sd->s_sibling = NULL;
-			break;
-		}
-	}
-
+	rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree);
 	rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
 }
 
@@ -778,21 +772,19 @@ void sysfs_remove_subdir(struct sysfs_di
 static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
 {
 	struct sysfs_addrm_cxt acxt;
-	struct sysfs_dirent **pos;
+	struct rb_node *pos;
 
 	if (!dir_sd)
 		return;
 
 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
 	sysfs_addrm_start(&acxt, dir_sd);
-	pos = &dir_sd->s_dir.children;
-	while (*pos) {
-		struct sysfs_dirent *sd = *pos;
-
+	pos = rb_first(&dir_sd->s_dir.inode_tree);
+	while (pos) {
+		struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node);
+		pos = rb_next(pos);
 		if (sysfs_type(sd) != SYSFS_DIR)
 			sysfs_remove_one(&acxt, sd);
-		else
-			pos = &(*pos)->s_sibling;
 	}
 	sysfs_addrm_finish(&acxt);
 
@@ -915,12 +907,28 @@ static struct sysfs_dirent *sysfs_dir_po
 			pos = NULL;
 	}
 	if (!pos && (ino > 1) && (ino < INT_MAX)) {
-		pos = parent_sd->s_dir.children;
-		while (pos && (ino > pos->s_ino))
-			pos = pos->s_sibling;
+		struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node;
+		while (p) {
+#define node	rb_entry(p, struct sysfs_dirent, inode_node)
+			if (ino < node->s_ino) {
+				pos = node;
+				p = node->inode_node.rb_left;
+			} else if (node->s_ino > ino) {
+				p = node->inode_node.rb_right;
+			} else {
+				pos = node;
+				break;
+			}
+#undef node
+		}
+	}
+	while (pos && pos->s_ns && pos->s_ns != ns) {
+		struct rb_node *p = rb_next(&pos->inode_node);
+		if (!p)
+			pos = NULL;
+		else
+			pos = rb_entry(p, struct sysfs_dirent, inode_node);
 	}
-	while (pos && pos->s_ns && pos->s_ns != ns)
-		pos = pos->s_sibling;
 	return pos;
 }
 
@@ -928,10 +936,13 @@ static struct sysfs_dirent *sysfs_dir_ne
 	struct sysfs_dirent *parent_sd,	ino_t ino, struct sysfs_dirent *pos)
 {
 	pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
-	if (pos)
-		pos = pos->s_sibling;
-	while (pos && pos->s_ns && pos->s_ns != ns)
-		pos = pos->s_sibling;
+	if (pos) do {
+		struct rb_node *p = rb_next(&pos->inode_node);
+		if (!p)
+			pos = NULL;
+		else
+			pos = rb_entry(p, struct sysfs_dirent, inode_node);
+	} while (pos && pos->s_ns && pos->s_ns != ns);
 	return pos;
 }
 
Index: linux-3.0-rc7-fast/fs/sysfs/sysfs.h
===================================================================
--- linux-3.0-rc7-fast.orig/fs/sysfs/sysfs.h	2011-07-20 20:46:50.000000000 +0200
+++ linux-3.0-rc7-fast/fs/sysfs/sysfs.h	2011-07-20 21:00:28.000000000 +0200
@@ -18,11 +18,10 @@ struct sysfs_open_dirent;
 /* type-specific structures for sysfs_dirent->s_* union members */
 struct sysfs_elem_dir {
 	struct kobject		*kobj;
-	/* children list starts here and goes through sd->s_sibling */
-	struct sysfs_dirent	*children;
 
 	unsigned long		subdirs;
 
+	struct rb_root		inode_tree;
 	struct rb_root		name_tree;
 };
 
@@ -61,9 +60,9 @@ struct sysfs_dirent {
 	struct lockdep_map	dep_map;
 #endif
 	struct sysfs_dirent	*s_parent;
-	struct sysfs_dirent	*s_sibling;
 	const char		*s_name;
 
+	struct rb_node		inode_node;
 	struct rb_node		name_node;
 
 	union {
--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel

[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux