[PATCH] exfat: change i_pos based exfat hash to rb tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hashtable for identifying inode by i_pos (cluster+dentry) have slow
search performance when there are lots of cached inode. The slowness
is from the limited count of slots showing that, in average,
(inode count / slots) times hlist traverse, and it becomes worse when
cached inode count increases by such as directory iteration.
To improve this, change from hash table to rb tree to minimize
searching/iterate time which enables O(logN) time complexity.

Test : "time ls -l"
 +--------+------------+------------+-----------+
 |        | file count | fresh read |   cached  |
 +--------+------------+------------+-----------+
 | Before |     50,000 |   0m06.59s |  0m01.58s |
 +--------+------------+------------+-----------+
 | After  |     50,000 |   0m05.20s |  0m00.98s |
 +--------+------------+------------+-----------+
 | Before |    300,000 |   1m28.97s |  0m31.69s |
 +--------+------------+------------+-----------+
 | After  |    300,000 |   0m33.11s |  0m06.21s |
 +--------+------------+------------+-----------+

Signed-off-by: Hyeongseok Kim <hyeongseok@xxxxxxxxx>
---
 fs/exfat/exfat_fs.h | 12 +++---
 fs/exfat/inode.c    | 91 +++++++++++++++++++++++++++++++--------------
 fs/exfat/namei.c    | 10 ++---
 fs/exfat/super.c    | 16 ++++----
 4 files changed, 82 insertions(+), 47 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 1d6da61157c9..f8ad8cbf8499 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -243,8 +243,8 @@ struct exfat_sb_info {
 	struct nls_table *nls_io; /* Charset used for input and display */
 	struct ratelimit_state ratelimit;
 
-	spinlock_t inode_hash_lock;
-	struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
+	spinlock_t inode_tree_lock; /* lock for inode_tree structure */
+	struct rb_root inode_tree;
 
 	struct rcu_head rcu;
 };
@@ -289,8 +289,8 @@ struct exfat_inode_info {
 	loff_t i_size_aligned;
 	/* on-disk position of directory entry or 0 */
 	loff_t i_pos;
-	/* hash by i_location */
-	struct hlist_node i_hash_fat;
+	/* tree by i_location */
+	struct rb_node rbnode;
 	/* protect bmap against truncate */
 	struct rw_semaphore truncate_lock;
 	struct inode vfs_inode;
@@ -476,8 +476,8 @@ extern const struct inode_operations exfat_file_inode_operations;
 void exfat_sync_inode(struct inode *inode);
 struct inode *exfat_build_inode(struct super_block *sb,
 		struct exfat_dir_entry *info, loff_t i_pos);
-void exfat_hash_inode(struct inode *inode, loff_t i_pos);
-void exfat_unhash_inode(struct inode *inode);
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos);
+void exfat_inode_tree_erase(struct inode *inode);
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos);
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc);
 void exfat_evict_inode(struct inode *inode);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1803ef3220fd..740a34f528ae 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -501,50 +501,87 @@ static const struct address_space_operations exfat_aops = {
 	.bmap		= exfat_aop_bmap
 };
 
-static inline unsigned long exfat_hash(loff_t i_pos)
+static struct exfat_inode_info *exfat_inode_tree_find(struct super_block *sb,
+						      loff_t i_pos)
 {
-	return hash_32(i_pos, EXFAT_HASH_BITS);
+	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	struct rb_node *node = sbi->inode_tree.rb_node;
+	struct exfat_inode_info *info;
+
+	spin_lock(&sbi->inode_tree_lock);
+	while (node) {
+		info = rb_entry(node, struct exfat_inode_info, rbnode);
+		WARN_ON(info->vfs_inode.i_sb != sb);
+
+		if (i_pos == info->i_pos) {
+			spin_unlock(&sbi->inode_tree_lock);
+			return info;
+		}
+
+		if (i_pos < info->i_pos)
+			node = node->rb_left;
+		else /* i_pos > info->i_pos */
+			node = node->rb_right;
+	}
+	spin_unlock(&sbi->inode_tree_lock);
+	return NULL;
 }
 
-void exfat_hash_inode(struct inode *inode, loff_t i_pos)
+void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
-	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
+	struct exfat_inode_info *info, *ei = EXFAT_I(inode);
+	struct rb_root *root = &sbi->inode_tree;
+	struct rb_node **rb_ptr = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	spin_lock(&sbi->inode_tree_lock);
+	ei->i_pos = i_pos;
+	while (*rb_ptr) {
+		parent = *rb_ptr;
+		info = rb_entry(*rb_ptr, struct exfat_inode_info, rbnode);
+		if (i_pos == info->i_pos) {
+			/* already exists */
+			rb_replace_node(*rb_ptr, &ei->rbnode, &sbi->inode_tree);
+			RB_CLEAR_NODE(*rb_ptr);
+			spin_unlock(&sbi->inode_tree_lock);
+			return;
+		}
 
-	spin_lock(&sbi->inode_hash_lock);
-	EXFAT_I(inode)->i_pos = i_pos;
-	hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head);
-	spin_unlock(&sbi->inode_hash_lock);
+		if (i_pos < info->i_pos)
+			rb_ptr = &(*rb_ptr)->rb_left;
+		else /* (i_pos > info->i_pos) */
+			rb_ptr = &(*rb_ptr)->rb_right;
+	}
+
+	rb_link_node(&ei->rbnode, parent, rb_ptr);
+	rb_insert_color(&ei->rbnode, root);
+	spin_unlock(&sbi->inode_tree_lock);
 }
 
-void exfat_unhash_inode(struct inode *inode)
+void exfat_inode_tree_erase(struct inode *inode)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb);
+	struct exfat_inode_info *ei = EXFAT_I(inode);
+	struct rb_root *root = &sbi->inode_tree;
 
-	spin_lock(&sbi->inode_hash_lock);
-	hlist_del_init(&EXFAT_I(inode)->i_hash_fat);
-	EXFAT_I(inode)->i_pos = 0;
-	spin_unlock(&sbi->inode_hash_lock);
+	spin_lock(&sbi->inode_tree_lock);
+	if (!RB_EMPTY_NODE(&ei->rbnode)) {
+		rb_erase(&ei->rbnode, root);
+		RB_CLEAR_NODE(&ei->rbnode);
+	}
+	spin_unlock(&sbi->inode_tree_lock);
 }
 
 struct inode *exfat_iget(struct super_block *sb, loff_t i_pos)
 {
-	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *info;
-	struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos);
 	struct inode *inode = NULL;
 
-	spin_lock(&sbi->inode_hash_lock);
-	hlist_for_each_entry(info, head, i_hash_fat) {
-		WARN_ON(info->vfs_inode.i_sb != sb);
-
-		if (i_pos != info->i_pos)
-			continue;
+	info = exfat_inode_tree_find(sb, i_pos);
+	if (info)
 		inode = igrab(&info->vfs_inode);
-		if (inode)
-			break;
-	}
-	spin_unlock(&sbi->inode_hash_lock);
+
 	return inode;
 }
 
@@ -634,7 +671,7 @@ struct inode *exfat_build_inode(struct super_block *sb,
 		inode = ERR_PTR(err);
 		goto out;
 	}
-	exfat_hash_inode(inode, i_pos);
+	exfat_inode_tree_insert(inode, i_pos);
 	insert_inode_hash(inode);
 out:
 	return inode;
@@ -654,5 +691,5 @@ void exfat_evict_inode(struct inode *inode)
 	invalidate_inode_buffers(inode);
 	clear_inode(inode);
 	exfat_cache_inval_inode(inode);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 }
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 24b41103d1cc..10ed9b35fd86 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -827,7 +827,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
 	mutex_unlock(&EXFAT_SB(sb)->s_lock);
@@ -993,7 +993,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = current_time(inode);
 	exfat_truncate_atime(&inode->i_atime);
-	exfat_unhash_inode(inode);
+	exfat_inode_tree_erase(inode);
 	exfat_d_version_set(dentry, inode_query_iversion(dir));
 unlock:
 	mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock);
@@ -1363,8 +1363,8 @@ static int exfat_rename(struct user_namespace *mnt_userns,
 
 	i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) |
 		(EXFAT_I(old_inode)->entry & 0xffffffff);
-	exfat_unhash_inode(old_inode);
-	exfat_hash_inode(old_inode, i_pos);
+	exfat_inode_tree_erase(old_inode);
+	exfat_inode_tree_insert(old_inode, i_pos);
 	if (IS_DIRSYNC(new_dir))
 		exfat_sync_inode(old_inode);
 	else
@@ -1384,7 +1384,7 @@ static int exfat_rename(struct user_namespace *mnt_userns,
 		mark_inode_dirty(old_dir);
 
 	if (new_inode) {
-		exfat_unhash_inode(new_inode);
+		exfat_inode_tree_erase(new_inode);
 
 		/* skip drop_nlink if new_inode already has been dropped */
 		if (new_inode->i_nlink) {
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index d38d17a77e76..8f197432c57b 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -317,14 +317,12 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	return 0;
 }
 
-static void exfat_hash_init(struct super_block *sb)
+static void exfat_inode_tree_init(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	int i;
 
-	spin_lock_init(&sbi->inode_hash_lock);
-	for (i = 0; i < EXFAT_HASH_SIZE; i++)
-		INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
+	spin_lock_init(&sbi->inode_tree_lock);
+	sbi->inode_tree = RB_ROOT;
 }
 
 static int exfat_read_root(struct inode *inode)
@@ -648,8 +646,8 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto check_nls_io;
 	}
 
-	/* set up enough so that it can read an inode */
-	exfat_hash_init(sb);
+	/* set up exfat inode tree */
+	exfat_inode_tree_init(sb);
 
 	if (!strcmp(sbi->options.iocharset, "utf8"))
 		opts->utf8 = 1;
@@ -683,7 +681,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto put_inode;
 	}
 
-	exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos);
+	exfat_inode_tree_insert(root_inode, EXFAT_I(root_inode)->i_pos);
 	insert_inode_hash(root_inode);
 
 	sb->s_root = d_make_root(root_inode);
@@ -786,7 +784,7 @@ static void exfat_inode_init_once(void *foo)
 	ei->nr_caches = 0;
 	ei->cache_valid_id = EXFAT_CACHE_VALID + 1;
 	INIT_LIST_HEAD(&ei->cache_lru);
-	INIT_HLIST_NODE(&ei->i_hash_fat);
+	RB_CLEAR_NODE(&ei->rbnode);
 	inode_init_once(&ei->vfs_inode);
 }
 
-- 
2.27.0.83.g0313f36




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux