Hashtable for identifying inode by i_pos (cluster+dentry) have slow search performance when there are lots of cached inode. The slowness is from the limited count of slots showing that, in average, (inode count / slots) times hlist traverse, and it becomes worse when cached inode count increases by such as directory iteration. To improve this, change from hash table to rb tree to minimize searching/iterate time which enables O(logN) time complexity. Test : "time ls -l" +--------+------------+------------+-----------+ | | file count | fresh read | cached | +--------+------------+------------+-----------+ | Before | 50,000 | 0m06.59s | 0m01.58s | +--------+------------+------------+-----------+ | After | 50,000 | 0m05.20s | 0m00.98s | +--------+------------+------------+-----------+ | Before | 300,000 | 1m28.97s | 0m31.69s | +--------+------------+------------+-----------+ | After | 300,000 | 0m33.11s | 0m06.21s | +--------+------------+------------+-----------+ Signed-off-by: Hyeongseok Kim <hyeongseok@xxxxxxxxx> --- fs/exfat/exfat_fs.h | 12 +++--- fs/exfat/inode.c | 91 +++++++++++++++++++++++++++++++-------------- fs/exfat/namei.c | 10 ++--- fs/exfat/super.c | 16 ++++---- 4 files changed, 82 insertions(+), 47 deletions(-) diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 1d6da61157c9..f8ad8cbf8499 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -243,8 +243,8 @@ struct exfat_sb_info { struct nls_table *nls_io; /* Charset used for input and display */ struct ratelimit_state ratelimit; - spinlock_t inode_hash_lock; - struct hlist_head inode_hashtable[EXFAT_HASH_SIZE]; + spinlock_t inode_tree_lock; /* lock for inode_tree structure */ + struct rb_root inode_tree; struct rcu_head rcu; }; @@ -289,8 +289,8 @@ struct exfat_inode_info { loff_t i_size_aligned; /* on-disk position of directory entry or 0 */ loff_t i_pos; - /* hash by i_location */ - struct hlist_node i_hash_fat; + /* tree by i_location */ + struct rb_node rbnode; /* protect bmap against truncate */ struct rw_semaphore truncate_lock; struct inode vfs_inode; @@ -476,8 +476,8 @@ extern const struct inode_operations exfat_file_inode_operations; void exfat_sync_inode(struct inode *inode); struct inode *exfat_build_inode(struct super_block *sb, struct exfat_dir_entry *info, loff_t i_pos); -void exfat_hash_inode(struct inode *inode, loff_t i_pos); -void exfat_unhash_inode(struct inode *inode); +void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos); +void exfat_inode_tree_erase(struct inode *inode); struct inode *exfat_iget(struct super_block *sb, loff_t i_pos); int exfat_write_inode(struct inode *inode, struct writeback_control *wbc); void exfat_evict_inode(struct inode *inode); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 1803ef3220fd..740a34f528ae 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -501,50 +501,87 @@ static const struct address_space_operations exfat_aops = { .bmap = exfat_aop_bmap }; -static inline unsigned long exfat_hash(loff_t i_pos) +static struct exfat_inode_info *exfat_inode_tree_find(struct super_block *sb, + loff_t i_pos) { - return hash_32(i_pos, EXFAT_HASH_BITS); + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct rb_node *node = sbi->inode_tree.rb_node; + struct exfat_inode_info *info; + + spin_lock(&sbi->inode_tree_lock); + while (node) { + info = rb_entry(node, struct exfat_inode_info, rbnode); + WARN_ON(info->vfs_inode.i_sb != sb); + + if (i_pos == info->i_pos) { + spin_unlock(&sbi->inode_tree_lock); + return info; + } + + if (i_pos < info->i_pos) + node = node->rb_left; + else /* i_pos > info->i_pos */ + node = node->rb_right; + } + spin_unlock(&sbi->inode_tree_lock); + return NULL; } -void exfat_hash_inode(struct inode *inode, loff_t i_pos) +void exfat_inode_tree_insert(struct inode *inode, loff_t i_pos) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); - struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); + struct exfat_inode_info *info, *ei = EXFAT_I(inode); + struct rb_root *root = &sbi->inode_tree; + struct rb_node **rb_ptr = &root->rb_node; + struct rb_node *parent = NULL; + + spin_lock(&sbi->inode_tree_lock); + ei->i_pos = i_pos; + while (*rb_ptr) { + parent = *rb_ptr; + info = rb_entry(*rb_ptr, struct exfat_inode_info, rbnode); + if (i_pos == info->i_pos) { + /* already exists */ + rb_replace_node(*rb_ptr, &ei->rbnode, &sbi->inode_tree); + RB_CLEAR_NODE(*rb_ptr); + spin_unlock(&sbi->inode_tree_lock); + return; + } - spin_lock(&sbi->inode_hash_lock); - EXFAT_I(inode)->i_pos = i_pos; - hlist_add_head(&EXFAT_I(inode)->i_hash_fat, head); - spin_unlock(&sbi->inode_hash_lock); + if (i_pos < info->i_pos) + rb_ptr = &(*rb_ptr)->rb_left; + else /* (i_pos > info->i_pos) */ + rb_ptr = &(*rb_ptr)->rb_right; + } + + rb_link_node(&ei->rbnode, parent, rb_ptr); + rb_insert_color(&ei->rbnode, root); + spin_unlock(&sbi->inode_tree_lock); } -void exfat_unhash_inode(struct inode *inode) +void exfat_inode_tree_erase(struct inode *inode) { struct exfat_sb_info *sbi = EXFAT_SB(inode->i_sb); + struct exfat_inode_info *ei = EXFAT_I(inode); + struct rb_root *root = &sbi->inode_tree; - spin_lock(&sbi->inode_hash_lock); - hlist_del_init(&EXFAT_I(inode)->i_hash_fat); - EXFAT_I(inode)->i_pos = 0; - spin_unlock(&sbi->inode_hash_lock); + spin_lock(&sbi->inode_tree_lock); + if (!RB_EMPTY_NODE(&ei->rbnode)) { + rb_erase(&ei->rbnode, root); + RB_CLEAR_NODE(&ei->rbnode); + } + spin_unlock(&sbi->inode_tree_lock); } struct inode *exfat_iget(struct super_block *sb, loff_t i_pos) { - struct exfat_sb_info *sbi = EXFAT_SB(sb); struct exfat_inode_info *info; - struct hlist_head *head = sbi->inode_hashtable + exfat_hash(i_pos); struct inode *inode = NULL; - spin_lock(&sbi->inode_hash_lock); - hlist_for_each_entry(info, head, i_hash_fat) { - WARN_ON(info->vfs_inode.i_sb != sb); - - if (i_pos != info->i_pos) - continue; + info = exfat_inode_tree_find(sb, i_pos); + if (info) inode = igrab(&info->vfs_inode); - if (inode) - break; - } - spin_unlock(&sbi->inode_hash_lock); + return inode; } @@ -634,7 +671,7 @@ struct inode *exfat_build_inode(struct super_block *sb, inode = ERR_PTR(err); goto out; } - exfat_hash_inode(inode, i_pos); + exfat_inode_tree_insert(inode, i_pos); insert_inode_hash(inode); out: return inode; @@ -654,5 +691,5 @@ void exfat_evict_inode(struct inode *inode) invalidate_inode_buffers(inode); clear_inode(inode); exfat_cache_inval_inode(inode); - exfat_unhash_inode(inode); + exfat_inode_tree_erase(inode); } diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 24b41103d1cc..10ed9b35fd86 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -827,7 +827,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); exfat_truncate_atime(&inode->i_atime); - exfat_unhash_inode(inode); + exfat_inode_tree_erase(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); unlock: mutex_unlock(&EXFAT_SB(sb)->s_lock); @@ -993,7 +993,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); exfat_truncate_atime(&inode->i_atime); - exfat_unhash_inode(inode); + exfat_inode_tree_erase(inode); exfat_d_version_set(dentry, inode_query_iversion(dir)); unlock: mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock); @@ -1363,8 +1363,8 @@ static int exfat_rename(struct user_namespace *mnt_userns, i_pos = ((loff_t)EXFAT_I(old_inode)->dir.dir << 32) | (EXFAT_I(old_inode)->entry & 0xffffffff); - exfat_unhash_inode(old_inode); - exfat_hash_inode(old_inode, i_pos); + exfat_inode_tree_erase(old_inode); + exfat_inode_tree_insert(old_inode, i_pos); if (IS_DIRSYNC(new_dir)) exfat_sync_inode(old_inode); else @@ -1384,7 +1384,7 @@ static int exfat_rename(struct user_namespace *mnt_userns, mark_inode_dirty(old_dir); if (new_inode) { - exfat_unhash_inode(new_inode); + exfat_inode_tree_erase(new_inode); /* skip drop_nlink if new_inode already has been dropped */ if (new_inode->i_nlink) { diff --git a/fs/exfat/super.c b/fs/exfat/super.c index d38d17a77e76..8f197432c57b 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -317,14 +317,12 @@ static int exfat_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } -static void exfat_hash_init(struct super_block *sb) +static void exfat_inode_tree_init(struct super_block *sb) { struct exfat_sb_info *sbi = EXFAT_SB(sb); - int i; - spin_lock_init(&sbi->inode_hash_lock); - for (i = 0; i < EXFAT_HASH_SIZE; i++) - INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); + spin_lock_init(&sbi->inode_tree_lock); + sbi->inode_tree = RB_ROOT; } static int exfat_read_root(struct inode *inode) @@ -648,8 +646,8 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) goto check_nls_io; } - /* set up enough so that it can read an inode */ - exfat_hash_init(sb); + /* set up exfat inode tree */ + exfat_inode_tree_init(sb); if (!strcmp(sbi->options.iocharset, "utf8")) opts->utf8 = 1; @@ -683,7 +681,7 @@ static int exfat_fill_super(struct super_block *sb, struct fs_context *fc) goto put_inode; } - exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos); + exfat_inode_tree_insert(root_inode, EXFAT_I(root_inode)->i_pos); insert_inode_hash(root_inode); sb->s_root = d_make_root(root_inode); @@ -786,7 +784,7 @@ static void exfat_inode_init_once(void *foo) ei->nr_caches = 0; ei->cache_valid_id = EXFAT_CACHE_VALID + 1; INIT_LIST_HEAD(&ei->cache_lru); - INIT_HLIST_NODE(&ei->i_hash_fat); + RB_CLEAR_NODE(&ei->rbnode); inode_init_once(&ei->vfs_inode); } -- 2.27.0.83.g0313f36