From: Wang Shilong <wshilong@xxxxxxx>

During our benchmarking, we found that write performance is sometimes
not stable enough: small reads issued during writes can drop
throughput by ~30%.

It turned out that loading block bitmaps adds latency here. Also, on a
heavily fragmented filesystem we might need to load many bitmaps to
find some free blocks.

To improve this, add a way to load the block bitmaps into memory and
pin them there until umount, or until the memory is released on
purpose. This stabilizes write performance and improves performance
on a heavily fragmented filesystem.

The new "load_bbitmaps" sysfs attribute controls this: writing 1 reads
all block bitmaps once, 2 reads and pins them, and 0 unpins them.

Tested-by: Shuichi Ihara <sihara@xxxxxxx>
Signed-off-by: Wang Shilong <wshilong@xxxxxxx>
---
 fs/ext4/balloc.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ext4.h   |  12 +++++
 fs/ext4/super.c  |   3 +
 fs/ext4/sysfs.c  |  26 +++++++++++
 4 files changed, 174 insertions(+)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b00481c..ceb63e8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -505,6 +505,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return -EIO;
 	}
+	/* race is fine */
+	EXT4_SB(sb)->bbitmaps_read_cnt++;
 	clear_buffer_new(bh);
 	/* Panic or remount fs read-only if block bitmap is invalid */
 	return ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -660,6 +662,137 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	return ret;
 }
 
+/**
+ * ext4_load_block_bitmaps_bh() -- load or pin all block bitmaps
+ * @sb: superblock
+ * @op: EXT4_LOAD_BBITMAPS or EXT4_PIN_BBITMAPS
+ *
+ * Reads the block bitmap of every group.  For a load the buffer reference
+ * is dropped again right away; for a pin it is kept until
+ * ext4_unpin_block_bitmaps_bh() releases it.
+ *
+ * Returns 0 on success or when the bitmaps are already pinned, -EINVAL for
+ * an unknown @op, or the error of the bitmap read that made a pin fail.
+ */
+int ext4_load_block_bitmaps_bh(struct super_block *sb, unsigned int op)
+{
+	struct buffer_head *bitmap_bh;
+	struct ext4_group_desc *gdp;
+	ext4_group_t i, j;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	ext4_group_t cnt = 0;
+	int err;
+
+	if (op < EXT4_LOAD_BBITMAPS || op > EXT4_PIN_BBITMAPS)
+		return -EINVAL;
+
+	mutex_lock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+	/* don't pin bitmaps several times */
+	if (EXT4_SB(sb)->s_load_bbitmaps == EXT4_PIN_BBITMAPS) {
+		mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+		return 0;
+	}
+
+	for (i = 0; i < ngroups; i++) {
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		if (!gdp)
+			continue;
+		/* Load is simple, we could tolerate any
+		 * errors and continue to handle, but for
+		 * pin we return directly for simple handling
+		 * in unpin codes, otherwise we need remember
+		 * which block bitmaps we pin exactly.
+		 */
+		bitmap_bh = ext4_read_block_bitmap(sb, i);
+		if (IS_ERR(bitmap_bh)) {
+			if (op == EXT4_LOAD_BBITMAPS)
+				continue;
+			/* remember the error, the rollback reuses bitmap_bh */
+			err = PTR_ERR(bitmap_bh);
+			goto failed;
+		}
+		if (op == EXT4_LOAD_BBITMAPS)
+			brelse(bitmap_bh);
+		cnt++;
+	}
+	/* Reset the bitmap read counter to zero now */
+	EXT4_SB(sb)->bbitmaps_read_cnt = 0;
+	ext4_msg(sb, KERN_INFO, "%s %u block bitmaps finished",
+		 op == EXT4_PIN_BBITMAPS ? "pin" : "load", cnt);
+	/* record what was actually done so that unpin only drops real pins */
+	EXT4_SB(sb)->s_load_bbitmaps = op;
+	mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+
+	return 0;
+failed:
+	/* undo the pins taken on groups [0, i) before the failure */
+	for (j = 0; j < i; j++) {
+		gdp = ext4_get_group_desc(sb, j, NULL);
+		if (!gdp)
+			continue;
+		bitmap_bh = ext4_read_block_bitmap(sb, j);
+		if (!IS_ERR(bitmap_bh)) {
+			/* once for the read above, once for the pin */
+			brelse(bitmap_bh);
+			brelse(bitmap_bh);
+		}
+	}
+	mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+	return err;
+}
+
+/**
+ * ext4_unpin_block_bitmaps_bh() -- release pinned block bitmaps
+ * @sb: superblock
+ *
+ * Logs how many block bitmap reads happened since the last load/pin, then
+ * drops the references held by a previous EXT4_PIN_BBITMAPS.  Does nothing
+ * when the bitmaps were neither loaded nor pinned.
+ */
+void ext4_unpin_block_bitmaps_bh(struct super_block *sb)
+{
+	struct buffer_head *bitmap_bh;
+	struct ext4_group_desc *gdp;
+	ext4_group_t i;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	ext4_group_t cnt = 0;
+
+	mutex_lock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+	if (EXT4_SB(sb)->s_load_bbitmaps == EXT4_UNPIN_BBITMAPS) {
+		mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+		return;
+	}
+
+	ext4_msg(sb, KERN_INFO,
+		 "Read block bitmaps: %lu after %s",
+		 EXT4_SB(sb)->bbitmaps_read_cnt,
+		 EXT4_SB(sb)->s_load_bbitmaps == EXT4_PIN_BBITMAPS ?
+		 "pin" : "load");
+
+	/* a plain load holds no references, only the state is reset */
+	if (EXT4_SB(sb)->s_load_bbitmaps != EXT4_PIN_BBITMAPS) {
+		EXT4_SB(sb)->s_load_bbitmaps = EXT4_UNPIN_BBITMAPS;
+		mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+		return;
+	}
+
+	for (i = 0; i < ngroups; i++) {
+		gdp = ext4_get_group_desc(sb, i, NULL);
+		if (!gdp)
+			continue;
+		bitmap_bh = ext4_read_block_bitmap(sb, i);
+		if (IS_ERR(bitmap_bh))
+			continue;
+		/* once for the read above, once for the pin */
+		brelse(bitmap_bh);
+		brelse(bitmap_bh);
+		cnt++;
+	}
+	ext4_msg(sb, KERN_INFO, "Unpin %u block bitmaps finished", cnt);
+	EXT4_SB(sb)->s_load_bbitmaps = EXT4_UNPIN_BBITMAPS;
+	mutex_unlock(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+}
+
 /**
  * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb: superblock
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fa52b7d..4f9ee73 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1317,6 +1317,12 @@ struct ext4_super_block {
 /* Number of quota types we support */
 #define EXT4_MAXQUOTAS 3
 
+enum {
+	EXT4_UNPIN_BBITMAPS = 0,	/* nothing loaded or pinned */
+	EXT4_LOAD_BBITMAPS,		/* bitmaps read once, not held */
+	EXT4_PIN_BBITMAPS,		/* bitmap buffers read and held */
+};
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1487,6 +1493,10 @@ struct ext4_sb_info {
 	/* Barrier between changing inodes' journal flags and writepages ops. */
 	struct percpu_rw_semaphore s_journal_flag_rwsem;
 	struct dax_device *s_daxdev;
+
+	struct mutex s_load_bbitmaps_lock;	/* serializes load/pin/unpin */
+	unsigned long bbitmaps_read_cnt;	/* bitmap reads since last load/pin */
+	unsigned int s_load_bbitmaps;		/* EXT4_{UNPIN,LOAD,PIN}_BBITMAPS */
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2224,6 +2234,8 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 				  struct buffer_head *bh);
 
 /* balloc.c */
+int ext4_load_block_bitmaps_bh(struct super_block *sb, unsigned int op);
+void ext4_unpin_block_bitmaps_bh(struct super_block *sb);
 extern void ext4_get_group_no_and_offset(struct super_block *sb,
 					 ext4_fsblk_t blocknr,
 					 ext4_group_t *blockgrpp,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1388e56..b3e896f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -902,6 +902,7 @@ static void ext4_put_super(struct super_block *sb)
 	int aborted = 0;
 	int i, err;
 
+	ext4_unpin_block_bitmaps_bh(sb);
 	ext4_unregister_li_request(sb);
 	ext4_quota_off_umount(sb);
 
@@ -4393,6 +4394,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
 	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
 
+	mutex_init(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+
 	kfree(orig_data);
 	return 0;
 
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c..89396b3f 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -23,6 +23,7 @@
 	attr_session_write_kbytes,
 	attr_lifetime_write_kbytes,
 	attr_reserved_clusters,
+	attr_load_bbitmaps,
 	attr_inode_readahead,
 	attr_trigger_test_error,
 	attr_feature,
@@ -105,6 +106,24 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
 	return count;
 }
 
+static ssize_t load_bbitmaps_store(struct ext4_sb_info *sbi,
+				   const char *buf, size_t count)
+{
+	unsigned long long val;
+	int ret;
+
+	ret = kstrtoull(skip_spaces(buf), 0, &val);
+	if (ret || val > EXT4_PIN_BBITMAPS)
+		return -EINVAL;
+
+	if (val == EXT4_UNPIN_BBITMAPS)
+		ext4_unpin_block_bitmaps_bh(sbi->s_sb);
+	else	/* EXT4_LOAD_BBITMAPS or EXT4_PIN_BBITMAPS */
+		ret = ext4_load_block_bitmaps_bh(sbi->s_sb, val);
+
+	return ret ? ret : count;
+}
+
 static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 				  const char *buf, size_t count)
 {
@@ -163,6 +182,7 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(load_bbitmaps, 0644);
 
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
 		 ext4_sb_info, s_inode_readahead_blks);
@@ -193,6 +213,7 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
 	ATTR_LIST(reserved_clusters),
+	ATTR_LIST(load_bbitmaps),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
@@ -270,6 +291,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 		return snprintf(buf, PAGE_SIZE, "%llu\n",
 				(unsigned long long)
 				atomic64_read(&sbi->s_resv_clusters));
+	case attr_load_bbitmaps:
+		return snprintf(buf, PAGE_SIZE, "%u\n",
+				sbi->s_load_bbitmaps);
 	case attr_inode_readahead:
 	case attr_pointer_ui:
 		if (!ptr)
@@ -302,6 +326,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 	switch (a->attr_id) {
 	case attr_reserved_clusters:
 		return reserved_clusters_store(sbi, buf, len);
+	case attr_load_bbitmaps:
+		return load_bbitmaps_store(sbi, buf, len);
 	case attr_pointer_ui:
 		if (!ptr)
 			return 0;
-- 
1.8.3.1