From: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> Freespace trees can occupy a lot of memory as fragmentation increases. This patch adds a sysfs file to monitor the memory usage of the freespace tree allocator. It also adds a sysfs config to control the maximum memory that the allocator can use. If the allocator exceeds this threshold, the file system enters the "FRSP_MEM_CRUNCH" state. The next patch in the series performs LRU eviction when this state is reached. Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> --- fs/ext4/ext4.h | 8 ++++++++ fs/ext4/mballoc.c | 20 ++++++++++++++++++++ fs/ext4/mballoc.h | 4 ++++ fs/ext4/sysfs.c | 11 +++++++++++ 4 files changed, 43 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 15e6ce9f1afa..93bf2fe35cf1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1223,6 +1223,12 @@ struct ext4_inode_info { * allocator off) */ +#define EXT4_MOUNT2_FRSP_MEM_CRUNCH 0x00000040 /* + * Freespace tree allocator + * is in a tight memory + * situation. 
+ */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ @@ -1607,6 +1613,8 @@ struct ext4_sb_info { atomic_t s_mb_num_frsp_trees_cached; struct list_head s_mb_uncached_trees; u32 s_mb_frsp_cache_aggression; + atomic_t s_mb_num_fragments; + u32 s_mb_frsp_mem_limit; /* workqueue for reserved extent conversions (buffered io) */ struct workqueue_struct *rsv_conversion_wq; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1da63afdbb3d..b28b7fb0506e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -869,6 +869,7 @@ void ext4_mb_frsp_print_tree_len(struct super_block *sb, static struct ext4_frsp_node *ext4_mb_frsp_alloc_node(struct super_block *sb) { struct ext4_frsp_node *node; + struct ext4_sb_info *sbi = EXT4_SB(sb); node = kmem_cache_alloc(ext4_freespace_node_cachep, GFP_NOFS); if (!node) @@ -877,13 +878,31 @@ static struct ext4_frsp_node *ext4_mb_frsp_alloc_node(struct super_block *sb) RB_CLEAR_NODE(&node->frsp_node); RB_CLEAR_NODE(&node->frsp_len_node); + atomic_inc(&sbi->s_mb_num_fragments); + + if (sbi->s_mb_frsp_mem_limit && + atomic_read(&sbi->s_mb_num_fragments) > + EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(sb)) + set_opt2(sb, FRSP_MEM_CRUNCH); + else + clear_opt2(sb, FRSP_MEM_CRUNCH); + + return node; } static void ext4_mb_frsp_free_node(struct super_block *sb, struct ext4_frsp_node *node) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + kmem_cache_free(ext4_freespace_node_cachep, node); + atomic_dec(&sbi->s_mb_num_fragments); + + if (!sbi->s_mb_frsp_mem_limit || + atomic_read(&sbi->s_mb_num_fragments) < + EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(sb)) + clear_opt2(sb, FRSP_MEM_CRUNCH); } /* Evict a tree from memory */ @@ -1607,6 +1626,7 @@ int ext4_mb_init_freespace_trees(struct super_block *sb) } rwlock_init(&sbi->s_mb_frsp_lock); atomic_set(&sbi->s_mb_num_frsp_trees_cached, 0); + atomic_set(&sbi->s_mb_num_fragments, 0); return 0; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h 
index 1fcdd3e6f7d5..6cfb228e4da2 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -92,6 +92,10 @@ struct ext4_frsp_node { struct rb_node frsp_node; struct rb_node frsp_len_node; }; +/* Number of ext4_frsp_node objects that fit in s_mb_frsp_mem_limit bytes; uses its __sb argument, not a caller-local 'sbi'. */ +#define EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(__sb) \ + ((EXT4_SB(__sb)->s_mb_frsp_mem_limit / sizeof(struct ext4_frsp_node))) + struct ext4_free_data { /* this links the free block information from sb_info */ struct list_head efd_list; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index bfabb799fa45..19301b10944b 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -8,6 +8,7 @@ * */ +#include "mballoc.h" #include <linux/time.h> #include <linux/fs.h> #include <linux/seq_file.h> @@ -24,6 +25,7 @@ typedef enum { attr_session_write_kbytes, attr_lifetime_write_kbytes, attr_reserved_clusters, + attr_frsp_tree_usage, attr_inode_readahead, attr_trigger_test_error, attr_first_error_time, @@ -208,6 +210,7 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444); EXT4_ATTR_FUNC(session_write_kbytes, 0444); EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444); EXT4_ATTR_FUNC(reserved_clusters, 0644); +EXT4_ATTR_FUNC(frsp_tree_usage, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); @@ -248,6 +251,7 @@ EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch); EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit); +EXT4_RW_ATTR_SBI_UI(mb_frsp_max_mem, s_mb_frsp_mem_limit); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -257,6 +261,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(reserved_clusters), + ATTR_LIST(frsp_tree_usage), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), @@ -296,6 +301,7 @@ static struct attribute *ext4_attrs[] = { #endif ATTR_LIST(mb_prefetch), ATTR_LIST(mb_prefetch_limit), + 
ATTR_LIST(mb_frsp_max_mem), NULL, }; ATTRIBUTE_GROUPS(ext4); @@ -378,6 +384,11 @@ static ssize_t ext4_attr_show(struct kobject *kobj, return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long) atomic64_read(&sbi->s_resv_clusters)); + case attr_frsp_tree_usage: + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long) + atomic_read(&sbi->s_mb_num_fragments) * + sizeof(struct ext4_frsp_node)); case attr_inode_readahead: case attr_pointer_ui: if (!ptr) -- 2.28.0.297.g1956fa8f8d-goog