On Jun 22, 2020, at 8:16 AM, Wang Shilong <wangshilong1991@xxxxxxxxx> wrote: > > From: Wang Shilong <wshilong@xxxxxxx> > > Now WAS_TRIMMED flag will be cleared if there are any blocks > freed in this block group, this might be not good idea if there > are only few blocks freed, since most of freed blocks have been > issued discard before. > > So this patch tries to introduce another counter which record > how many blocks freed since last time trimmed, WAS_TRIMMED flag > will be only cleared if there are enough free blocks(default 128). > > Also expose a new sys interface min_freed_blocks_to_trim to tune > default behavior. > > Cc: Shuichi Ihara <sihara@xxxxxxx> > Cc: Andreas Dilger <adilger@xxxxxxxxx> > Cc: Wang Shilong <wangshilong1991@xxxxxxxxx> > Signed-off-by: Wang Shilong <wshilong@xxxxxxx> Reviewed-by: Andreas Dilger <adilger@xxxxxxxxx> > --- > changelog v1->v2: > init bb_freed_last_trimmed to be zero during setup > --- > fs/ext4/ext4.h | 7 +++++++ > fs/ext4/mballoc.c | 18 ++++++++++++++++-- > fs/ext4/sysfs.c | 2 ++ > 3 files changed, 25 insertions(+), 2 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 252754da2f1b..2da86d1ebe3f 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); > /* Metadata checksum algorithm codes */ > #define EXT4_CRC32C_CHKSUM 1 > > +/* Default min freed blocks which we could clear TRIMMED flags */ > +#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128 > + > /* > * Structure of the super block > */ > @@ -1533,6 +1536,9 @@ struct ext4_sb_info { > /* the size of zero-out chunk */ > unsigned int s_extent_max_zeroout_kb; > > + /* Min freed blocks per group that we could run trim on it*/ > + unsigned long s_min_freed_blocks_to_trim; > + > unsigned int s_log_groups_per_flex; > struct flex_groups * __rcu *s_flex_groups; > ext4_group_t s_flex_groups_allocated; > @@ -3125,6 +3131,7 @@ struct ext4_group_info { > struct rb_root bb_free_root; > ext4_grpblk_t bb_first_free; /* first free block */ > ext4_grpblk_t bb_free; /* total free blocks */ > + ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/ > ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ > ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ > struct list_head bb_prealloc_list; > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 235a316584d0..52ab9ac5be86 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2558,6 +2558,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, > init_rwsem(&meta_group_info[i]->alloc_sem); > meta_group_info[i]->bb_free_root = RB_ROOT; > meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ > + meta_group_info[i]->bb_freed_last_trimmed = 0; > > mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); > return 0; > @@ -2763,6 +2764,8 @@ int ext4_mb_init(struct super_block *sb) > sbi->s_mb_group_prealloc, sbi->s_stripe); > } > > + sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM; > + > sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); > if (sbi->s_locality_groups == NULL) { > ret = -ENOMEM; > @@ -5091,8 +5094,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, > * If the volume is mounted with -o discard, online discard > * is supported and the free blocks will be trimmed online. > */ > - if (!test_opt(sb, DISCARD)) > - EXT4_MB_GDP_CLEAR_TRIMMED(gdp); > + if (!test_opt(sb, DISCARD)) { > + e4b.bd_info->bb_freed_last_trimmed += count; > + /* > + * Only clear the WAS_TRIMMED flag if there are > + * several blocks freed, or if the group becomes > + * totally 'empty'(free < num_itable_blocks + 2). > + */ > + if (e4b.bd_info->bb_freed_last_trimmed >= > + sbi->s_min_freed_blocks_to_trim || > + e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2)) > + EXT4_MB_GDP_CLEAR_TRIMMED(gdp); > + } > ext4_group_desc_csum_set(sb, block_group, gdp); > ext4_unlock_group(sb, block_group); > > @@ -5425,6 +5438,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, > } > ext4_lock_group(sb, group); > EXT4_MB_GDP_SET_TRIMMED(gdp); > + e4b.bd_info->bb_freed_last_trimmed = 0; > ext4_group_desc_csum_set(sb, group, gdp); > ext4_unlock_group(sb, group); > err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); > diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c > index 6c9fc9e21c13..8ee4e7e3f125 100644 > --- a/fs/ext4/sysfs.c > +++ b/fs/ext4/sysfs.c > @@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); > EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); > EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); > EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); > +EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim); > EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); > EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); > EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); > @@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = { > ATTR_LIST(mb_group_prealloc), > ATTR_LIST(max_writeback_mb_bump), > ATTR_LIST(extent_max_zeroout_kb), > + ATTR_LIST(min_freed_blocks_to_trim), > ATTR_LIST(trigger_fs_error), > ATTR_LIST(err_ratelimit_interval_ms), > ATTR_LIST(err_ratelimit_burst), > -- > 2.25.4 > Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP