> On 20 Nov 2019, at 21:13, Theodore Y. Ts'o <tytso@xxxxxxx> wrote: > > Hi Alex, > > A couple of comments. First, please separate this patch so that these > two separate pieces of functionality can be reviewed and tested > separately: > This is the first patch of the series. Thanks, Alex >From 81c4b3b5a17d94525bbc6d2d89b20f6618b05bc6 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev <bzzz@xxxxxxxxxxxxx> Date: Thu, 21 Nov 2019 09:53:13 +0300 Subject: [PATCH 1/2] ext4: limit scanning for a good group at first two rounds to prevent situation when 10x-100x thousand of groups are scanned, especially non-initialized groups. Signed-off-by: Alex Zhuravlev <bzzz@xxxxxxxxxxxxx> --- fs/ext4/ext4.h | 2 ++ fs/ext4/mballoc.c | 14 ++++++++++++-- fs/ext4/sysfs.c | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 03db3e71676c..d4e47fdad87c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1480,6 +1480,8 @@ struct ext4_sb_info { /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; + unsigned int s_mb_toscan0; + unsigned int s_mb_toscan1; /* stats for buddy allocator */ atomic_t s_bal_reqs; /* number of reqs with len > 1 */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a3e2767bdf2f..cebd7d8df0b8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2098,7 +2098,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t ngroups, group, i; + ext4_group_t ngroups, toscan, group, i; int cr; int err = 0, first_err = 0; struct ext4_sb_info *sbi; @@ -2169,7 +2169,15 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) */ group = ac->ac_g_ex.fe_group; - for (i = 0; i < ngroups; group++, i++) { + /* limit number of groups to scan at the first two rounds + * when we hope to find something really good */ + toscan = ngroups; + if (cr == 0) + toscan = sbi->s_mb_toscan0; + else if (cr == 1) + toscan = sbi->s_mb_toscan1; + + for (i = 0; i < toscan; group++, i++) { int ret = 0; cond_resched(); /* @@ -2872,6 +2880,8 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) bio_put(discard_bio); } } + sbi->s_mb_toscan0 = 1024; + sbi->s_mb_toscan1 = 4096; list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) ext4_free_data_in_buddy(sb, entry); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index eb1efad0e20a..c96ee20f5487 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -198,6 +198,8 @@ EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(journal_task, 0444, journal_task); +EXT4_RW_ATTR_SBI_UI(mb_toscan0, s_mb_toscan0); +EXT4_RW_ATTR_SBI_UI(mb_toscan1, s_mb_toscan1); static unsigned int old_bump_val = 128; EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); @@ -228,6 +230,8 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(first_error_time), ATTR_LIST(last_error_time), ATTR_LIST(journal_task), + ATTR_LIST(mb_toscan0), + ATTR_LIST(mb_toscan1), NULL, }; ATTRIBUTE_GROUPS(ext4); -- 2.20.1