From: Zhang Yi <yi.zhang@xxxxxxxxxx> For delalloc, the reserved metadata block count is calculated for the worst case, so the reservation can be larger than what is really needed, which can lead to a false positive -ENOSPC being returned when claiming free space. So start a worker to flush delalloc blocks in ext4_should_retry_alloc(). If s_dirtyclusters_counter is not zero, there may be some delalloc metadata blocks that could be freed. Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx> --- fs/ext4/balloc.c | 47 +++++++++++++++++++++++++++++++++++++++++------ fs/ext4/ext4.h | 5 +++++ fs/ext4/super.c | 12 ++++++++++++ 3 files changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 79b20d6ae39e..e8acc21ef56d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -667,6 +667,30 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, return -ENOSPC; } +void ext4_writeback_da_blocks(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, + s_da_flush_work); + + try_to_writeback_inodes_sb(sbi->s_sb, WB_REASON_FS_FREE_SPACE); +} + +/* + * Writeback delallocated blocks and try to free unused reserved extent + * blocks, return 0 if no delalloc blocks need to writeback, 1 otherwise. 
+ */ +static int ext4_flush_da_blocks(struct ext4_sb_info *sbi) +{ + if (!percpu_counter_read_positive(&sbi->s_dirtyclusters_counter) && + !percpu_counter_sum(&sbi->s_dirtyclusters_counter)) + return 0; + + if (!work_busy(&sbi->s_da_flush_work)) + queue_work(sbi->s_da_flush_wq, &sbi->s_da_flush_work); + flush_work(&sbi->s_da_flush_work); + return 1; +} + /** * ext4_should_retry_alloc() - check if a block allocation should be retried * @sb: superblock @@ -681,15 +705,22 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi, int ext4_should_retry_alloc(struct super_block *sb, int *retries) { struct ext4_sb_info *sbi = EXT4_SB(sb); - - if (!sbi->s_journal) - return 0; + int result = 0; if (++(*retries) > 3) { percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit); return 0; } + /* + * Flush allocated delalloc blocks and try to free unused + * reserved extent blocks. + */ + if (test_opt(sb, DELALLOC)) + result += ext4_flush_da_blocks(sbi); + + if (!sbi->s_journal) + goto out; /* * if there's no indication that blocks are about to be freed it's * possible we just missed a transaction commit that did so @@ -701,16 +732,20 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) flush_work(&sbi->s_discard_work); atomic_dec(&sbi->s_retry_alloc_pending); } - return ext4_has_free_clusters(sbi, 1, 0); + result += ext4_has_free_clusters(sbi, 1, 0); + goto out; } /* * it's possible we've just missed a transaction commit here, * so ignore the returned status */ - ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id); + result += 1; (void) jbd2_journal_force_commit_nested(sbi->s_journal); - return 1; +out: + if (result) + ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id); + return result; } /* diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 67b12f9ffc50..6f4259ea6751 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1627,6 +1627,10 @@ struct ext4_sb_info { /* workqueue for reserved extent conversions (buffered io) */ struct workqueue_struct 
*rsv_conversion_wq; + /* workqueue for delalloc buffer IO flushing */ + struct workqueue_struct *s_da_flush_wq; + struct work_struct s_da_flush_work; + /* timer for periodic error stats printing */ struct timer_list s_err_report; @@ -2716,6 +2720,7 @@ extern int ext4_wait_block_bitmap(struct super_block *sb, struct buffer_head *bh); extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group); +extern void ext4_writeback_da_blocks(struct work_struct *work); extern unsigned ext4_free_clusters_after_init(struct super_block *sb, ext4_group_t block_group, struct ext4_group_desc *gdp); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7bc7c8c0ed71..6f50975ba42e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1335,6 +1335,8 @@ static void ext4_put_super(struct super_block *sb) flush_work(&sbi->s_sb_upd_work); destroy_workqueue(sbi->rsv_conversion_wq); + flush_work(&sbi->s_da_flush_work); + destroy_workqueue(sbi->s_da_flush_wq); ext4_release_orphan_info(sb); if (sbi->s_journal) { @@ -5491,6 +5493,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount4; } + INIT_WORK(&sbi->s_da_flush_work, ext4_writeback_da_blocks); + sbi->s_da_flush_wq = alloc_workqueue("ext4_delalloc_flush", WQ_UNBOUND, 1); + if (!sbi->s_da_flush_wq) { + printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); + err = -ENOMEM; + goto failed_mount4; + } + /* * The jbd2_journal_load will have done any necessary log recovery, * so we can safely mount the rest of the filesystem now. @@ -5660,6 +5670,8 @@ failed_mount9: __maybe_unused sb->s_root = NULL; failed_mount4: ext4_msg(sb, KERN_ERR, "mount failed"); + if (sbi->s_da_flush_wq) + destroy_workqueue(sbi->s_da_flush_wq); if (EXT4_SB(sb)->rsv_conversion_wq) destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); failed_mount_wq: -- 2.39.2