[RFC PATCH 15/16] ext4: flush delalloc blocks if no free space

Zhang Yi <yi.zhang@xxxxxxxxxxxxxxx> · Thu, 24 Aug 2023 17:26:18 +0800

From: Zhang Yi <yi.zhang@xxxxxxxxxx>

For delalloc, the reserved metadata block count is calculated for the
worst case, so the reservation can be larger than what is really needed.
That can lead to a false positive -ENOSPC when claiming free space. So
start a worker to flush delalloc blocks in ext4_should_retry_alloc().
If s_dirtyclusters_counter is not zero, there may be some delalloc
metadata blocks that could be freed.

Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx>
---
 fs/ext4/balloc.c | 47 +++++++++++++++++++++++++++++++++++++++++------
 fs/ext4/ext4.h   |  5 +++++
 fs/ext4/super.c  | 12 ++++++++++++
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 79b20d6ae39e..e8acc21ef56d 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,6 +667,30 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
 		return -ENOSPC;
 }
 
+void ext4_writeback_da_blocks(struct work_struct *work)
+{
+	struct ext4_sb_info *sbi;
+
+	sbi = container_of(work, struct ext4_sb_info, s_da_flush_work);
+	try_to_writeback_inodes_sb(sbi->s_sb, WB_REASON_FS_FREE_SPACE);
+}
+
+/*
+ * Write back delalloc blocks and try to free unused reserved extent
+ * blocks. Return 0 if no delalloc blocks need writing back, 1 otherwise.
+ */
+static int ext4_flush_da_blocks(struct ext4_sb_info *sbi)
+{
+	if (!percpu_counter_read_positive(&sbi->s_dirtyclusters_counter) &&
+	    !percpu_counter_sum(&sbi->s_dirtyclusters_counter))
+		return 0;
+
+	if (!work_busy(&sbi->s_da_flush_work))
+		queue_work(sbi->s_da_flush_wq, &sbi->s_da_flush_work);
+	flush_work(&sbi->s_da_flush_work);
+	return 1;
+}
+
 /**
  * ext4_should_retry_alloc() - check if a block allocation should be retried
  * @sb:			superblock
@@ -681,15 +705,22 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (!sbi->s_journal)
-		return 0;
+	int result = 0;
 
 	if (++(*retries) > 3) {
 		percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
 		return 0;
 	}
 
+	/*
+	 * Flush allocated delalloc blocks and try to free unused
+	 * reserved extent blocks.
+	 */
+	if (test_opt(sb, DELALLOC))
+		result += ext4_flush_da_blocks(sbi);
+
+	if (!sbi->s_journal)
+		goto out;
 	/*
 	 * if there's no indication that blocks are about to be freed it's
 	 * possible we just missed a transaction commit that did so
@@ -701,16 +732,20 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 			flush_work(&sbi->s_discard_work);
 			atomic_dec(&sbi->s_retry_alloc_pending);
 		}
-		return ext4_has_free_clusters(sbi, 1, 0);
+		result += ext4_has_free_clusters(sbi, 1, 0);
+		goto out;
 	}
 
 	/*
 	 * it's possible we've just missed a transaction commit here,
 	 * so ignore the returned status
 	 */
-	ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+	result += 1;
 	(void) jbd2_journal_force_commit_nested(sbi->s_journal);
-	return 1;
+out:
+	if (result)
+		ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
+	return result;
 }
 
 /*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 67b12f9ffc50..6f4259ea6751 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1627,6 +1627,10 @@ struct ext4_sb_info {
 	/* workqueue for reserved extent conversions (buffered io) */
 	struct workqueue_struct *rsv_conversion_wq;
 
+	/* workqueue for delalloc buffer IO flushing */
+	struct workqueue_struct *s_da_flush_wq;
+	struct work_struct s_da_flush_work;
+
 	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
 
@@ -2716,6 +2720,7 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
 				  struct buffer_head *bh);
 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 						  ext4_group_t block_group);
+extern void ext4_writeback_da_blocks(struct work_struct *work);
 extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      ext4_group_t block_group,
 					      struct ext4_group_desc *gdp);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7bc7c8c0ed71..6f50975ba42e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1335,6 +1335,8 @@ static void ext4_put_super(struct super_block *sb)
 
 	flush_work(&sbi->s_sb_upd_work);
 	destroy_workqueue(sbi->rsv_conversion_wq);
+	flush_work(&sbi->s_da_flush_work);
+	destroy_workqueue(sbi->s_da_flush_wq);
 	ext4_release_orphan_info(sb);
 
 	if (sbi->s_journal) {
@@ -5491,6 +5493,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		goto failed_mount4;
 	}
 
+	INIT_WORK(&sbi->s_da_flush_work, ext4_writeback_da_blocks);
+	sbi->s_da_flush_wq = alloc_workqueue("ext4_delalloc_flush", WQ_UNBOUND, 1);
+	if (!sbi->s_da_flush_wq) {
+		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
+		err = -ENOMEM;
+		goto failed_mount4;
+	}
+
 	/*
 	 * The jbd2_journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
@@ -5660,6 +5670,8 @@ failed_mount9: __maybe_unused
 	sb->s_root = NULL;
 failed_mount4:
 	ext4_msg(sb, KERN_ERR, "mount failed");
+	if (sbi->s_da_flush_wq)
+		destroy_workqueue(sbi->s_da_flush_wq);
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
 failed_mount_wq:
-- 
2.39.2