[RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



ext4: delalloc block reservation avoid double accounting

From: Mingming cao <cmm@xxxxxxxxxx>

Since the fs free blocks counter is already reduced at block reservation
time, we need to let the underlying block allocator know, so that it does
not decrease the free blocks counter again when the real block allocation
finishes.

Signed-off-by: Mingming cao <cmm@xxxxxxxxxx>
---
 fs/ext4/dir.c     |    3 ++-
 fs/ext4/ext4.h    |    6 +++++-
 fs/ext4/ext4_i.h  |    1 +
 fs/ext4/extents.c |    2 +-
 fs/ext4/inode.c   |   25 ++++++++++++++++++-------
 fs/ext4/mballoc.c |   13 ++++++++++++-
 fs/ext4/super.c   |    2 ++
 7 files changed, 41 insertions(+), 11 deletions(-)

Index: linux-2.6.26-rc4/fs/ext4/ext4.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4.h	2008-06-01 14:22:03.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4.h	2008-06-01 15:04:14.000000000 -0700
@@ -74,6 +74,9 @@
 #define EXT4_MB_HINT_GOAL_ONLY		256
 /* goal is meaningful */
 #define EXT4_MB_HINT_TRY_GOAL		512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED      1024
+
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -1039,6 +1042,7 @@ extern void ext4_mb_free_blocks(handle_t
 
 
 /* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1231,7 +1235,7 @@ extern long ext4_fallocate(struct inode 
 extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
 			sector_t block, unsigned long max_blocks,
 			struct buffer_head *bh, int create,
-			int extend_disksize);
+			int extend_disksize, int flag);
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */
Index: linux-2.6.26-rc4/fs/ext4/inode.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/inode.c	2008-06-01 15:04:06.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/inode.c	2008-06-01 15:04:14.000000000 -0700
@@ -973,7 +973,7 @@ out:
  */
 int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
 			unsigned long max_blocks, struct buffer_head *bh,
-			int create, int extend_disksize)
+			int create, int extend_disksize, int flag)
 {
 	int retval;
 
@@ -1014,6 +1014,15 @@ int ext4_get_blocks_wrap(handle_t *handl
 	 * with create == 1 flag.
 	 */
 	down_write((&EXT4_I(inode)->i_data_sem));
+
+	/*
+	 * If the caller is from the delayed allocation writeout path,
+	 * we have already reserved fs blocks for allocation;
+	 * let the underlying get_block() function know, to
+	 * avoid double accounting.
+	 */
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 1;
 	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
@@ -1035,6 +1044,8 @@ int ext4_get_blocks_wrap(handle_t *handl
 							~EXT4_EXT_MIGRATE;
 		}
 	}
+	if (flag)
+		EXT4_I(inode)->i_delalloc_reserved_flag = 0;
 	up_write((&EXT4_I(inode)->i_data_sem));
 	return retval;
 }
@@ -1060,7 +1071,7 @@ static int ext4_get_block(struct inode *
 	}
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock,
-					max_blocks, bh_result, create, 0);
+					max_blocks, bh_result, create, 0, 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -1086,7 +1097,7 @@ struct buffer_head *ext4_getblk(handle_t
 	dummy.b_blocknr = -1000;
 	buffer_trace_init(&dummy.b_history);
 	err = ext4_get_blocks_wrap(handle, inode, block, 1,
-					&dummy, create, 1);
+					&dummy, create, 1, 0);
 	/*
 	 * ext4_get_blocks_handle() returns number of blocks
 	 * mapped. 0 in case of a HOLE.
@@ -1440,7 +1451,7 @@ static int ext4_da_reserve_space(struct 
 	return 0;       /* success */
 }
 
-static void ext4_da_release_space(struct inode *inode, int used, int to_free)
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	int total, mdb, release;
@@ -1483,7 +1494,7 @@ static int ext4_da_get_block_prep(struct
 	 * preallocated blocks are unmapped but should treated
 	 * the same as allocated blocks.
 	 */
-	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0);
+	ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1,  bh_result, 0, 0, 0);
 	if ((ret == 0)&& !buffer_delay(bh_result)) {
 		/* the block isn't (pre)allocated yet, let's reserve space */
 		/*
@@ -1505,7 +1516,7 @@ static int ext4_da_get_block_prep(struct
 
 	return ret;
 }
-
+#define		EXT4_DELALLOC_RSVED	1
 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -1519,7 +1530,7 @@ static int ext4_da_get_block_write(struc
 	BUG_ON(create == 0);
 
 	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
-				   bh_result, create, 0);
+				   bh_result, create, 0, EXT4_DELALLOC_RSVED);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 
Index: linux-2.6.26-rc4/fs/ext4/ext4_i.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4_i.h	2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4_i.h	2008-06-01 15:04:14.000000000 -0700
@@ -166,6 +166,7 @@ struct ext4_inode_info {
 	/* allocation reservation info for delalloc */
 	unsigned long i_reserved_data_blocks;
 	unsigned long i_reserved_meta_blocks;
+	unsigned short i_delalloc_reserved_flag;
 };
 
 #endif	/* _EXT4_I */
Index: linux-2.6.26-rc4/fs/ext4/super.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/super.c	2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/super.c	2008-06-01 15:04:14.000000000 -0700
@@ -574,6 +574,7 @@ static struct inode *ext4_alloc_inode(st
 	spin_lock_init(&ei->i_prealloc_lock);
 	ei->i_reserved_data_blocks = 0;
 	ei->i_reserved_meta_blocks = 0;
+	ei->i_delalloc_reserved_flag = 0;
 	return &ei->vfs_inode;
 }
 
@@ -1328,6 +1329,7 @@ set_qf_format:
 			sbi->s_stripe = option;
 			break;
 		case Opt_delalloc:
+			printk("delayed allocation enabled\n");
 			set_opt(sbi->s_mount_opt, DELALLOC);
 			break;
 		default:
Index: linux-2.6.26-rc4/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/mballoc.c	2008-06-01 14:22:02.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/mballoc.c	2008-06-01 15:04:14.000000000 -0700
@@ -2831,7 +2831,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
 	le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
 	spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
-	percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+	/*
+	 * The free blocks count has already been reduced/reserved
+	 * at write_begin() time for delayed allocation;
+	 * do not account for it twice.
+	 */
+	if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+		percpu_counter_sub(&sbi->s_freeblocks_counter,
+					ac->ac_b_ex.fe_len);
 
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4055,6 +4063,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
 	}
 	inquota = ar->len;
 
+	if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 	if (!ac) {
 		ar->len = 0;
Index: linux-2.6.26-rc4/fs/ext4/dir.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/dir.c	2008-06-01 13:06:07.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/dir.c	2008-06-01 15:04:14.000000000 -0700
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * fi
 		struct buffer_head *bh = NULL;
 
 		map_bh.b_state = 0;
-		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+		err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+						0, 0, 0);
 		if (err > 0) {
 			pgoff_t index = map_bh.b_blocknr >>
 					(PAGE_CACHE_SHIFT - inode->i_blkbits);
Index: linux-2.6.26-rc4/fs/ext4/extents.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/extents.c	2008-06-01 14:57:57.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/extents.c	2008-06-01 15:04:14.000000000 -0700
@@ -2934,7 +2934,7 @@ retry:
 		}
 		ret = ext4_get_blocks_wrap(handle, inode, block,
 					  max_blocks, &map_bh,
-					  EXT4_CREATE_UNINITIALIZED_EXT, 0);
+					  EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
 		if (ret <= 0) {
 #ifdef EXT4FS_DEBUG
 			WARN_ON(ret <= 0);


--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux