[PATCH 5/6] ext4: fix punch_hole extent handler

The current implementation has the following issues:
 - EOFBLOCKS_FL is not updated when necessary
 - ext4_ext_rm_leaf() may return -EAGAIN due to a transaction restart
   (see the retry sketch after this list)
 - the punched extent is converted to uninitialized incorrectly; it is
   not clear why we ever have to do that conversion, but as long as we
   do it, let's do it the right way
 - fsync() logic is broken because ei->i_sync_tid is not updated
 - Last but not least: the punch hole logic sits directly in
   ext4_ext_map_blocks() at the third level of nesting; IMHO one can
   easily strain one's eyes while investigating that code. We have
   nothing to hide, do we?
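
For reference, a minimal sketch of the retry contract at the call site
(punch_with_restart() is a hypothetical name for illustration only; the
actual diff open-codes this with an "again:" label inside
ext4_ext_map_blocks(), and error handling is elided):

	static int punch_with_restart(handle_t *handle, struct inode *inode,
				      struct ext4_map_blocks *map)
	{
		int ret;

		do {
			struct ext4_ext_path *path;

			/* redo the lookup from scratch on every attempt */
			path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
			if (IS_ERR(path))
				return PTR_ERR(path);
			/* the handler frees 'path' before returning */
			ret = ext4_ext_handle_punched_extent(handle, inode,
							     map, path);
		} while (ret == -EAGAIN);	/* transaction restarted */

		return ret;
	}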

This patch performs the following changes:
 - Move the punch hole logic to a dedicated function, similar to the
   uninitialized extent handlers.
 - Clear EOFBLOCKS_FL if necessary. Unfortunately we cannot reuse the
   check_eofblocks_fl() function because its purpose is to handle file
   expansion, while in our case we have to recheck the base invariant
   (see the sketch after this list):
       clear_eof_flag = (eof_block >= last_allocated_block)
 - Repeat the punch hole after a transaction restart.
 - Update the inode sync transaction id on exit.
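
A minimal sketch of that invariant check, assuming last_allocated_block
stands for ee_block + ee_len of the rightmost extent (names follow the
diff below; this is an illustration, not the exact hunk):

	ext4_fsblk_t eof_block =
		(inode->i_size + (1 << inode->i_blkbits) - 1) >>
			inode->i_blkbits;

	if (eof_block >= last_allocated_block) {
		ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
		inode_dirty = 1;	/* flag change must hit the disk */
	}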

Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
---
 fs/ext4/extents.c |  208 +++++++++++++++++++++++++++++++++--------------------
 1 files changed, 130 insertions(+), 78 deletions(-)
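
For context, the code path below is driven from userspace via
fallocate(2); a minimal illustrative trigger (my sketch, not part of
the patch) looks like this:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <linux/falloc.h>

	/* FALLOC_FL_PUNCH_HOLE is only valid together with KEEP_SIZE,
	 * so i_size stays unchanged while blocks are deallocated. */
	static int punch_hole(int fd, off_t offset, off_t len)
	{
		return fallocate(fd, FALLOC_FL_PUNCH_HOLE |
				     FALLOC_FL_KEEP_SIZE, offset, len);
	}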

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 963f883..877e61b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3219,6 +3219,126 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
                 unmap_underlying_metadata(bdev, block + i);
 }
 
+static int
+ext4_ext_handle_punched_extent(handle_t *handle, struct inode *inode,
+			struct ext4_map_blocks *map,
+			struct ext4_ext_path *path)
+{
+	struct ext4_extent *ex = path[path->p_depth].p_ext;
+	ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+	unsigned short ee_len = ext4_ext_get_actual_len(ex);
+	struct ext4_map_blocks punch_map;
+	ext4_fsblk_t partial_cluster = 0;
+	unsigned int punched_out = 0;
+	int err, inode_dirty = 0;
+
+	/* Punch out the map length, but only to the end of the extent */
+	punched_out = ee_len - (map->m_lblk - ee_block);
+	if (punched_out > map->m_len)
+		punched_out = map->m_len;
+	/*
+	 * Since extents being punched out are converted to uninitialized,
+	 * they must fit in an uninitialized extent
+	 */
+	if (punched_out > EXT_UNINIT_MAX_LEN)
+		punched_out = EXT_UNINIT_MAX_LEN;
+
+	punch_map.m_lblk = map->m_lblk;
+	punch_map.m_pblk = map->m_lblk - ee_block + ext4_ext_pblock(ex);
+	punch_map.m_len = punched_out;
+	punch_map.m_flags = 0;
+
+	/* Check to see if the extent needs to be split */
+	if (punch_map.m_len != ee_len || punch_map.m_lblk != ee_block) {
+		err = ext4_split_extent(handle, inode, path, &punch_map, 0,
+					EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
+					EXT4_GET_BLOCKS_PRE_IO);
+		if (err < 0) {
+			goto out;
+		}
+		/* find extent for the block at the start of the hole */
+		ext4_ext_drop_refs(path);
+		kfree(path);
+
+		path = ext4_ext_find_extent(inode, map->m_lblk, NULL);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			path = NULL;
+			goto out;
+		}
+		ex = path[path->p_depth].p_ext;
+		ee_len = ext4_ext_get_actual_len(ex);
+		ee_block = le32_to_cpu(ex->ee_block);
+	}
+
+	err = ext4_ext_get_access(handle, inode, path + path->p_depth);
+	if (err)
+		goto out;
+	ext4_ext_mark_uninitialized(ex);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+	if (err)
+		goto out;
+	ext4_ext_invalidate_cache(inode);
+	err = ext4_ext_rm_leaf(handle, inode, path, &partial_cluster,
+			       map->m_lblk, map->m_lblk + punched_out);
+	if (err)
+		goto out;
+
+	inode_dirty = ext4_ext_try_shrink(handle, inode);
+	/* We have punched out an extent; if it was the only extent beyond
+	 * i_size, the EOFBLOCKS flag should be cleared. */
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) {
+		ext4_fsblk_t eof_block =
+			(inode->i_size + (1 << inode->i_blkbits) - 1) >>
+				inode->i_blkbits;
+		/* find the latest extent */
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL);
+		if (IS_ERR(path)) {
+			err = PTR_ERR(path);
+			path = NULL;
+			goto out;
+		}
+		ex = path[path->p_depth].p_ext;
+		if (ex) {
+			ee_len = ext4_ext_get_actual_len(ex);
+			ee_block = le32_to_cpu(ex->ee_block);
+		} else {
+			/* Inode is empty */
+			ee_block = ee_len = 0;
+		}
+		if (eof_block >= ee_block + ee_len) {
+			ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+			inode_dirty = 1;
+		} else if (!ext4_ext_is_uninitialized(ex)) {
+			EXT4_ERROR_INODE(inode, "initialized extent beyond "
+					"EOF i_size: %lld, ex[%u:%u] "
+					"depth: %d pblock %lld",
+					inode->i_size, ee_block, ee_len,
+					path->p_depth,
+					path[path->p_depth].p_block);
+			err = -EIO;
+			/* Continue, because the inode shrink must be
+			 * accomplished regardless of stale EOF blocks */
+		}
+	}
+	if (inode_dirty) {
+		int err2 = ext4_mark_inode_dirty(handle, inode);
+		if (!err)
+			err = err2;
+	}
+out:
+	ext4_update_inode_fsync_trans(handle, inode, 0);
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	return err ? err : punched_out;
+}
+
+
+
 /*
  * Handle EOFBLOCKS_FL flag, clearing it if necessary
  */
@@ -3715,24 +3835,24 @@ static int get_implied_cluster_alloc(struct super_block *sb,
 int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map, int flags)
 {
-	struct ext4_ext_path *path = NULL;
+	struct ext4_ext_path *path;
 	struct ext4_extent newex, *ex, *ex2;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_fsblk_t newblock = 0;
 	int free_on_err = 0, err = 0, depth, ret;
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0, reserved_clusters = 0;
-	unsigned int punched_out = 0;
-	unsigned int result = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
 	ext4_lblk_t cluster_offset;
-	struct ext4_map_blocks punch_map;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
+again:
+	path = NULL;
+
 	/* check in cache */
 	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
 		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3803,8 +3923,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 		/* if found extent covers block, simply return it */
 		if (in_range(map->m_lblk, ee_block, ee_len)) {
-			ext4_fsblk_t partial_cluster = 0;
-
 			newblock = map->m_lblk - ee_block + ee_start;
 			/* number of remaining blocks in the extent */
 			allocated = ee_len - (map->m_lblk - ee_block);
@@ -3826,74 +3944,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 					allocated, newblock);
 				return ret;
 			}
-
-			/*
-			 * Punch out the map length, but only to the
-			 * end of the extent
-			 */
-			punched_out = allocated < map->m_len ?
-				allocated : map->m_len;
-
-			/*
-			 * Sense extents need to be converted to
-			 * uninitialized, they must fit in an
-			 * uninitialized extent
-			 */
-			if (punched_out > EXT_UNINIT_MAX_LEN)
-				punched_out = EXT_UNINIT_MAX_LEN;
-
-			punch_map.m_lblk = map->m_lblk;
-			punch_map.m_pblk = newblock;
-			punch_map.m_len = punched_out;
-			punch_map.m_flags = 0;
-
-			/* Check to see if the extent needs to be split */
-			if (punch_map.m_len != ee_len ||
-				punch_map.m_lblk != ee_block) {
-
-				ret = ext4_split_extent(handle, inode,
-				path, &punch_map, 0,
-				EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
-				EXT4_GET_BLOCKS_PRE_IO);
-
-				if (ret < 0) {
-					err = ret;
-					goto out2;
-				}
-				/*
-				 * find extent for the block at
-				 * the start of the hole
-				 */
-				ext4_ext_drop_refs(path);
-				kfree(path);
-
-				path = ext4_ext_find_extent(inode,
-				map->m_lblk, NULL);
-				if (IS_ERR(path)) {
-					err = PTR_ERR(path);
-					path = NULL;
-					goto out2;
-				}
-
-				depth = ext_depth(inode);
-				ex = path[depth].p_ext;
-				ee_len = ext4_ext_get_actual_len(ex);
-				ee_block = le32_to_cpu(ex->ee_block);
-				ee_start = ext4_ext_pblock(ex);
-
-			}
-
-			ext4_ext_mark_uninitialized(ex);
-
-			ext4_ext_invalidate_cache(inode);
-
-			err = ext4_ext_rm_leaf(handle, inode, path,
-					       &partial_cluster, map->m_lblk,
-					       map->m_lblk + punched_out);
-
-			if (!err && ext4_ext_try_shrink(handle, inode))
-				err = ext4_mark_inode_dirty(handle, inode);
-
+			ret = ext4_ext_handle_punched_extent(handle, inode,
+							map, path);
+			if (ret == -EAGAIN)
+				goto again;
+			return ret;
 			goto out2;
 		}
 	}
@@ -4165,10 +4220,7 @@ out2:
 	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
 		newblock, map->m_len, err ? err : allocated);
 
-	result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
-			punched_out : allocated;
-
-	return err ? err : result;
+	return err ? err : allocated;
 }
 
 void ext4_ext_truncate(struct inode *inode)
-- 
1.7.1
