[PATCH] fs: Add hooks for get_hole_size to generic_block_fiemap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I'm just tossing this proof-of-concept patch out there to get some feedback
from the community. The problem relates to the performance of fiemap on
sparse files.

If you have a very big sparse file with huge holes, when those holes are
encountered, function __generic_block_fiemap iterates for every block
with "start_blk++;". This is extremely slow, inefficient and time consuming.
A simple command like:

   dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P

will cause some file systems to run continuously for days or weeks given
a filefrag command, even though the file contains only a single byte.
I encountered it with GFS2.

Sure, GFS2 does not need to call the generic fiemap. I can (and did)
easily implement a GFS2-specific block_fiemap that detects and skips holes.
My question is: Does it make sense to extend this to other file systems?

This patch just adds a hook in function generic_block_fiemap to call a
fs-specific function to return a hole size. That way, the function
doesn't have to do a block-by-block search when a hole is encountered.

This, of course, would be followed up with a GFS2 patch to take advantage
of the new hook.

I realize not all file systems can make use of this concept, so I don't
know if this is valuable or not. I thought I'd toss it out there to see
what people think.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso@xxxxxxxxxx> 
---
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c3..359615ee 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -778,7 +778,7 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
 	return generic_block_fiemap(inode, fieinfo, start, len,
-				    ext2_get_block);
+				    ext2_get_block, NULL);
 }
 
 static int ext2_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..03e0a51 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1053,7 +1053,7 @@ int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
 	return generic_block_fiemap(inode, fieinfo, start, len,
-				    ext3_get_block);
+				    ext3_get_block, NULL);
 }
 
 /*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4da228a..9943b81 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5153,7 +5153,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	/* fallback to generic here if not in extents fmt */
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return generic_block_fiemap(inode, fieinfo, start, len,
-			ext4_get_block);
+			ext4_get_block, NULL);
 
 	if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
 		return -EBADR;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f8cf619..3788474 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -724,7 +724,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
 	return generic_block_fiemap(inode, fieinfo,
-				start, len, get_data_block_fiemap);
+				start, len, get_data_block_fiemap, NULL);
 }
 
 static int f2fs_read_data_page(struct file *file, struct page *page)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e62e594..e93a3bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			ret = 0;
 	} else {
 		ret = __generic_block_fiemap(inode, fieinfo, start, len,
-					     gfs2_block_map);
+					     gfs2_block_map, NULL);
 	}
 
 	gfs2_glock_dq_uninit(&gh);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..5821a80 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
  * @start: where to start mapping in the inode
  * @len: how much space to map
  * @get_block: the fs's get_block function
+ * @get_hole_size: the fs's get_hole_size function
  *
  * This does FIEMAP for block based inodes.  Basically it will just loop
  * through get_block until we hit the number of extents we want to map, or we
@@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
 
 int __generic_block_fiemap(struct inode *inode,
 			   struct fiemap_extent_info *fieinfo, loff_t start,
-			   loff_t len, get_block_t *get_block)
+			   loff_t len, get_block_t *get_block,
+			   get_hole_size_t *get_hole_size)
 {
 	struct buffer_head map_bh;
 	sector_t start_blk, last_blk;
@@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode,
 	u32 flags = FIEMAP_EXTENT_MERGED;
 	bool past_eof = false, whole_file = false;
 	int ret = 0;
+	u64 holesize = 1;
 
 	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
 	if (ret)
@@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode,
 
 		/* HOLE */
 		if (!buffer_mapped(&map_bh)) {
-			start_blk++;
+			if (get_hole_size) {
+				holesize = get_hole_size(inode, start_blk);
+				BUG_ON(!holesize);
+			}
+			
+			start_blk += holesize;
 
 			/*
 			 * We want to handle the case where there is an
@@ -403,11 +411,13 @@ EXPORT_SYMBOL(__generic_block_fiemap);
 
 int generic_block_fiemap(struct inode *inode,
 			 struct fiemap_extent_info *fieinfo, u64 start,
-			 u64 len, get_block_t *get_block)
+			 u64 len, get_block_t *get_block,
+			 get_hole_size_t *get_hole_size)
 {
 	int ret;
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
+	ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block,
+				     get_hole_size);
 	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e11d60c..48c9b67 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks;
 struct buffer_head;
 typedef int (get_block_t)(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create);
+typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock);
 typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 			ssize_t bytes, void *private);
 
@@ -2545,10 +2546,12 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 extern int __generic_block_fiemap(struct inode *inode,
 				  struct fiemap_extent_info *fieinfo,
 				  loff_t start, loff_t len,
-				  get_block_t *get_block);
+				  get_block_t *get_block,
+				  get_hole_size_t *get_hole_size);
 extern int generic_block_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo, u64 start,
-				u64 len, get_block_t *get_block);
+				u64 len, get_block_t *get_block,
+				get_hole_size_t *get_hole_size);
 
 extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux