Hi, I'm just tossing this proof-of-concept patch out there to get some feedback from the community. The problem relates to the performance of fiemap on sparse files. If you have a very big sparse file with huge holes, when those holes are encountered, function __generic_block_fiemap iterates for every block with "start_blk++;". This is extremely slow, inefficient and time consuming. A simple command like: dd if=/dev/zero of=/mnt/gfs2/filler-P bs=1 count=1 seek=1P will cause some file systems to run continuously for days or weeks given a filefrag command, even though the file contains only a single byte. I encountered it with GFS2. Sure, GFS2 does not need to call the generic fiemap. I can (and did) easily implement a GFS2-specific block_fiemap that detects and skips holes. My question is: Does it make sense to extend this to other file systems? This patch just adds a hook in function generic_block_fiemap to call a fs-specific function to return a hole size. That way, the function doesn't have to do a block-by-block search when a hole is encountered. This, of course, would be followed up with a GFS2 patch to take advantage of the new hook. I realize not all file systems can make use of this concept, so I don't know if this is valuable or not. I thought I'd toss it out there to see what people think. Regards, Bob Peterson Red Hat File Systems Signed-off-by: Bob Peterson <rpeterso@xxxxxxxxxx> --- diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 36d35c3..359615ee 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -778,7 +778,7 @@ int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { return generic_block_fiemap(inode, fieinfo, start, len, - ext2_get_block); + ext2_get_block, NULL); } static int ext2_writepage(struct page *page, struct writeback_control *wbc) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2c6ccc4..03e0a51 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1053,7 +1053,7 @@ int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { return generic_block_fiemap(inode, fieinfo, start, len, - ext3_get_block); + ext3_get_block, NULL); } /* diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4da228a..9943b81 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5153,7 +5153,7 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* fallback to generic here if not in extents fmt */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return generic_block_fiemap(inode, fieinfo, start, len, - ext4_get_block); + ext4_get_block, NULL); if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) return -EBADR; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f8cf619..3788474 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -724,7 +724,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { return generic_block_fiemap(inode, fieinfo, - start, len, get_data_block_fiemap); + start, len, get_data_block_fiemap, NULL); } static int f2fs_read_data_page(struct file *file, struct page *page) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e62e594..e93a3bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1936,7 +1936,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, ret = 0; } else { ret = __generic_block_fiemap(inode, fieinfo, start, len, - gfs2_block_map); + gfs2_block_map, NULL); } gfs2_glock_dq_uninit(&gh); diff --git a/fs/ioctl.c b/fs/ioctl.c index 8ac3fad..5821a80 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -234,6 +234,7 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) * @start: where to start mapping in the inode * @len: how much space to map * @get_block: the fs's get_block function + * @get_hole_size: the fs's get_hole_size function * * This does FIEMAP for block based inodes. Basically it will just loop * through get_block until we hit the number of extents we want to map, or we @@ -249,7 +250,8 @@ static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, - loff_t len, get_block_t *get_block) + loff_t len, get_block_t *get_block, + get_hole_size_t *get_hole_size) { struct buffer_head map_bh; sector_t start_blk, last_blk; @@ -258,6 +260,7 @@ int __generic_block_fiemap(struct inode *inode, u32 flags = FIEMAP_EXTENT_MERGED; bool past_eof = false, whole_file = false; int ret = 0; + u64 holesize = 1; ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC); if (ret) @@ -297,7 +300,12 @@ int __generic_block_fiemap(struct inode *inode, /* HOLE */ if (!buffer_mapped(&map_bh)) { - start_blk++; + if (get_hole_size) { + holesize = get_hole_size(inode, start_blk); + BUG_ON(!holesize); + } + + start_blk += holesize; /* * We want to handle the case where there is an @@ -403,11 +411,13 @@ EXPORT_SYMBOL(__generic_block_fiemap); int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block) + u64 len, get_block_t *get_block, + get_hole_size_t *get_hole_size) { int ret; mutex_lock(&inode->i_mutex); - ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block); + ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block, + get_hole_size); mutex_unlock(&inode->i_mutex); return ret; } diff --git a/include/linux/fs.h b/include/linux/fs.h index e11d60c..48c9b67 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -65,6 +65,7 @@ extern int sysctl_protected_hardlinks; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); +typedef u64 (get_hole_size_t)(struct inode *inode, sector_t lblock); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); @@ -2545,10 +2546,12 @@ extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, extern int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, - get_block_t *get_block); + get_block_t *get_block, + get_hole_size_t *get_hole_size); extern int generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, - u64 len, get_block_t *get_block); + u64 len, get_block_t *get_block, + get_hole_size_t *get_hole_size); extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html