ext4: online defrag -- Add the EXT4_IOC_FREE_BLOCKS_INFO ioctl. From: Akira Fujita <a-fujita@xxxxxxxxxxxxx> The EXT4_IOC_FREE_BLOCKS_INFO ioctl gets free extents information of the target block group. This ioctl is used only in the force defrag (-f). Defragger calculates the victim extents (which will be moved into other block group in order to make contiguous free space for the target file) based on used/unused extents information. Signed-off-by: Akira Fujita <a-fujita@xxxxxxxxxxxxx> Signed-off-by: Takashi Sato <t-sato@xxxxxxxxxxxxx> --- fs/ext4/balloc.c | 2 +- fs/ext4/defrag.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/ext4.h | 27 +++++++++++++ fs/ext4/ioctl.c | 3 +- 4 files changed, 142 insertions(+), 2 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 02068fa..64ec04c 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -919,7 +919,7 @@ static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) * bitmap on disk and the last-committed copy in journal, until we find a * bit free in both bitmaps. */ -static ext4_grpblk_t +ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, ext4_grpblk_t maxblocks) { diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c index cb7d237..941414b 100644 --- a/fs/ext4/defrag.c +++ b/fs/ext4/defrag.c @@ -90,6 +90,102 @@ err: return -EIO; } +/** + * ext4_defrag_fblocks_distribution - Search free blocks distribution + * + * @org_inode: original inode + * @ext_info: ext4_extents_info + * + * This function returns 0 if succeed, otherwise returns error value. + */ +static int +ext4_defrag_fblocks_distribution(struct inode *org_inode, + struct ext4_extents_info *ext_info) +{ + struct buffer_head *bitmap_bh = NULL; + struct super_block *sb = org_inode->i_sb; + handle_t *handle; + ext4_group_t group_no; + ext4_grpblk_t start, end; + ext4_fsblk_t start_block = 0; + int i, err; + int num = 0; + int len = 0; + int block_set = 0; + int extra_block = 0; + + if (!sb) { + printk(KERN_ERR "ext4 defrag: Non-existent device\n"); + return -ENOSPC; + } + + group_no = (org_inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); + start = ext_info->g_offset; + end = EXT4_BLOCKS_PER_GROUP(sb) - 1; + + /* We consider about the boot block if bs = 1k */ + if (sb->s_blocksize == 1024) + extra_block = 1; + + handle = ext4_journal_start(org_inode, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + return err; + } + + bitmap_bh = ext4_read_block_bitmap(sb, group_no); + if (!bitmap_bh) { + err = -EIO; + goto out; + } + + BUFFER_TRACE(bitmap_bh, "get undo access for new block"); + err = ext4_journal_get_undo_access(handle, bitmap_bh); + if (err) + goto out; + + for (i = start; i <= end ; i++) { + if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) { + len++; + /* + * Reset start_block if the free block is + * the head of region. + */ + if (!block_set) { + start_block = + i + group_no * EXT4_BLOCKS_PER_GROUP(sb) + + extra_block; + block_set = 1; + } + } else if (len) { + ext_info->ext[num].start = start_block; + ext_info->ext[num].len = len; + num++; + len = 0; + block_set = 0; + if (num == ext_info->max_entries) { + ext_info->g_offset = i + 1; + break; + } + } + if (i == end && len) { + ext_info->ext[num].start = start_block; + ext_info->ext[num].len = len; + num++; + } + } + + ext_info->entries = num; +out: + ext4_journal_release_buffer(handle, bitmap_bh); + brelse(bitmap_bh); + + if (handle) + ext4_journal_stop(handle); + + return err; +} + int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -125,6 +221,22 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, if (copy_to_user((struct ext4_group_data_info __user *)arg, &grp_data, sizeof(grp_data))) return -EFAULT; + } else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) { + struct ext4_extents_info ext_info; + + if (copy_from_user(&ext_info, + (struct ext4_extents_info __user *)arg, + sizeof(ext_info))) + return -EFAULT; + + BUG_ON(ext_info.ino != inode->i_ino); + + err = ext4_defrag_fblocks_distribution(inode, &ext_info); + + if (!err) + err = copy_to_user( + (struct ext4_extents_info __user *)arg, + &ext_info, sizeof(ext_info)); } else if (cmd == EXT4_IOC_DEFRAG) { struct ext4_ext_defrag_data defrag; struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a635a76..ffe687b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -305,6 +305,7 @@ struct ext4_new_group_data { #define EXT4_IOC_DEFRAG _IOW('f', 15, struct ext4_ext_defrag_data) #define EXT4_IOC_FIBMAP _IOW('f', 16, ext4_fsblk_t) #define EXT4_IOC_GROUP_INFO _IOW('f', 17, struct ext4_group_data_info) +#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 18, struct ext4_extents_info) /* * ioctl commands in 32 bit emulation @@ -322,6 +323,20 @@ struct ext4_new_group_data { #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION +/* + * The following four macros are used for the defrag force mode. + * + * DEFRAG_MAX_ENT: the maximum number of extents for exchanging between + * kernel-space and user-space per an ioctl + */ +#define DEFRAG_MAX_ENT 32 + +struct ext4_extent_data { + ext4_lblk_t block; /* start logical block number */ + ext4_fsblk_t start; /* start physical block number */ + int len; /* blocks count */ +}; + struct ext4_ext_defrag_data { ext4_lblk_t start_offset; /* start offset to defrag in blocks */ ext4_lblk_t defrag_size; /* size of defrag in blocks */ @@ -333,6 +348,16 @@ struct ext4_group_data_info { int s_inodes_per_group; /* inodes per group */ }; +struct ext4_extents_info { + unsigned long long ino; /* inode number */ + int max_entries; /* maximum extents count */ + int entries; /* extent number/count */ + ext4_lblk_t f_offset; /* file offset */ + ext4_grpblk_t g_offset; /* group offset */ + ext4_fsblk_t goal; /* block offset for allocation */ + struct ext4_extent_data ext[DEFRAG_MAX_ENT]; +}; + #define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */ /* @@ -1015,6 +1040,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern void ext4_init_block_alloc_info(struct inode *); extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); +extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t, + struct buffer_head *, ext4_grpblk_t); /* dir.c */ extern int ext4_check_dir_entry(const char *, struct inode *, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c05502f..40bc2e1 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -258,7 +258,8 @@ setversion_out: } case EXT4_IOC_FIBMAP: case EXT4_IOC_DEFRAG: - case EXT4_IOC_GROUP_INFO: { + case EXT4_IOC_GROUP_INFO: + case EXT4_IOC_FREE_BLOCKS_INFO: { return ext4_defrag_ioctl(inode, filp, cmd, arg); } case EXT4_IOC_GROUP_ADD: { -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html