On Sun, 23 Jun 2013 15:07:36 +0900, Namjae Jeon <linkinjeon@xxxxxxxxx> wrote: > From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx> What is the difference between this ioctl and generic punch_hole? > > The EXT4_IOC_TRUNCATE_BLOCK_RANGE removes the data blocks lying > between [start, "start + length") and updates the logical block numbers > of data blocks starting from "start + length" block to last block of file. > This will maintain contiguous nature of logical block numbers > after block removal. > Both the inode's disksize and logical size are updated after block > removal > > Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx> > Signed-off-by: Ashish Sangwan <a.sangwan@xxxxxxxxxxx> > --- > fs/ext4/ext4.h | 8 ++ > fs/ext4/ext4_extents.h | 3 + > fs/ext4/extents.c | 245 ++++++++++++++++++++++++++++++++++++++++++++++++ > fs/ext4/ioctl.c | 62 ++++++++++++ > 4 files changed, 318 insertions(+) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 6ed348d..df2c411 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -590,6 +590,7 @@ enum { > #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) > #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) > #define EXT4_IOC_SWAP_BOOT _IO('f', 17) > +#define EXT4_IOC_TRUNCATE_BLOCK_RANGE _IOW('f', 18, struct truncate_range) > > #if defined(__KERNEL__) && defined(CONFIG_COMPAT) > /* > @@ -682,6 +683,11 @@ struct move_extent { > __u64 moved_len; /* moved block length */ > }; > > +struct truncate_range { > + __u32 start_block; > + __u32 length; > +}; > + > #define EXT4_EPOCH_BITS 2 > #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) > #define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) > @@ -2692,6 +2698,8 @@ extern int ext4_find_delalloc_range(struct inode *inode, > extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); > extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, > __u64 start, __u64 len); > +extern int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start, 
> + ext4_lblk_t end, ext4_lblk_t last_block); > > > /* move_extent.c */ > diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h > index 51bc821..cc113cc 100644 > --- a/fs/ext4/ext4_extents.h > +++ b/fs/ext4/ext4_extents.h > @@ -178,6 +178,9 @@ struct ext4_ext_path { > #define EXT_MAX_INDEX(__hdr__) \ > (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) > > +#define EXTENT_START_FLAG 0x1 > +#define INDEX_START_FLAG 0x2 > + > static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) > { > return (struct ext4_extent_header *) EXT4_I(inode)->i_data; > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index 937593e..ed85e34 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -4757,3 +4757,248 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, > > return error; > } > + > +/* > + * ext4_trange_dirty_path: Function to mark the path buffer dirty. > + * It also checks if there are sufficient credits left in the > + * journal to update metadata. If too few credits remain, > + * restart the handle with additional credits. > + * > + * @handle: journal handle > + * @inode: file inode > + * @path: pointer to path > + * @num: number of inodes to be updated > + * > + * Returns: 0 on success or negative value on error > + */ > +int ext4_trange_dirty_path(handle_t *handle, struct inode *inode, > + struct ext4_ext_path *path, > + int num, ...) 
> +{ > + int credits, err, i; > + struct inode *iptr; > + va_list args; > + > + /* > + * Check if need to extend journal credits > + * 3 for leaf, sb, and inode plus 2 (bmap and group > + * descriptor) for each block group; assume two block > + * groups > + */ > + if (handle->h_buffer_credits < 7*(num + 1)) { > + credits = ext4_writepage_trans_blocks(inode); > + va_start(args, num); > + for (i = 1; i <= num; i++) { > + iptr = va_arg(args, struct inode *); > + credits += ext4_writepage_trans_blocks(iptr); > + } > + va_end(args); > + err = ext4_ext_truncate_extend_restart(handle, inode, credits); > + /* EAGAIN is success */ > + if (err && err != -EAGAIN) > + return err; > + } > + err = ext4_ext_get_access(handle, inode, path); > + return err; > +} > + > +/* > + * ext4_ext_update_path: update the extents of a path structure > + * lying between path[depth].p_ext and EXT_LAST_EXTENT(path[depth].p_hdr) > + * subtracting shift from starting block for each extent. > + * > + * @path: path for which extents are updated > + * @shift: Number of blocks to be subtracted from first logical block > + * that extent covers for each extent. > + * @inode: file inode > + * @handle: journal handle > + * @start_block: Points to the starting block of next extent which is > + * to be updated. > + * > + * Returns: 0 on success or negative on error. 
> + */ > +int ext4_ext_update_path(struct ext4_ext_path *path, ext4_lblk_t shift, > + struct inode *inode, handle_t *handle, > + ext4_lblk_t *start_block) > +{ > + int depth, err = 0, flag = 0; > + struct ext4_extent *ex_start, *ex_last; > + > + depth = path->p_depth; > + while (depth >= 0) { > + if (depth == path->p_depth) { > + ex_start = path[depth].p_ext; > + if (!ex_start) > + return -EIO; > + > + err = ext4_trange_dirty_path(handle, inode, > + path + depth, 0); > + if (err) > + goto out; > + > + if (path[depth].p_ext == > + EXT_FIRST_EXTENT(path[depth].p_hdr)) > + flag |= EXTENT_START_FLAG; > + > + ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); > + while (ex_start <= ex_last) { > + *start_block = ex_start->ee_block + > + ext4_ext_get_actual_len(ex_start); > + ex_start->ee_block -= shift; > + ex_start++; > + } > + err = ext4_ext_dirty(handle, inode, path + depth); > + if (err) > + goto out; > + } else { > + /* If encountered starting extent, update index too */ > + if (path->p_depth - depth == 1) { > + if (flag & EXTENT_START_FLAG) { > + /* Update index too */ > + err = ext4_trange_dirty_path(handle, > + inode, path + depth, 0); > + if (err) > + goto out; > + path[depth].p_idx->ei_block -= shift; > + err = ext4_ext_dirty(handle, inode, > + path + depth); > + if (err) > + goto out; > + flag &= ~EXTENT_START_FLAG; > + } else > + /* No need to update any extent index */ > + break; > + } > + /* Check, if earlier encountered starting index */ > + if (flag & INDEX_START_FLAG) { > + err = ext4_trange_dirty_path(handle, inode, > + path + (depth), 0); > + if (err) > + goto out; > + path[depth].p_idx->ei_block -= shift; > + err = ext4_ext_dirty(handle, inode, > + path + depth); > + if (err) > + goto out; > + flag &= ~INDEX_START_FLAG; > + } > + /* Check if this is a starting index */ > + if (path[depth].p_idx == > + EXT_FIRST_INDEX(path[depth].p_hdr)) { > + /* starting of a block */ > + flag |= INDEX_START_FLAG; > + } else > + break; > + } > + depth--; > + } > +out: > + 
return err; > +} > + > +/* > + * ext4_ext_update_logical: update logical blocks ranging from start > + * to the end block for inode by moving them shift blocks to the left > + * > + * @inode: file inode > + * @handle: journal handle > + * @start_block: starting block for the block update > + * @shift: number of blocks to be shifted > + * @end_block: last block to be updated > + * > + * Returns: 0 on success or negative on failure > + */ > +static int ext4_ext_update_logical(struct inode *inode, handle_t *handle, > + ext4_lblk_t start_block, ext4_lblk_t shift, > + ext4_lblk_t end_block) > +{ > + struct ext4_ext_path *path; > + int err = 0; > + > + while (start_block < end_block) { > + path = ext4_ext_find_extent(inode, start_block, NULL); > + if (IS_ERR(path)) { > + err = PTR_ERR(path); > + break; > + } > + err = ext4_ext_update_path(path, shift, inode, > + handle, &start_block); > + ext4_ext_drop_refs(path); > + kfree(path); > + if (err) > + break; > + } > + return err; > +} > + > +/* > + * ext4_ext_truncate_range: truncate the block range from start > + * block to end block including the end block from inode. 
> + * > + * @inode: file inode > + * @start: start block > + * @end: end block > + * @last_block: last block number of the inode > + * > + * Returns: 0 on success or negative on error > + */ > +int ext4_ext_truncate_range(struct inode *inode, ext4_lblk_t start, > + ext4_lblk_t end, ext4_lblk_t last_block) > +{ > + int ret, credits; > + ext4_lblk_t shift = end - start + 1; > + handle_t *handle; > + loff_t isize_reduced; > + int blkbits = inode->i_blkbits; > + struct address_space *mapping = inode->i_mapping; > + > + /* sync dirty pages for transfer */ > + if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { > + ret = filemap_write_and_wait_range(mapping, > + (loff_t)start << blkbits, > + ((loff_t)(last_block + 1) << blkbits) - 1); > + if (ret) > + return ret; > + } > + truncate_inode_pages_range(inode->i_mapping, > + start << inode->i_blkbits, -1); > + ext4_inode_block_unlocked_dio(inode); > + inode_dio_wait(inode); > + down_write(&EXT4_I(inode)->i_data_sem); > + ext4_discard_preallocations(inode); > + ret = ext4_es_remove_extent(inode, start, end - start + 1); > + if (ret) > + goto out; > + > + credits = ext4_writepage_trans_blocks(inode); > + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); > + if (IS_ERR(handle)) { > + ret = PTR_ERR(handle); > + goto out; > + } > + > + ret = ext4_ext_remove_space(inode, start, end); > + if (ret) > + goto journal_stop; > + > + ext4_discard_preallocations(inode); > + > + if (end < last_block) { > + ret = ext4_ext_update_logical(inode, handle, end + 1, > + shift, last_block + 1); > + if (ret) > + goto journal_stop; > + } > + isize_reduced = (loff_t)shift << blkbits; > + i_size_write(inode, inode->i_size - isize_reduced); > + EXT4_I(inode)->i_disksize -= isize_reduced; > + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); > + ext4_mark_inode_dirty(handle, inode); > +journal_stop: > + ext4_journal_stop(handle); > +out: > + ext4_inode_resume_unlocked_dio(inode); > + 
up_write(&EXT4_I(inode)->i_data_sem); > + return ret; > +} > + > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c > index 9491ac0..0530daf 100644 > --- a/fs/ext4/ioctl.c > +++ b/fs/ext4/ioctl.c > @@ -622,6 +622,68 @@ resizefs_out: > > return 0; > } > + case EXT4_IOC_TRUNCATE_BLOCK_RANGE: > + { > + struct truncate_range tr; > + ext4_lblk_t last_block, end_block; > + int error; > + loff_t i_size = i_size_read(inode); > + > + if (!i_size) > + return 0; > + > + if (!(filp->f_mode & FMODE_WRITE)) > + return -EBADF; > + > + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) > + return -EPERM; > + > + if (!S_ISREG(inode->i_mode)) > + return -EOPNOTSUPP; > + > + if (IS_SWAPFILE(inode)) > + return -EOPNOTSUPP; > + > + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) > + return -EOPNOTSUPP; > + > + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, > + EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { > + ext4_msg(sb, KERN_ERR, > + "Truncate block range not supported with bigalloc"); > + return -EOPNOTSUPP; > + } > + > + if (copy_from_user(&tr, (const void *) arg, > + sizeof(struct truncate_range))) > + return -EFAULT; > + > + if (!tr.length) > + return -EINVAL; > + > + end_block = tr.start_block + tr.length - 1; > + > + last_block = ((round_up(i_size, > + EXT4_BLOCK_SIZE(inode->i_sb))) > + >> inode->i_blkbits) - 1; > + if (tr.start_block > end_block || > + tr.start_block > last_block) > + return -EINVAL; > + > + if (end_block > last_block) > + end_block = last_block; > + > + error = mnt_want_write_file(filp); > + if (error) > + return error; > + > + mutex_lock(&inode->i_mutex); > + error = ext4_ext_truncate_range(inode, tr.start_block, > + end_block, last_block); > + mutex_unlock(&inode->i_mutex); > + mnt_drop_write_file(filp); > + return error; > + } > > default: > return -ENOTTY; > -- > 1.7.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please 
read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html