On 2024/11/19 7:27, Darrick J. Wong wrote: > On Tue, Oct 22, 2024 at 07:10:35PM +0800, Zhang Yi wrote: >> From: Zhang Yi <yi.zhang@xxxxxxxxxx> >> >> The current implementation of ext4_punch_hole() contains complex >> position calculations and stale error tags. To improve the code's >> clarity and maintainability, it is essential to clean up the code and >> improve its readability, this can be achieved by: a) simplifying and >> renaming variables; b) eliminating unnecessary position calculations; >> c) writing back all data in data=journal mode, and drop page cache from >> the original offset to the end, rather than using aligned blocks, >> d) renaming the stale error tags. >> >> Signed-off-by: Zhang Yi <yi.zhang@xxxxxxxxxx> >> --- >> fs/ext4/inode.c | 140 +++++++++++++++++++++--------------------------- >> 1 file changed, 62 insertions(+), 78 deletions(-) >> >> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c >> index 94b923afcd9c..1d128333bd06 100644 >> --- a/fs/ext4/inode.c >> +++ b/fs/ext4/inode.c >> @@ -3955,13 +3955,14 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) >> { >> struct inode *inode = file_inode(file); >> struct super_block *sb = inode->i_sb; >> - ext4_lblk_t first_block, stop_block; >> + ext4_lblk_t start_lblk, end_lblk; >> struct address_space *mapping = inode->i_mapping; >> - loff_t first_block_offset, last_block_offset, max_length; >> - struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); >> + loff_t max_end = EXT4_SB(sb)->s_bitmap_maxbytes - sb->s_blocksize; >> + loff_t end = offset + length; >> + unsigned long blocksize = i_blocksize(inode); >> handle_t *handle; >> unsigned int credits; >> - int ret = 0, ret2 = 0; >> + int ret = 0; >> >> trace_ext4_punch_hole(inode, offset, length, 0); >> >> @@ -3969,36 +3970,27 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) >> >> /* No need to punch hole beyond i_size */ >> if (offset >= inode->i_size) >> - goto out_mutex; >> + goto out; >> >> /* >> - * If the hole extends beyond i_size, set the hole >> - * to end after the page that contains i_size >> + * If the hole extends beyond i_size, set the hole to end after >> + * the page that contains i_size, and also make sure that the hole >> + * within one block before last range. >> */ >> - if (offset + length > inode->i_size) { >> - length = inode->i_size + >> - PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) - >> - offset; >> - } >> + if (end > inode->i_size) >> + end = round_up(inode->i_size, PAGE_SIZE); >> + if (end > max_end) >> + end = max_end; >> + length = end - offset; >> >> /* >> - * For punch hole the length + offset needs to be within one block >> - * before last range. Adjust the length if it goes beyond that limit. >> + * Attach jinode to inode for jbd2 if we do any zeroing of partial >> + * block. >> */ >> - max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize; >> - if (offset + length > max_length) >> - length = max_length - offset; >> - >> - if (offset & (sb->s_blocksize - 1) || >> - (offset + length) & (sb->s_blocksize - 1)) { >> - /* >> - * Attach jinode to inode for jbd2 if we do any zeroing of >> - * partial block >> - */ >> + if (offset & (blocksize - 1) || end & (blocksize - 1)) { > > IS_ALIGNED(offset | end, blocksize) ? Right, this helper looks better, thanks for pointing this out. > >> ret = ext4_inode_attach_jinode(inode); >> if (ret < 0) >> - goto out_mutex; >> - >> + goto out; >> } >> >> /* Wait all existing dio workers, newcomers will block on i_rwsem */ >> @@ -4006,7 +3998,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) >> >> ret = file_modified(file); >> if (ret) >> - goto out_mutex; >> + goto out; >> >> /* >> * Prevent page faults from reinstantiating pages we have released from >> @@ -4016,34 +4008,24 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) >> >> ret = ext4_break_layouts(inode); >> if (ret) >> - goto out_dio; >> - >> - first_block_offset = round_up(offset, sb->s_blocksize); >> - last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; >> + goto out_invalidate_lock; >> >> - /* Now release the pages and zero block aligned part of pages*/ >> - if (last_block_offset > first_block_offset) { >> + /* >> + * For journalled data we need to write (and checkpoint) pages >> + * before discarding page cache to avoid inconsitent data on > > inconsistent Yeah. > >> + * disk in case of crash before punching trans is committed. >> + */ >> + if (ext4_should_journal_data(inode)) { >> + ret = filemap_write_and_wait_range(mapping, offset, end - 1); >> + } else { >> ret = ext4_update_disksize_before_punch(inode, offset, length); >> - if (ret) >> - goto out_dio; >> - >> - /* >> - * For journalled data we need to write (and checkpoint) pages >> - * before discarding page cache to avoid inconsitent data on >> - * disk in case of crash before punching trans is committed. >> - */ >> - if (ext4_should_journal_data(inode)) { >> - ret = filemap_write_and_wait_range(mapping, >> - first_block_offset, last_block_offset); >> - if (ret) >> - goto out_dio; >> - } >> - >> - ext4_truncate_folios_range(inode, first_block_offset, >> - last_block_offset + 1); >> - truncate_pagecache_range(inode, first_block_offset, >> - last_block_offset); >> + ext4_truncate_folios_range(inode, offset, end); >> } >> + if (ret) >> + goto out_invalidate_lock; >> + >> + /* Now release the pages and zero block aligned part of pages*/ >> + truncate_pagecache_range(inode, offset, end - 1); >> >> if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) >> credits = ext4_writepage_trans_blocks(inode); >> @@ -4053,52 +4035,54 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) >> if (IS_ERR(handle)) { >> ret = PTR_ERR(handle); >> ext4_std_error(sb, ret); >> - goto out_dio; >> + goto out_invalidate_lock; >> } >> >> - ret = ext4_zero_partial_blocks(handle, inode, offset, >> - length); >> + ret = ext4_zero_partial_blocks(handle, inode, offset, length); >> if (ret) >> - goto out_stop; >> - >> - first_block = (offset + sb->s_blocksize - 1) >> >> - EXT4_BLOCK_SIZE_BITS(sb); >> - stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); >> + goto out_handle; >> >> /* If there are blocks to remove, do it */ >> - if (stop_block > first_block) { >> - ext4_lblk_t hole_len = stop_block - first_block; >> + start_lblk = round_up(offset, blocksize) >> inode->i_blkbits; > > egad I wish ext4 had nicer unit conversion helpers. > > static inline ext4_lblk_t > EXT4_B_TO_LBLK(struct ext4_sb_info *sbi, ..., loff_t offset) > { > return round_up(offset, blocksize) >> inode->i_blkbits; > } > > start_lblk = EXT4_B_TO_LBLK(sbi, offset); > > ah well. > Sure, it looks clearer. >> + end_lblk = end >> inode->i_blkbits; >> + >> + if (end_lblk > start_lblk) { >> + ext4_lblk_t hole_len = end_lblk - start_lblk; >> >> down_write(&EXT4_I(inode)->i_data_sem); >> ext4_discard_preallocations(inode); >> >> - ext4_es_remove_extent(inode, first_block, hole_len); >> + ext4_es_remove_extent(inode, start_lblk, hole_len); >> >> if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) >> - ret = ext4_ext_remove_space(inode, first_block, >> - stop_block - 1); >> + ret = ext4_ext_remove_space(inode, start_lblk, >> + end_lblk - 1); >> else >> - ret = ext4_ind_remove_space(handle, inode, first_block, >> - stop_block); >> + ret = ext4_ind_remove_space(handle, inode, start_lblk, >> + end_lblk); >> + if (ret) { >> + up_write(&EXT4_I(inode)->i_data_sem); >> + goto out_handle; >> + } >> >> - ext4_es_insert_extent(inode, first_block, hole_len, ~0, >> + ext4_es_insert_extent(inode, start_lblk, hole_len, ~0, >> EXTENT_STATUS_HOLE, 0); >> up_write(&EXT4_I(inode)->i_data_sem); >> } >> - ext4_fc_track_range(handle, inode, first_block, stop_block); >> + ext4_fc_track_range(handle, inode, start_lblk, end_lblk); >> + >> + ret = ext4_mark_inode_dirty(handle, inode); >> + if (unlikely(ret)) >> + goto out_handle; >> + >> + ext4_update_inode_fsync_trans(handle, inode, 1); >> if (IS_SYNC(inode)) >> ext4_handle_sync(handle); >> - >> - ret2 = ext4_mark_inode_dirty(handle, inode); >> - if (unlikely(ret2)) >> - ret = ret2; >> - if (ret >= 0) >> - ext4_update_inode_fsync_trans(handle, inode, 1); >> -out_stop: >> +out_handle: >> ext4_journal_stop(handle); >> -out_dio: >> +out_invalidate_lock: >> filemap_invalidate_unlock(mapping); >> -out_mutex: >> +out: > > Why drop "_mutex"? You're unlocking *something* on the way out. > "_mutex" is no longer accurate, as the inode has changed to using rwsem instead. But never mind, this "out" tag is also removed in patch 9. Thanks, Yi. > >> inode_unlock(inode); >> return ret; >> } >> -- >> 2.46.1 >> >>