Switch ext4 from the magic i_dio_count scheme to just holding i_rwsem until the actual I/O has completed, to reduce the locking complexity and avoid nasty bugs due to missing inode_dio_wait calls. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/ext4/extents.c | 12 ------------ fs/ext4/file.c | 21 +++++++++++++-------- fs/ext4/inode.c | 11 ----------- fs/ext4/ioctl.c | 5 ----- fs/ext4/move_extent.c | 4 ---- 5 files changed, 13 insertions(+), 40 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0e8708b77da6..b6aa2d249b30 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4777,9 +4777,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (mode & FALLOC_FL_KEEP_SIZE) flags |= EXT4_GET_BLOCKS_KEEP_SIZE; - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* Preallocate the range including the unaligned edges */ if (partial_begin || partial_end) { ret = ext4_alloc_file_blocks(file, @@ -4949,9 +4946,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); if (ret) goto out; @@ -5525,9 +5519,6 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. @@ -5678,9 +5669,6 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) goto out_mutex; } - /* Wait for existing dio to complete */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. 
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 08b603d0c638..b3410a3ede27 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -74,9 +74,10 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); - inode_unlock_shared(inode); - + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, + IOMAP_DIO_RWSEM_SHARED); + if (ret != -EIOCBQUEUED) + inode_unlock_shared(inode); file_accessed(iocb->ki_filp); return ret; } @@ -405,7 +406,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) !is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) { unaligned_aio = true; dio_flags |= IOMAP_DIO_SYNCHRONOUS; - inode_dio_wait(inode); } /* @@ -416,7 +416,10 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) && ext4_should_dioread_nolock(inode)) { overwrite = true; + dio_flags |= IOMAP_DIO_RWSEM_SHARED; downgrade_write(&inode->i_rwsem); + } else { + dio_flags |= IOMAP_DIO_RWSEM_EXCL; } if (offset + count > EXT4_I(inode)->i_disksize) { @@ -444,10 +447,12 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = ext4_handle_inode_extension(inode, offset, ret, count); out: - if (overwrite) - inode_unlock_shared(inode); - else - inode_unlock(inode); + if (ret != -EIOCBQUEUED) { + if (overwrite) + inode_unlock_shared(inode); + else + inode_unlock(inode); + } if (ret >= 0 && iov_iter_count(from)) { ssize_t err; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 629a25d999f0..e2dac0727ab0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3965,9 +3965,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ - inode_dio_wait(inode); - /* * Prevent page faults from reinstantiating pages we have released from * page cache. 
@@ -5263,11 +5260,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) goto err_out; } - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. - */ - inode_dio_wait(inode); } down_write(&EXT4_I(inode)->i_mmap_sem); @@ -5798,9 +5790,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (is_journal_aborted(journal)) return -EROFS; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - /* * Before flushing the journal and switching inode's aops, we have * to flush all dirty data the inode has. There can be outstanding diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e8870fff8224..99d21d81074f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -153,10 +153,6 @@ static long swap_inode_boot_loader(struct super_block *sb, if (err) goto err_out; - /* Wait for all existing dio workers */ - inode_dio_wait(inode); - inode_dio_wait(inode_bl); - truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode_bl->i_data, 0); @@ -364,7 +360,6 @@ static int ext4_ioctl_setflags(struct inode *inode, */ if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) && (flags & EXT4_IMMUTABLE_FL)) { - inode_dio_wait(inode); err = filemap_write_and_wait(inode->i_mapping); if (err) goto flags_out; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 30ce3dc69378..20240808569f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -602,10 +602,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); - /* Wait for all existing dio workers */ - inode_dio_wait(orig_inode); - inode_dio_wait(donor_inode); - /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ -- 2.24.1