From: Eric Biggers <ebiggers@xxxxxxxxxx> Convert f2fs_file_write_iter() to use iomap_dio_rw() for direct I/O rather than using f2fs_direct_IO() via __generic_file_write_iter(). This is more complicated than the read-side conversion, but it follows a similar pattern. Some logic in __generic_file_write_iter() needed to be re-implemented, while other things are now handled by iomap_dio_rw(). Existing f2fs behavior such as the conditions for falling back to buffered I/O is retained, except for some things which shouldn't matter such as the exact time that the timestamps are updated. Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx> --- fs/f2fs/data.c | 7 +- fs/f2fs/f2fs.h | 7 +- fs/f2fs/file.c | 215 ++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 180 insertions(+), 49 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9243159ee753..0d2bb651483d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1363,11 +1363,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) f2fs_invalidate_compress_page(sbi, old_blkaddr); } f2fs_update_data_blkaddr(dn, dn->data_blkaddr); - - /* - * i_size will be updated by direct_IO. Otherwise, we'll get stale - * data from unwritten block via dio_read. - */ return 0; } @@ -3130,7 +3125,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, FS_CP_DATA_IO : FS_DATA_IO); } -static void f2fs_write_failed(struct inode *inode, loff_t to) +void f2fs_write_failed(struct inode *inode, loff_t to) { loff_t i_size = i_size_read(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f869c4a2f79f..6dbbac05a15c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3238,10 +3238,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, spin_lock(&sbi->iostat_lock); sbi->rw_iostat[type] += io_bytes; - if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->rw_iostat[APP_BUFFERED_IO] = - sbi->rw_iostat[APP_WRITE_IO] - - sbi->rw_iostat[APP_DIRECT_IO]; + if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO) + sbi->rw_iostat[APP_WRITE_IO] += io_bytes; if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO) sbi->rw_iostat[APP_READ_IO] += io_bytes; @@ -3625,6 +3623,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, struct writeback_control *wbc, enum iostat_type io_type, int compr_blocks, bool allow_balance); +void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 52de655ef833..6b8eac6b25d4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4292,6 +4292,29 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } +static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + ssize_t count; + int err; + + if (IS_IMMUTABLE(inode)) + return -EPERM; + + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + return -EPERM; + + count = generic_write_checks(iocb, from); + if (count <= 0) + return count; + + err = file_modified(file); + if (err) + return err; + return count; +} + /* * Preallocate blocks for a write request, if it is possible and helpful to do * so. Returns a positive number if blocks may have been preallocated, 0 if no @@ -4299,15 +4322,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the * requested blocks (not just some of them) have been allocated. */ -static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter) +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, + bool dio) { struct inode *inode = file_inode(iocb->ki_filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); const loff_t pos = iocb->ki_pos; const size_t count = iov_iter_count(iter); struct f2fs_map_blocks map = {}; - bool dio = (iocb->ki_flags & IOCB_DIRECT) && - !f2fs_force_buffered_io(inode, iocb, iter); int flag; int ret; @@ -4352,13 +4374,153 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter) return map.m_len; } -static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + ssize_t ret; + + if (iocb->ki_flags & IOCB_NOWAIT) + return -EOPNOTSUPP; + + current->backing_dev_info = inode_to_bdi(inode); + ret = generic_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; + + if (ret > 0) { + iocb->ki_pos += ret; + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret); + } + return ret; +} + +static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, + bool *may_need_sync) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + const bool do_opu = f2fs_lfs_mode(sbi); + const int whint_mode = F2FS_OPTION(sbi).whint_mode; + const loff_t pos = iocb->ki_pos; + const ssize_t count = iov_iter_count(from); + const enum rw_hint hint = iocb->ki_hint; + unsigned int dio_flags = 0; + ssize_t ret; + + trace_f2fs_direct_IO_enter(inode, pos, count, WRITE); + + if (iocb->ki_flags & IOCB_NOWAIT) { + /* f2fs_convert_inline_inode() and block allocation can block */ + if (f2fs_has_inline_data(inode) || + !f2fs_overwrite_io(inode, pos, count)) { + ret = -EAGAIN; + goto out; + } + } else { + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; + } + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) { + ret = -EAGAIN; + goto out; + } + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[WRITE]); + ret = -EAGAIN; + goto out; + } + } else { + down_read(&fi->i_gc_rwsem[WRITE]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); + } + + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; + + if (pos + count > inode->i_size) + dio_flags |= IOMAP_DIO_FORCE_WAIT; + ret = iomap_dio_rw(iocb, from, &f2fs_iomap_ops, &f2fs_iomap_dio_ops, + dio_flags); + if (ret == -ENOTBLK) + ret = 0; + + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[WRITE]); + + if (ret < 0) { + if (ret == -EIOCBQUEUED) + f2fs_update_iostat(sbi, APP_DIRECT_IO, + count - iov_iter_count(from)); + goto out; + } + if (pos + ret > inode->i_size) + f2fs_i_size_write(inode, pos + ret); + f2fs_update_iostat(sbi, APP_DIRECT_IO, ret); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); + + if (iov_iter_count(from)) { + ssize_t ret2; + loff_t bufio_start_pos = iocb->ki_pos; + + /* + * The direct write was partial, so we need to fall back to a + * buffered write for the remainder. + */ + + ret2 = f2fs_buffered_write_iter(iocb, from); + if (iov_iter_count(from)) + f2fs_write_failed(inode, iocb->ki_pos); + if (ret2 < 0) + goto out; + + /* + * Ensure that the pagecache pages are written to disk and + * invalidated to preserve the expected O_DIRECT semantics. + */ + if (ret2 > 0) { + loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; + + ret += ret2; + + ret2 = filemap_write_and_wait_range(file->f_mapping, + bufio_start_pos, + bufio_end_pos); + if (ret2 < 0) + goto out; + invalidate_mapping_pages(file->f_mapping, + bufio_start_pos >> PAGE_SHIFT, + bufio_end_pos >> PAGE_SHIFT); + } + } else { + /* iomap_dio_rw() already handled the generic_write_sync(). */ + *may_need_sync = false; + } +out: + trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); + return ret; +} + +static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); const loff_t orig_pos = iocb->ki_pos; const size_t orig_count = iov_iter_count(from); loff_t target_size; + bool dio; + bool may_need_sync = true; int preallocated; ssize_t ret; @@ -4381,48 +4543,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } - if (unlikely(IS_IMMUTABLE(inode))) { - ret = -EPERM; - goto out_unlock; - } - - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { - ret = -EPERM; - goto out_unlock; - } - - ret = generic_write_checks(iocb, from); + ret = f2fs_write_checks(iocb, from); if (ret <= 0) goto out_unlock; - if (iocb->ki_flags & IOCB_NOWAIT) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, - iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, iocb, from)) { - ret = -EAGAIN; - goto out_unlock; - } - } - if (iocb->ki_flags & IOCB_DIRECT) { - /* - * Convert inline data for Direct I/O before entering - * f2fs_direct_IO(). - */ - ret = f2fs_convert_inline_inode(inode); - if (ret) - goto out_unlock; - } + /* Determine whether we will do a direct write or a buffered write. */ + dio = f2fs_should_use_dio(inode, iocb, from); /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); - preallocated = f2fs_preallocate_blocks(iocb, from); + preallocated = f2fs_preallocate_blocks(iocb, from, dio); if (preallocated < 0) { ret = preallocated; goto out_unlock; } - ret = __generic_file_write_iter(iocb, from); + /* Do the actual write. */ + if (dio) + ret = f2fs_dio_write_iter(iocb, from, &may_need_sync); + else + ret = f2fs_buffered_write_iter(iocb, from); /* Don't leave any preallocated blocks around past i_size. */ if (preallocated > 0 && inode->i_size < target_size) { @@ -4433,14 +4573,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } clear_inode_flag(inode, FI_PREALLOCATED_ALL); - - if (ret > 0) - f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); out_unlock: inode_unlock(inode); out: trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); - if (ret > 0) + if (ret > 0 && may_need_sync) ret = generic_write_sync(iocb, ret); return ret; } -- 2.32.0