According to xfstest generic/240, applications seem to expect direct I/O writes to either complete as a whole or to fail; short direct I/O writes are apparently not appreciated. This means that when only part of an asynchronous direct I/O write succeeds, we can either fail the entire write, or we can wait for the partial write to complete and retry the remaining write using buffered I/O. The old __blockdev_direct_IO helper has code for waiting for partial writes to complete; the new iomap_dio_rw iomap helper does not. The above mentioned fallback mode is used by gfs2, which doesn't allow block allocations under direct I/O to avoid taking cluster-wide exclusive locks. As a consequence, an asynchronous direct I/O write to a file range that ends in a hole will result in a short write. When that happens, we want to retry the remaining write using buffered I/O. To allow that, change iomap_dio_rw to wait for short direct I/O writes like __blockdev_direct_IO does instead of returning -EIOCBQUEUED. This fixes xfstest generic/240 on gfs2.
Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx> Cc: Dave Chinner <dchinner@xxxxxxxxxx> --- fs/iomap.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 27d97a290623..befddf91fb38 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -821,9 +821,8 @@ static void iomap_dio_bio_end_io(struct bio *bio) iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status)); if (atomic_dec_and_test(&dio->ref)) { - if (is_sync_kiocb(dio->iocb)) { - struct task_struct *waiter = dio->submit.waiter; - + struct task_struct *waiter = dio->submit.waiter; + if (waiter) { WRITE_ONCE(dio->submit.waiter, NULL); wake_up_process(waiter); } else if (dio->flags & IOMAP_DIO_WRITE) { @@ -997,6 +996,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; + bool wait_for_completion = is_sync_kiocb(iocb); lockdep_assert_held(&inode->i_rwsem); @@ -1016,11 +1016,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio->flags = 0; dio->submit.iter = iter; - if (is_sync_kiocb(iocb)) { - dio->submit.waiter = current; - dio->submit.cookie = BLK_QC_T_NONE; - dio->submit.last_queue = NULL; - } if (iov_iter_rw(iter) == READ) { if (pos >= dio->i_size) @@ -1057,7 +1052,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; - if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && + if (iov_iter_rw(iter) == WRITE && !wait_for_completion && !inode->i_sb->s_dio_done_wq) { ret = sb_init_dio_done_wq(inode->i_sb); if (ret < 0) @@ -1074,6 +1069,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, /* magic error code to fall back to buffered I/O */ if (ret == -ENOTBLK) ret = 0; + if (iov_iter_rw(iter) == WRITE) + wait_for_completion = true; break; } pos += ret; @@ -1081,13 +1078,20 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == READ && pos >= dio->i_size) 
break; } while ((count = iov_iter_count(iter)) > 0); + + dio->submit.waiter = NULL; + if (wait_for_completion) { + dio->submit.waiter = current; + dio->submit.cookie = BLK_QC_T_NONE; + dio->submit.last_queue = NULL; + } blk_finish_plug(&plug); if (ret < 0) iomap_dio_set_error(dio, ret); if (!atomic_dec_and_test(&dio->ref)) { - if (!is_sync_kiocb(iocb)) + if (!wait_for_completion) return -EIOCBQUEUED; for (;;) { -- 2.17.0