Call generic_write_sync from the deferred I/O completion handler if O_DSYNC is set for a write request. Also make sure various callers don't call generic_write_sync if the direct I/O code returns -EIOCBQUEUED. Note: this currently breaks ext4 due to its convoluted unwritten extent conversion code. I've tried to understand it, and as far as I can see it's a workaround for the fact that ext4 marks page writeback as completed before converting unwritten extents. Ext4 should follow xfs on this and only mark writeback as completed when it really is; at that point it can remove the big hairy mess that forces unwritten extent conversions from fsync, truncate and a few other places. Based on an earlier patch from Jan Kara <jack@xxxxxxx> with updates from Jeff Moyer <jmoyer@xxxxxxxxxx> and Darrick J. Wong <darrick.wong@xxxxxxxxxx>. Signed-off-by: Christoph Hellwig <hch@xxxxxx> --- fs/block_dev.c | 2 +- fs/btrfs/file.c | 2 +- fs/cifs/file.c | 2 +- fs/direct-io.c | 22 +++++++++++++++++++++- fs/ext4/file.c | 2 +- mm/filemap.c | 2 +- 6 files changed, 26 insertions(+), 6 deletions(-) Index: linux-2.6/fs/block_dev.c =================================================================== --- linux-2.6.orig/fs/block_dev.c 2012-11-21 21:19:34.075136013 +0100 +++ linux-2.6/fs/block_dev.c 2012-11-21 21:23:51.227142598 +0100 @@ -1631,7 +1631,7 @@ ssize_t blkdev_aio_write(struct kiocb *i percpu_down_read(&bdev->bd_block_size_semaphore); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); - if (ret > 0 || ret == -EIOCBQUEUED) { + if (ret > 0) { ssize_t err; err = generic_write_sync(file, pos, ret); Index: linux-2.6/fs/btrfs/file.c =================================================================== --- linux-2.6.orig/fs/btrfs/file.c 2012-11-21 21:19:34.075136013 +0100 +++ linux-2.6/fs/btrfs/file.c 2012-11-21 21:23:51.231142597 +0100 @@ -1495,7 +1495,7 @@ static ssize_t btrfs_file_aio_write(stru * one running right now. 
*/ BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; - if (num_written > 0 || num_written == -EIOCBQUEUED) { + if (num_written > 0) { err = generic_write_sync(file, pos, num_written); if (err < 0 && num_written > 0) num_written = err; Index: linux-2.6/fs/cifs/file.c =================================================================== --- linux-2.6.orig/fs/cifs/file.c 2012-11-21 21:19:34.075136013 +0100 +++ linux-2.6/fs/cifs/file.c 2012-11-21 21:23:51.231142597 +0100 @@ -2464,7 +2464,7 @@ cifs_writev(struct kiocb *iocb, const st mutex_unlock(&inode->i_mutex); } - if (rc > 0 || rc == -EIOCBQUEUED) { + if (rc > 0) { ssize_t err; err = generic_write_sync(file, pos, rc); Index: linux-2.6/fs/ext4/file.c =================================================================== --- linux-2.6.orig/fs/ext4/file.c 2012-11-21 21:19:34.075136013 +0100 +++ linux-2.6/fs/ext4/file.c 2012-11-21 21:23:51.231142597 +0100 @@ -155,7 +155,7 @@ ext4_file_dio_write(struct kiocb *iocb, ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); - if (ret > 0 || ret == -EIOCBQUEUED) { + if (ret > 0) { ssize_t err; err = generic_write_sync(file, pos, ret); Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c 2012-11-21 21:19:34.075136013 +0100 +++ linux-2.6/mm/filemap.c 2012-11-21 21:23:51.235142597 +0100 @@ -2532,7 +2532,7 @@ ssize_t generic_file_aio_write(struct ki ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); - if (ret > 0 || ret == -EIOCBQUEUED) { + if (ret > 0) { ssize_t err; err = generic_write_sync(file, pos, ret); Index: linux-2.6/fs/direct-io.c =================================================================== --- linux-2.6.orig/fs/direct-io.c 2012-11-21 21:22:57.875141232 +0100 +++ linux-2.6/fs/direct-io.c 2012-11-21 21:23:51.235142597 +0100 @@ -264,8 +264,19 @@ static ssize_t dio_complete(struct dio * if 
(dio->result && dio->end_io) dio->end_io(dio->iocb, offset, transferred, dio->private); - if (is_async) + if (is_async) { + if (dio->rw & WRITE) { + int err; + + err = generic_write_sync(dio->iocb->ki_filp, offset, + transferred); + if (err < 0 && ret > 0) + ret = err; + } + aio_complete(dio->iocb, ret, 0); + } + inode_dio_done(dio->inode); kmem_cache_free(dio_cache, dio); @@ -1163,6 +1174,15 @@ do_blockdev_direct_IO(int rw, struct kio dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && (end > i_size_read(inode))); + /* + * For AIO O_(D)SYNC writes we need to defer completions to a workqueue + * so that we can call ->fsync. + */ + if (dio->is_async && (rw & WRITE) && + ((iocb->ki_filp->f_flags & O_DSYNC) || + IS_SYNC(iocb->ki_filp->f_mapping->host))) + dio->defer_completion = true; + retval = 0; dio->inode = inode; -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html