[PATCH 2/2] direct-io: handle O_(D)SYNC AIO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Call generic_write_sync from the deferred I/O completion handler if
O_DSYNC is set for a write request.  Also make sure various callers
don't call generic_write_sync if the direct I/O code returns
-EIOCBQUEUED.

Note: this currently breaks ext4 due to its convoluted unwritten
extent conversion code.  I've tried to understand it and as far
as I can see it's a workaround for the fact that ext4 marks page
writeback as completed before converting unwritten extents.
Ext4 should follow xfs on this and only mark writeback as completed
when it really is complete; at that point it can remove the big hairy
mess that forces unwritten extent conversions from fsync, truncate and
a few other places.

Based on an earlier patch from Jan Kara <jack@xxxxxxx> with updates from
Jeff Moyer <jmoyer@xxxxxxxxxx> and Darrick J. Wong <darrick.wong@xxxxxxxxxx>.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>

---
 fs/block_dev.c  |    2 +-
 fs/btrfs/file.c |    2 +-
 fs/cifs/file.c  |    2 +-
 fs/direct-io.c  |   22 +++++++++++++++++++++-
 fs/ext4/file.c  |    2 +-
 mm/filemap.c    |    2 +-
 6 files changed, 26 insertions(+), 6 deletions(-)

Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c	2012-11-21 21:19:34.075136013 +0100
+++ linux-2.6/fs/block_dev.c	2012-11-21 21:23:51.227142598 +0100
@@ -1631,7 +1631,7 @@ ssize_t blkdev_aio_write(struct kiocb *i
 	percpu_down_read(&bdev->bd_block_size_semaphore);
 
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
-	if (ret > 0 || ret == -EIOCBQUEUED) {
+	if (ret > 0) {
 		ssize_t err;
 
 		err = generic_write_sync(file, pos, ret);
Index: linux-2.6/fs/btrfs/file.c
===================================================================
--- linux-2.6.orig/fs/btrfs/file.c	2012-11-21 21:19:34.075136013 +0100
+++ linux-2.6/fs/btrfs/file.c	2012-11-21 21:23:51.231142597 +0100
@@ -1495,7 +1495,7 @@ static ssize_t btrfs_file_aio_write(stru
 	 * one running right now.
 	 */
 	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
-	if (num_written > 0 || num_written == -EIOCBQUEUED) {
+	if (num_written > 0) {
 		err = generic_write_sync(file, pos, num_written);
 		if (err < 0 && num_written > 0)
 			num_written = err;
Index: linux-2.6/fs/cifs/file.c
===================================================================
--- linux-2.6.orig/fs/cifs/file.c	2012-11-21 21:19:34.075136013 +0100
+++ linux-2.6/fs/cifs/file.c	2012-11-21 21:23:51.231142597 +0100
@@ -2464,7 +2464,7 @@ cifs_writev(struct kiocb *iocb, const st
 		mutex_unlock(&inode->i_mutex);
 	}
 
-	if (rc > 0 || rc == -EIOCBQUEUED) {
+	if (rc > 0) {
 		ssize_t err;
 
 		err = generic_write_sync(file, pos, rc);
Index: linux-2.6/fs/ext4/file.c
===================================================================
--- linux-2.6.orig/fs/ext4/file.c	2012-11-21 21:19:34.075136013 +0100
+++ linux-2.6/fs/ext4/file.c	2012-11-21 21:23:51.231142597 +0100
@@ -155,7 +155,7 @@ ext4_file_dio_write(struct kiocb *iocb,
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 
-	if (ret > 0 || ret == -EIOCBQUEUED) {
+	if (ret > 0) {
 		ssize_t err;
 
 		err = generic_write_sync(file, pos, ret);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c	2012-11-21 21:19:34.075136013 +0100
+++ linux-2.6/mm/filemap.c	2012-11-21 21:23:51.235142597 +0100
@@ -2532,7 +2532,7 @@ ssize_t generic_file_aio_write(struct ki
 	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
 	mutex_unlock(&inode->i_mutex);
 
-	if (ret > 0 || ret == -EIOCBQUEUED) {
+	if (ret > 0) {
 		ssize_t err;
 
 		err = generic_write_sync(file, pos, ret);
Index: linux-2.6/fs/direct-io.c
===================================================================
--- linux-2.6.orig/fs/direct-io.c	2012-11-21 21:22:57.875141232 +0100
+++ linux-2.6/fs/direct-io.c	2012-11-21 21:23:51.235142597 +0100
@@ -264,8 +264,19 @@ static ssize_t dio_complete(struct dio *
 	if (dio->result && dio->end_io)
 		dio->end_io(dio->iocb, offset, transferred, dio->private);
 
-	if (is_async)
+	if (is_async) {
+		if (dio->rw & WRITE) {
+			int err;
+
+			err = generic_write_sync(dio->iocb->ki_filp, offset,
+						 transferred);
+			if (err < 0 && ret > 0)
+				ret = err;
+		}
+
 		aio_complete(dio->iocb, ret, 0);
+	}
+
 	inode_dio_done(dio->inode);
 
 	kmem_cache_free(dio_cache, dio);
@@ -1163,6 +1174,15 @@ do_blockdev_direct_IO(int rw, struct kio
 	dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
 		(end > i_size_read(inode)));
 
+	/*
+	 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
+	 * so that we can call ->fsync.
+	 */
+	if (dio->is_async && (rw & WRITE) &&
+	    ((iocb->ki_filp->f_flags & O_DSYNC) ||
+	     IS_SYNC(iocb->ki_filp->f_mapping->host)))
+		dio->defer_completion = true;
+
 	retval = 0;
 
 	dio->inode = inode;

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux