For block devices, use the DIO_SYNC_WRITES flag so that flushes are issued /after/ the write completes, not before. Furthermore, we need to use the bdevfs workqueue to issue the flush, not the workqueue of the filesystem that just happens to contain the device node. This patch requires Jeff Moyer's "fix up AIO+DIO+O_SYNC to actually do the sync part" patch set. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/block_dev.c | 5 +++-- fs/direct-io.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 1a1e5e3..05ff33a 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -235,7 +235,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct inode *inode = file->f_mapping->host; return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_blocks, NULL, NULL, 0); + nr_segs, blkdev_get_blocks, NULL, NULL, + DIO_SYNC_WRITES); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -1631,7 +1632,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, percpu_down_read(&bdev->bd_block_size_semaphore); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); - if (ret > 0 || ret == -EIOCBQUEUED) { + if (ret > 0) { ssize_t err; err = generic_write_sync(file, pos, ret); diff --git a/fs/direct-io.c b/fs/direct-io.c index 25dbd14..33c0bc2 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -258,7 +258,7 @@ void generic_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, work->ret = ret; work->offset = offset; work->len = bytes; - queue_work(inode->i_sb->s_dio_flush_wq, &work->work); + queue_work(iocb->ki_filp->f_mapping->host->i_sb->s_dio_flush_wq, &work->work); } else { aio_complete(iocb, ret, 0); inode_dio_done(inode); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html