Btrfs performs poorly with O_DIRECT|O_SYNC, mostly because of the constant waiting. However, btrfs has a handy way of waiting for IO that can be delayed until the very last second, so we can do all of the O_SYNC work first and then wait for a whole batch of IO to complete at once. So introduce a flag that allows the generic direct IO code to forgo waiting on writes and leave that up to the file system. Thanks, Signed-off-by: Josef Bacik <jbacik@xxxxxxxxxxxx> --- fs/direct-io.c | 36 +++++++++++++++++++++++++++++------- include/linux/fs.h | 3 +++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index f86c720..ae31c183 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -299,19 +299,35 @@ static void dio_bio_end_aio(struct bio *bio, int error) * handler. * * During I/O bi_private points at the dio. After I/O, bi_private is used to - * implement a singly-linked list of completed BIOs, at dio->bio_list. + * implement a singly-linked list of completed BIOs, at dio->bio_list, but only + * if the file system isn't doing its own waiting. 
*/ static void dio_bio_end_io(struct bio *bio, int error) { struct dio *dio = bio->bi_private; unsigned long flags; + unsigned long remaining; + bool own_waiting = (dio->rw == WRITE && + (dio->flags & DIO_OWN_WAITING)); + + if (own_waiting) + dio_bio_complete(dio, bio); spin_lock_irqsave(&dio->bio_lock, flags); - bio->bi_private = dio->bio_list; - dio->bio_list = bio; - if (--dio->refcount == 1 && dio->waiter) + if (!own_waiting) { + bio->bi_private = dio->bio_list; + dio->bio_list = bio; + } + remaining = --dio->refcount; + if (remaining == 1 && dio->waiter) wake_up_process(dio->waiter); spin_unlock_irqrestore(&dio->bio_lock, flags); + + if (remaining == 0) { + BUG_ON(!(dio->flags & DIO_OWN_WAITING)); + dio_complete(dio, dio->iocb->ki_pos, 0, false); + kmem_cache_free(dio_cache, dio); + } } /** @@ -1266,14 +1282,20 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ((rw == READ) || (dio->result == sdio.size))) retval = -EIOCBQUEUED; - if (retval != -EIOCBQUEUED) + if (retval != -EIOCBQUEUED && + (rw == READ || !(flags & DIO_OWN_WAITING))) dio_await_completion(dio); if (drop_refcount(dio) == 0) { retval = dio_complete(dio, offset, retval, false); kmem_cache_free(dio_cache, dio); - } else - BUG_ON(retval != -EIOCBQUEUED); + } else { + BUG_ON(retval != -EIOCBQUEUED && !(flags & DIO_OWN_WAITING)); + + /* Need to return how much data we should be waiting for */ + if (!retval && flags & DIO_OWN_WAITING) + retval = dio->result; + } out: return retval; diff --git a/include/linux/fs.h b/include/linux/fs.h index b33cfc9..c7944d1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2433,6 +2433,9 @@ enum { /* filesystem does not support filling holes */ DIO_SKIP_HOLES = 0x02, + + /* filesystem will do its own waiting thank you! 
*/ + DIO_OWN_WAITING = 0x04, }; void dio_end_io(struct bio *bio, int error); -- 1.7.7.6 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html