[PATCH] direct-io: allow file systems to do their own waiting for io V2

Btrfs is terrible with O_DIRECT|O_SYNC, mostly because of the constant
waiting.  The thing is we have a handy way of waiting for IO that we can
delay until the very last second, so we can do all of the O_SYNC work first
and then wait for a bunch of IO to complete in one go.  So introduce a flag
that allows the generic direct IO code to forgo waiting and leave that up to
the file system.
Thanks,

Signed-off-by: Josef Bacik <jbacik@xxxxxxxxxxxx>
---
V1->V2: fix a stupid rw == WRITE bug.

A sketch of how a file system might use the new flag follows the diffstat.

 fs/direct-io.c     |   36 +++++++++++++++++++++++++++++-------
 include/linux/fs.h |    3 +++
 2 files changed, 32 insertions(+), 7 deletions(-)
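
For illustration only, and not part of this patch: a minimal sketch of how
a file system's ->direct_IO() could opt in to the new flag.  The myfs_*
names (myfs_direct_IO, myfs_get_block, myfs_wait_ordered_range) are made up
for this example; __blockdev_direct_IO() is the real generic helper with
its current signature.

#include <linux/fs.h>
#include <linux/uio.h>

/*
 * Hypothetical caller, assuming made-up myfs_get_block() and
 * myfs_wait_ordered_range() helpers.
 */
static ssize_t myfs_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	ssize_t ret;

	/* Ask the generic code to skip dio_await_completion() for writes */
	ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
				   iov, offset, nr_segs, myfs_get_block,
				   NULL, NULL, DIO_OWN_WAITING);

	/*
	 * With DIO_OWN_WAITING a submitted write returns the number of
	 * bytes in flight instead of waiting, so do the O_SYNC work first
	 * and delay the wait to the very last second.
	 */
	if ((rw & WRITE) && ret > 0)
		ret = myfs_wait_ordered_range(inode, offset, ret);

	return ret;
}

The point of the flag is that the expensive O_SYNC work can overlap with
the IO that is still in flight, rather than waiting for the IO first and
only then starting the sync work.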

diff --git a/fs/direct-io.c b/fs/direct-io.c
index f86c720..4e1cdb4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -299,19 +299,35 @@ static void dio_bio_end_aio(struct bio *bio, int error)
  * handler.
  *
  * During I/O bi_private points at the dio.  After I/O, bi_private is used to
- * implement a singly-linked list of completed BIOs, at dio->bio_list.
+ * implement a singly-linked list of completed BIOs, at dio->bio_list, but only
+ * if the file system isn't doing its own waiting.
  */
 static void dio_bio_end_io(struct bio *bio, int error)
 {
 	struct dio *dio = bio->bi_private;
 	unsigned long flags;
+	unsigned long remaining;
+	bool own_waiting = ((dio->rw & WRITE) &&
+			    (dio->flags & DIO_OWN_WAITING));
+
+	if (own_waiting)
+		dio_bio_complete(dio, bio);
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
-	bio->bi_private = dio->bio_list;
-	dio->bio_list = bio;
-	if (--dio->refcount == 1 && dio->waiter)
+	if (!own_waiting) {
+		bio->bi_private = dio->bio_list;
+		dio->bio_list = bio;
+	}
+	remaining = --dio->refcount;
+	if (remaining == 1 && dio->waiter)
 		wake_up_process(dio->waiter);
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
+
+	if (remaining == 0) {
+		BUG_ON(!(dio->flags & DIO_OWN_WAITING));
+		dio_complete(dio, dio->iocb->ki_pos, 0, false);
+		kmem_cache_free(dio_cache, dio);
+	}
 }
 
 /**
@@ -1266,14 +1282,20 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	    ((rw == READ) || (dio->result == sdio.size)))
 		retval = -EIOCBQUEUED;
 
-	if (retval != -EIOCBQUEUED)
+	if (retval != -EIOCBQUEUED &&
+	    (rw == READ || !(flags & DIO_OWN_WAITING)))
 		dio_await_completion(dio);
 
 	if (drop_refcount(dio) == 0) {
 		retval = dio_complete(dio, offset, retval, false);
 		kmem_cache_free(dio_cache, dio);
-	} else
-		BUG_ON(retval != -EIOCBQUEUED);
+	} else {
+		BUG_ON(retval != -EIOCBQUEUED && !(flags & DIO_OWN_WAITING));
+
+		/* Tell the caller how much data it should be waiting for */
+		if (!retval && flags & DIO_OWN_WAITING)
+			retval = dio->result;
+	}
 
 out:
 	return retval;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b33cfc9..c7944d1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2433,6 +2433,9 @@ enum {
 
 	/* filesystem does not support filling holes */
 	DIO_SKIP_HOLES	= 0x02,
+
+	/* filesystem will do its own waiting, thank you! */
+	DIO_OWN_WAITING = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
-- 
1.7.7.6
