[PATCH] block: avoid extra bio reference for async O_DIRECT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The bio referencing has a trick that doesn't do any actual atomic
inc/dec on the reference count until we have to elevator to > 1. For the
async IO O_DIRECT case, we can't use the simple DIO variants, so we use
__blkdev_direct_IO(). It always grabs an extra reference to the bio
after allocation, which means we then enter the slower path of actually
having to do atomic_inc/dec on the count.

We don't need to do that for the async case, unless we end up going
multi-bio, in which case we're already doing huge amounts of IO. For the
smaller IO case (< BIO_MAX_PAGES), we can do without the extra ref.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>

---

diff --git a/fs/block_dev.c b/fs/block_dev.c
index f3bd41f80421..44fc9da13f68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -292,10 +292,26 @@ static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
 	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
 }
 
+static void blkdev_bio_dirty_release(struct bio *bio, bool should_dirty)
+{
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		struct bio_vec *bvec;
+		int i;
+
+		if (!bio_flagged(bio, BIO_HOLD_PAGES))
+			bio_for_each_segment_all(bvec, bio, i)
+				put_page(bvec->bv_page);
+		bio_put(bio);
+	}
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
 	struct blkdev_dio *dio = bio->bi_private;
 	bool should_dirty = dio->should_dirty;
+	bool should_free = false;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
 		if (bio->bi_status && !dio->bio.bi_status)
@@ -313,26 +329,20 @@ static void blkdev_bio_end_io(struct bio *bio)
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
-			bio_put(&dio->bio);
 		} else {
 			struct task_struct *waiter = dio->waiter;
 
 			WRITE_ONCE(dio->waiter, NULL);
 			blk_wake_io_task(waiter);
 		}
+		/* last bio is done, now we can free the parent holding dio */
+		should_free = dio->multi_bio;
 	}
 
-	if (should_dirty) {
-		bio_check_pages_dirty(bio);
-	} else {
-		struct bio_vec *bvec;
-		int i;
-
-		if (!bio_flagged(bio, BIO_HOLD_PAGES))
-			bio_for_each_segment_all(bvec, bio, i)
-				put_page(bvec->bv_page);
-		bio_put(bio);
-	}
+	if (!dio->is_sync)
+		blkdev_bio_dirty_release(bio, should_dirty);
+	if (should_free)
+		bio_put(&dio->bio);
 }
 
 static ssize_t
@@ -355,14 +365,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		return -EINVAL;
 
 	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
-	bio_get(bio); /* extra ref for the completion handler */
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
-	if (dio->is_sync)
+	if (dio->is_sync) {
 		dio->waiter = current;
-	else
+		bio_get(bio);
+	} else {
 		dio->iocb = iocb;
+	}
 
 	dio->size = 0;
 	dio->multi_bio = false;
@@ -425,6 +436,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		}
 
 		if (!dio->multi_bio) {
+			/*
+			 * async needs an extra reference if we are goign
+			 * multi-bio only, so we can ensure that 'dio'
+			 * sticks around.
+			 */
+			if (!is_sync)
+				bio_get(bio);
 			dio->multi_bio = true;
 			atomic_set(&dio->ref, 2);
 		} else {
@@ -458,6 +476,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (likely(!ret))
 		ret = dio->size;
 
+	blkdev_bio_dirty_release(bio, dio->should_dirty);
 	bio_put(&dio->bio);
 	return ret;
 }

-- 
Jens Axboe




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux