Re: [PATCH] block: avoid extra bio reference for async O_DIRECT

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/29/18 3:55 PM, Jens Axboe wrote:
> The bio referencing has a trick that doesn't do any actual atomic
> inc/dec on the reference count until we have to elevator to > 1. For the
> async IO O_DIRECT case, we can't use the simple DIO variants, so we use
> __blkdev_direct_IO(). It always grabs an extra reference to the bio
> after allocation, which means we then enter the slower path of actually
> having to do atomic_inc/dec on the count.
> 
> We don't need to do that for the async case, unless we end up going
> multi-bio, in which case we're already doing huge amounts of IO. For the
> smaller IO case (< BIO_MAX_PAGES), we can do without the extra ref.

I accidentally generated this against the aio-poll branch, here's a
version that is not.


diff --git a/fs/block_dev.c b/fs/block_dev.c
index d233a59ea364..5c0a224bf9cb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -281,10 +281,25 @@ struct blkdev_dio {
 
 static struct bio_set blkdev_dio_pool;
 
+static void blkdev_bio_dirty_release(struct bio *bio, bool should_dirty)
+{
+	if (should_dirty) {
+		bio_check_pages_dirty(bio);
+	} else {
+		struct bio_vec *bvec;
+		int i;
+
+		bio_for_each_segment_all(bvec, bio, i)
+			put_page(bvec->bv_page);
+		bio_put(bio);
+	}
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
 	struct blkdev_dio *dio = bio->bi_private;
 	bool should_dirty = dio->should_dirty;
+	bool should_free = false;
 
 	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
 		if (bio->bi_status && !dio->bio.bi_status)
@@ -302,25 +317,20 @@ static void blkdev_bio_end_io(struct bio *bio)
 			}
 
 			dio->iocb->ki_complete(iocb, ret, 0);
-			bio_put(&dio->bio);
 		} else {
 			struct task_struct *waiter = dio->waiter;
 
 			WRITE_ONCE(dio->waiter, NULL);
 			blk_wake_io_task(waiter);
 		}
+		/* last bio is done, now we can free the parent holding dio */
+		should_free = dio->multi_bio;
 	}
 
-	if (should_dirty) {
-		bio_check_pages_dirty(bio);
-	} else {
-		struct bio_vec *bvec;
-		int i;
-
-		bio_for_each_segment_all(bvec, bio, i)
-			put_page(bvec->bv_page);
-		bio_put(bio);
-	}
+	if (!dio->is_sync)
+		blkdev_bio_dirty_release(bio, should_dirty);
+	if (should_free)
+		bio_put(&dio->bio);
 }
 
 static ssize_t
@@ -343,14 +353,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		return -EINVAL;
 
 	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
-	bio_get(bio); /* extra ref for the completion handler */
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
-	if (dio->is_sync)
+	if (dio->is_sync) {
 		dio->waiter = current;
-	else
+		bio_get(bio);
+	} else {
 		dio->iocb = iocb;
+	}
 
 	dio->size = 0;
 	dio->multi_bio = false;
@@ -400,6 +411,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 		}
 
 		if (!dio->multi_bio) {
+			/*
+			 * async needs an extra reference if we are goign
+			 * multi-bio only, so we can ensure that 'dio'
+			 * sticks around.
+			 */
+			if (!is_sync)
+				bio_get(bio);
 			dio->multi_bio = true;
 			atomic_set(&dio->ref, 2);
 		} else {
@@ -433,6 +451,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (likely(!ret))
 		ret = dio->size;
 
+	blkdev_bio_dirty_release(bio, dio->should_dirty);
 	bio_put(&dio->bio);
 	return ret;
 }

-- 
Jens Axboe




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux