The bio referencing has a trick that doesn't do any actual atomic inc/dec on the reference count until we have to elevate it to > 1. For the async IO O_DIRECT case, we can't use the simple DIO variants, so we use __blkdev_direct_IO(). It always grabs an extra reference to the bio after allocation, which means we then enter the slower path of actually having to do atomic_inc/dec on the count. We don't need to do that for the async case, unless we end up going multi-bio, in which case we're already doing huge amounts of IO. For the smaller IO case (< BIO_MAX_PAGES), we can do without the extra ref. Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> --- diff --git a/fs/block_dev.c b/fs/block_dev.c index f3bd41f80421..44fc9da13f68 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -292,10 +292,26 @@ static int blkdev_iopoll(struct kiocb *kiocb, bool wait) return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait); } +static void blkdev_bio_dirty_release(struct bio *bio, bool should_dirty) +{ + if (should_dirty) { + bio_check_pages_dirty(bio); + } else { + struct bio_vec *bvec; + int i; + + if (!bio_flagged(bio, BIO_HOLD_PAGES)) + bio_for_each_segment_all(bvec, bio, i) + put_page(bvec->bv_page); + bio_put(bio); + } +} + static void blkdev_bio_end_io(struct bio *bio) { struct blkdev_dio *dio = bio->bi_private; bool should_dirty = dio->should_dirty; + bool should_free = false; if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) { if (bio->bi_status && !dio->bio.bi_status) @@ -313,26 +329,20 @@ static void blkdev_bio_end_io(struct bio *bio) } dio->iocb->ki_complete(iocb, ret, 0); - bio_put(&dio->bio); } else { struct task_struct *waiter = dio->waiter; WRITE_ONCE(dio->waiter, NULL); blk_wake_io_task(waiter); } + /* last bio is done, now we can free the parent holding dio */ + should_free = dio->multi_bio; } - if (should_dirty) { - bio_check_pages_dirty(bio); - } else { - struct bio_vec *bvec; - int i; - - if (!bio_flagged(bio, BIO_HOLD_PAGES)) - bio_for_each_segment_all(bvec, bio, i) 
- put_page(bvec->bv_page); - bio_put(bio); - } + if (!dio->is_sync) + blkdev_bio_dirty_release(bio, should_dirty); + if (should_free) + bio_put(&dio->bio); } static ssize_t @@ -355,14 +365,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) return -EINVAL; bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); - bio_get(bio); /* extra ref for the completion handler */ dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); - if (dio->is_sync) + if (dio->is_sync) { dio->waiter = current; - else + bio_get(bio); + } else { dio->iocb = iocb; + } dio->size = 0; dio->multi_bio = false; @@ -425,6 +436,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } if (!dio->multi_bio) { + /* + * async needs an extra reference if we are going + * multi-bio only, so we can ensure that 'dio' + * sticks around. + */ + if (!is_sync) + bio_get(bio); dio->multi_bio = true; atomic_set(&dio->ref, 2); } else { @@ -458,6 +476,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (likely(!ret)) ret = dio->size; + blkdev_bio_dirty_release(bio, dio->should_dirty); bio_put(&dio->bio); return ret; } -- Jens Axboe