bdev based polled O_DIRECT is currently quite a bit faster than passthru
on the same device, and one of the reasons is that we're not able to use
the bio caching for passthru IO.

If REQ_POLLED is set on the request, use the fs bio set for grabbing a
bio from the caches, if available. This saves 5-6% of CPU overhead for
polled passthru IO.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 block/blk-map.c | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/block/blk-map.c b/block/blk-map.c
index 4043c5809cd4..5da03f2614eb 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -231,6 +231,16 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
 	return ret;
 }
 
+static void bio_map_put(struct bio *bio)
+{
+	if (bio->bi_opf & REQ_ALLOC_CACHE) {
+		bio_put(bio);
+	} else {
+		bio_uninit(bio);
+		kfree(bio);
+	}
+}
+
 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 		gfp_t gfp_mask)
 {
@@ -243,10 +253,19 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 	if (!iov_iter_count(iter))
 		return -EINVAL;
 
-	bio = bio_kmalloc(nr_vecs, gfp_mask);
-	if (!bio)
-		return -ENOMEM;
-	bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
+	if (rq->cmd_flags & REQ_POLLED) {
+		blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE;
+
+		bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask,
+					&fs_bio_set);
+		if (!bio)
+			return -ENOMEM;
+	} else {
+		bio = bio_kmalloc(nr_vecs, gfp_mask);
+		if (!bio)
+			return -ENOMEM;
+		bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
+	}
 
 	while (iov_iter_count(iter)) {
 		struct page **pages;
@@ -304,8 +323,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 
  out_unmap:
 	bio_release_pages(bio, false);
-	bio_uninit(bio);
-	kfree(bio);
+	bio_map_put(bio);
 	return ret;
 }
 
@@ -610,8 +628,7 @@ int blk_rq_unmap_user(struct bio *bio)
 
 		next_bio = bio;
 		bio = bio->bi_next;
-		bio_uninit(next_bio);
-		kfree(next_bio);
+		bio_map_put(next_bio);
 	}
 
 	return ret;
-- 
2.35.1
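
For reference, a minimal sketch of the alloc/free pairing the change
relies on (illustrative only, not part of the patch; the calls mirror
what bio_map_user_iov() and bio_map_put() do above):

	/* polled passthru: bio comes from the fs_bio_set bio cache */
	bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags | REQ_ALLOC_CACHE,
			       gfp_mask, &fs_bio_set);
	/* ... */
	bio_put(bio);		/* REQ_ALLOC_CACHE is set, bio_put() frees it */

	/* non-polled passthru: plain kmalloc'ed bio, as before */
	bio = bio_kmalloc(nr_vecs, gfp_mask);
	bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
	/* ... */
	bio_uninit(bio);
	kfree(bio);		/* no REQ_ALLOC_CACHE, so uninit + kfree */

bio_map_put() keys off REQ_ALLOC_CACHE in bio->bi_opf to pick between
these two release paths, so both blk_rq_unmap_user() and the error
unwind in bio_map_user_iov() handle either allocation correctly.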