move to blk-mq based request processing as brd is one of the few drivers that still uses submit_bio interface. The changes are pretty trivial to start using blk-mq. The performance increases up to 125% for direct IO read workloads. There is a slight dip in performance for direct IO write workload but considering the general performance gain with blk-mq support, it is not a lot. SW queues are mapped to one hw queue in the brd device, and rest of IO processing is retained as is. Performance results with none scheduler: --direct=0 ------------------------------------------------------ | | bio(base) | blk-mq | delta | ------------------------------------------------------ | randread | 133 | 223 | +75% | ------------------------------------------------------ | read | 150 | 313 | +108% | ----------------------------------------------------- | randwrite | 111 | 109 | -1.8% | ----------------------------------------------------- | write | 118 | 117 | -0.8%| ----------------------------------------------------- --direct=1 ------------------------------------------------------ | | bio(base) | blk-mq | delta | ------------------------------------------------------ | randread | 182 | 414 | +127% | ------------------------------------------------------ | read | 190 | 429 | +125% | ----------------------------------------------------- | randwrite | 378 | 387 | +2.38%| ----------------------------------------------------- | write | 443 | 420 | -5.1% | ----------------------------------------------------- Signed-off-by: Pankaj Raghav <p.raghav@xxxxxxxxxxx> --- drivers/block/brd.c | 64 +++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 37dce184eb56..99b37ac31532 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -16,6 +16,7 @@ #include <linux/major.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/blk-mq.h> #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/pagemap.h> @@ -46,6 +47,7 @@ struct brd_device { spinlock_t brd_lock; struct radix_tree_root brd_pages; u64 brd_nr_pages; + struct blk_mq_tag_set tag_set; }; /* @@ -282,36 +284,46 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, return err; } -static void brd_submit_bio(struct bio *bio) +static blk_status_t brd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct brd_device *brd = bio->bi_bdev->bd_disk->private_data; - sector_t sector = bio->bi_iter.bi_sector; + struct request *rq = bd->rq; + struct brd_device *brd = hctx->queue->queuedata; + sector_t sector = blk_rq_pos(rq); struct bio_vec bvec; - struct bvec_iter iter; + struct req_iterator iter; + blk_status_t err = BLK_STS_OK; - bio_for_each_segment(bvec, bio, iter) { + blk_mq_start_request(bd->rq); + rq_for_each_segment(bvec, rq, iter) { unsigned int len = bvec.bv_len; - int err; + int ret; /* Don't support un-aligned buffer */ WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) || - (len & (SECTOR_SIZE - 1))); + (len & (SECTOR_SIZE - 1))); - err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, - bio_op(bio), sector); - if (err) { - bio_io_error(bio); - return; + ret = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, + req_op(rq), sector); + if (ret) { + err = BLK_STS_IOERR; + goto end_request; } sector += len >> SECTOR_SHIFT; } - bio_endio(bio); +end_request: + blk_mq_end_request(bd->rq, err); + return BLK_STS_OK; } +static const struct blk_mq_ops brd_mq_ops = { + .queue_rq = brd_queue_rq, +}; + + static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, - .submit_bio = brd_submit_bio, }; /* @@ -355,7 +367,7 @@ static int brd_alloc(int i) struct brd_device *brd; struct gendisk *disk; char buf[DISK_NAME_LEN]; - int err = -ENOMEM; + int err = 0; list_for_each_entry(brd, &brd_devices, brd_list) if (brd->brd_number == i) @@ -364,6 +376,14 @@ static int brd_alloc(int i) if (!brd) return -ENOMEM; brd->brd_number = i; + brd->tag_set.ops = &brd_mq_ops; + brd->tag_set.queue_depth = 128; + brd->tag_set.numa_node = NUMA_NO_NODE; + brd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + brd->tag_set.cmd_size = 0; + brd->tag_set.driver_data = brd; + brd->tag_set.nr_hw_queues = 1; + list_add_tail(&brd->brd_list, &brd_devices); spin_lock_init(&brd->brd_lock); @@ -374,9 +394,17 @@ static int brd_alloc(int i) debugfs_create_u64(buf, 0444, brd_debugfs_dir, &brd->brd_nr_pages); - disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE); - if (!disk) + err = blk_mq_alloc_tag_set(&brd->tag_set); + if (err) { + err = -ENOMEM; goto out_free_dev; + } + + disk = brd->brd_disk = blk_mq_alloc_disk(&brd->tag_set, brd); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); + goto out_free_tags; + } disk->major = RAMDISK_MAJOR; disk->first_minor = i * max_part; @@ -407,6 +435,8 @@ static int brd_alloc(int i) out_cleanup_disk: put_disk(disk); +out_free_tags: + blk_mq_free_tag_set(&brd->tag_set); out_free_dev: list_del(&brd->brd_list); kfree(brd); -- 2.39.0