[PATCH 4/4] nvme: add support for mq_ops->queue_rqs()

This enables the block layer to send us a full plug list of requests
that need submitting. The block layer guarantees that they all belong
to the same queue, but we do have to check the hardware queue mapping
for each request.

If errors are encountered, leave the offending requests in the passed-in
list; the block layer will then handle them individually.

This is good for about a 4% improvement in peak performance, taking us
from 9.6M to 10M IOPS/core.

Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
 drivers/nvme/host/pci.c | 67 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d2b654fc3603..2eedd04b1f90 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1004,6 +1004,72 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
+static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct request **rqlist)
+{
+	spin_lock(&nvmeq->sq_lock);
+	while (!rq_list_empty(*rqlist)) {
+		struct request *req = rq_list_pop(rqlist);
+		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+		nvme_copy_cmd(nvmeq, absolute_pointer(&iod->cmd));
+	}
+	nvme_write_sq_db(nvmeq, true);
+	spin_unlock(&nvmeq->sq_lock);
+}
+
+static void nvme_queue_rqs(struct request **rqlist)
+{
+	struct request *requeue_list = NULL, *req, *prev = NULL;
+	struct blk_mq_hw_ctx *hctx;
+	struct nvme_queue *nvmeq;
+	struct nvme_ns *ns;
+
+restart:
+	req = rq_list_peek(rqlist);
+	hctx = req->mq_hctx;
+	nvmeq = hctx->driver_data;
+	ns = hctx->queue->queuedata;
+
+	/*
+	 * We should not need to do this, but we're still using this to
+	 * ensure we can drain requests on a dying queue.
+	 */
+	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
+		return;
+
+	rq_list_for_each(rqlist, req) {
+		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+		blk_status_t ret;
+
+		if (unlikely(!nvme_check_ready(&nvmeq->dev->ctrl, req, true)))
+			goto requeue;
+
+		if (req->mq_hctx != hctx) {
+			/* detach rest of list, and submit */
+			prev->rq_next = NULL;
+			nvme_submit_cmds(nvmeq, rqlist);
+			/* req now start of new list for this hw queue */
+			*rqlist = req;
+			goto restart;
+		}
+
+		hctx->tags->rqs[req->tag] = req;
+		ret = nvme_prep_rq(nvmeq->dev, ns, req, &iod->cmd);
+		if (ret == BLK_STS_OK) {
+			prev = req;
+			continue;
+		}
+requeue:
+		/* detach 'req' and add to remainder list */
+		if (prev)
+			prev->rq_next = req->rq_next;
+		rq_list_add(&requeue_list, req);
+	}
+
+	nvme_submit_cmds(nvmeq, rqlist);
+	*rqlist = requeue_list;
+}
+
 static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1741,6 +1807,7 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
 
 static const struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
+	.queue_rqs	= nvme_queue_rqs,
 	.complete	= nvme_pci_complete_rq,
 	.commit_rqs	= nvme_commit_rqs,
 	.init_hctx	= nvme_init_hctx,
-- 
2.33.1
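
An aside on the list handling in nvme_queue_rqs(): the plug list is a
singly linked list threaded through rq_next, so splitting it per hardware
queue and parking prep failures on a requeue list is plain pointer surgery.
Below is a stand-alone, user-space sketch of that pattern; struct req,
fail_prep, submit_run() and friends are invented for the example and are
not driver or block layer code.

	#include <stdio.h>
	#include <stddef.h>

	struct req {
		struct req *rq_next;	/* plays the role of request->rq_next */
		int hctx;		/* hardware queue this request maps to */
		int fail_prep;		/* pretend preparation failed */
		int tag;
	};

	/* stand-in for nvme_submit_cmds(): "issue" one run for one hctx */
	static void submit_run(struct req *head)
	{
		for (; head; head = head->rq_next)
			printf("  submit tag %d on hctx %d\n", head->tag, head->hctx);
	}

	static void queue_rqs(struct req **rqlist)
	{
		struct req *requeue_list = NULL, *req, *next, *prev;
		int hctx;

	restart:
		hctx = (*rqlist)->hctx;
		prev = NULL;

		for (req = *rqlist; req; req = next) {
			next = req->rq_next;

			if (req->hctx != hctx) {
				/* detach rest of list, submit what we have so far */
				if (prev) {
					prev->rq_next = NULL;
					submit_run(*rqlist);
				}
				*rqlist = req;	/* req starts the next run */
				goto restart;
			}
			if (req->fail_prep) {
				/* unlink 'req' and park it on the requeue list */
				if (prev)
					prev->rq_next = next;
				else
					*rqlist = next;
				req->rq_next = requeue_list;
				requeue_list = req;
				continue;	/* prev stays where it was */
			}
			prev = req;
		}

		submit_run(*rqlist);
		*rqlist = requeue_list;	/* failures go back to the caller */
	}

	int main(void)
	{
		struct req r[5] = {
			{ &r[1], 0, 0, 10 }, { &r[2], 0, 1, 11 }, { &r[3], 1, 0, 12 },
			{ &r[4], 1, 0, 13 }, { NULL,  1, 0, 14 },
		};
		struct req *list = &r[0];

		queue_rqs(&list);
		for (; list; list = list->rq_next)
			printf("left for individual requeue: tag %d\n", list->tag);
		return 0;
	}

Built with any C compiler, this prints one submission run per hardware
queue and hands the failed request back on the list, which is the contract
described in the changelog above.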

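Much of the win presumably comes from amortizing the submission-queue lock
and the doorbell write over the whole run: nvme_submit_cmds() above takes
sq_lock once and calls nvme_write_sq_db() once, rather than once per
request. Here is a minimal stand-alone sketch of that batching idea, with a
pthread mutex and a plain variable standing in for the hardware doorbell;
none of these types or helpers exist in the driver.

	#include <pthread.h>
	#include <stdint.h>
	#include <string.h>
	#include <stdio.h>

	#define QD 64

	struct cmd { uint8_t bytes[64]; };

	/* toy submission queue: lock, command ring, "doorbell" register */
	struct sq {
		pthread_mutex_t lock;
		struct cmd ring[QD];
		unsigned tail;
		volatile unsigned doorbell;	/* stand-in for the MMIO doorbell */
	};

	/* copy one command into the ring; no doorbell write yet */
	static void sq_copy_cmd(struct sq *sq, const struct cmd *c)
	{
		memcpy(&sq->ring[sq->tail], c, sizeof(*c));
		if (++sq->tail == QD)
			sq->tail = 0;
	}

	/* batched issue: one lock round trip and one doorbell write for n commands */
	static void sq_submit_batch(struct sq *sq, const struct cmd *cmds, int n)
	{
		pthread_mutex_lock(&sq->lock);
		for (int i = 0; i < n; i++)
			sq_copy_cmd(sq, &cmds[i]);
		sq->doorbell = sq->tail;	/* analogous to nvme_write_sq_db() */
		pthread_mutex_unlock(&sq->lock);
	}

	int main(void)
	{
		struct sq sq = { .lock = PTHREAD_MUTEX_INITIALIZER };
		struct cmd cmds[8];

		memset(cmds, 0, sizeof(cmds));
		sq_submit_batch(&sq, cmds, 8);
		printf("tail %u, doorbell %u after one batched write\n",
		       sq.tail, sq.doorbell);
		return 0;
	}

Per-request submission would pay the lock and doorbell cost eight times
here; the batched path pays it once, which is where gains of the kind
quoted in the changelog come from.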


