Re: [PATCH 2/2] nvme: use blk-mq polling for uring commands

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Mar 24, 2023 at 02:28:03PM -0700, Keith Busch wrote:
From: Keith Busch <kbusch@xxxxxxxxxx>

The first advantage is that unshared and multipath namespaces can use
the same polling callback.

The other advantage is that we don't need a bio payload in order to
poll, allowing commands like 'flush' and 'write zeroes' to be submitted
on the same high priority queue as read and write commands.

This can also allow for a future optimization for the driver since we no
longer need to create special hidden block devices to back nvme-generic
char dev's with unsupported command sets.

Signed-off-by: Keith Busch <kbusch@xxxxxxxxxx>
---
drivers/nvme/host/ioctl.c     | 79 ++++++++++++-----------------------
drivers/nvme/host/multipath.c |  2 +-
drivers/nvme/host/nvme.h      |  2 -
3 files changed, 28 insertions(+), 55 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 723e7d5b778f2..369e8519b87a2 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -503,7 +503,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
{
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	void *cookie = READ_ONCE(ioucmd->cookie);

	req->bio = pdu->bio;
	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
@@ -516,9 +515,10 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
	 * For iopoll, complete it directly.
	 * Otherwise, move the completion to task work.
	 */
-	if (cookie != NULL && blk_rq_is_poll(req))
+	if (blk_rq_is_poll(req)) {
+		WRITE_ONCE(ioucmd->cookie, NULL);
		nvme_uring_task_cb(ioucmd);
-	else
+	} else
		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb);

	return RQ_END_IO_FREE;
@@ -529,7 +529,6 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
{
	struct io_uring_cmd *ioucmd = req->end_io_data;
	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	void *cookie = READ_ONCE(ioucmd->cookie);

	req->bio = pdu->bio;
	pdu->req = req;
@@ -538,9 +537,10 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
	 * For iopoll, complete it directly.
	 * Otherwise, move the completion to task work.
	 */
-	if (cookie != NULL && blk_rq_is_poll(req))
+	if (blk_rq_is_poll(req)) {
+		WRITE_ONCE(ioucmd->cookie, NULL);
		nvme_uring_task_meta_cb(ioucmd);
-	else
+	} else
		io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb);

	return RQ_END_IO_NONE;
@@ -597,7 +597,6 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
	if (issue_flags & IO_URING_F_IOPOLL)
		rq_flags |= REQ_POLLED;

-retry:
	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
	if (IS_ERR(req))
		return PTR_ERR(req);
@@ -611,17 +610,9 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
			return ret;
	}

-	if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
-		if (unlikely(!req->bio)) {
-			/* we can't poll this, so alloc regular req instead */
-			blk_mq_free_request(req);
-			rq_flags &= ~REQ_POLLED;
-			goto retry;
-		} else {
-			WRITE_ONCE(ioucmd->cookie, req->bio);
-			req->bio->bi_opf |= REQ_POLLED;
-		}
-	}
+	if (blk_rq_is_poll(req))
+		WRITE_ONCE(ioucmd->cookie, req);

blk_rq_is_poll(req) warns for null "req->bio" and returns false if that
is the case. That defeats one of the purpose of the series i.e. poll on
no-payload commands such as flush/write-zeroes.


	/* to free bio on completion, as req->bio will be null at that time */
	pdu->bio = req->bio;
	pdu->meta_len = d.metadata_len;
@@ -780,18 +771,27 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
				 struct io_comp_batch *iob,
				 unsigned int poll_flags)
{
-	struct bio *bio;
+	struct request *req;
	int ret = 0;
-	struct nvme_ns *ns;
-	struct request_queue *q;

+	/*
+	 * The rcu lock ensures the 'req' in the command cookie will not be
+	 * freed until after the unlock. The queue must be frozen to free the
+	 * request, and the freeze requires an rcu grace period. The cookie is
+	 * cleared before the request is completed, so we're fine even if a
+	 * competing polling thread completes this thread's request.
+	 */
	rcu_read_lock();
-	bio = READ_ONCE(ioucmd->cookie);
-	ns = container_of(file_inode(ioucmd->file)->i_cdev,
-			struct nvme_ns, cdev);
-	q = ns->queue;
-	if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
-		ret = bio_poll(bio, iob, poll_flags);
+	req = READ_ONCE(ioucmd->cookie);
+	if (req) {

This is risky. We are not sure if the cookie is actually "req" at this
moment. If driver is loaded without the poll-queues, we will not be able
to set req into ioucmd->cookie during the submission (in
nvme_uring_cmd_io). Therefore, the original code checked for QUEUE_FLAG_POLL
before treating ioucmd->cookie as bio here.

This should handle it (on top of your patch):

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 369e8519b87a..89eef5da4b5c 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -773,6 +773,8 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
{
       struct request *req;
       int ret = 0;
+       struct request_queue *q;
+       struct nvme_ns *ns;

       /*
        * The rcu lock ensures the 'req' in the command cookie will not be
@@ -783,8 +785,10 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
        */
       rcu_read_lock();
       req = READ_ONCE(ioucmd->cookie);
-       if (req) {
-               struct request_queue *q = req->q;
+       ns = container_of(file_inode(ioucmd->file)->i_cdev, struct nvme_ns,
+                       cdev);
+       q = ns->queue;
+       if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && req) {

               if (percpu_ref_tryget(&q->q_usage_counter)) {
                       ret = blk_mq_poll(q, blk_rq_to_qc(req), iob,






[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux