Use the request tag when allocating a skd_fitmsg_context or
skd_request_context such that the lists used to track free elements
can be eliminated. Swap the skd_end_request() and skd_release_skreq()
calls to avoid triggering a use-after-free. Remove
skd_fitmsg_context.state and .outstanding because FIT messages are
shared among requests and because updating a FIT message after a
request has finished would trigger a use-after-free.

Signed-off-by: Bart Van Assche <bart.vanassche@xxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxx>
Cc: Johannes Thumshirn <jthumshirn@xxxxxxx>
---
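Note for reviewers (this sits below the "---" marker and is not part of
the commit message): the conversion relies on blk_queue_start_tag()
handing out tags that are unique within [0, queue depth), so the tag can
be used directly as an index into the skreq/skmsg tables and the
driver-private free lists become redundant. Below is a minimal sketch of
that pattern under the legacy (non-blk-mq) request_fn model. The
my_dev/my_ctx/my_request_fn/my_complete names are illustrative only and
do not occur in skd; error handling and hardware specifics are omitted.

#include <linux/blkdev.h>

struct my_ctx {
	struct request *req;	/* back pointer, set at submit time */
};

struct my_dev {
	struct request_queue *queue;
	struct my_ctx *ctx_table;	/* one entry per possible tag */
};

/* Submit path; runs with the queue lock held, as every request_fn does. */
static void my_request_fn(struct request_queue *q)
{
	struct my_dev *dev = q->queuedata;
	struct request *req;

	while ((req = blk_peek_request(q)) != NULL) {
		/* Dequeues req and assigns req->tag on success. */
		if (blk_queue_start_tag(q, req))
			break;	/* no tag available; no free list needed */
		dev->ctx_table[req->tag].req = req;
		/* ... build and issue the hardware command ... */
	}
}

/* Completion path, also under the queue lock; the device echoes the tag. */
static void my_complete(struct my_dev *dev, u32 tag, blk_status_t status)
{
	struct request *req =
		blk_map_queue_find_tag(dev->queue->queue_tags, tag);

	if (WARN_ON_ONCE(!req))
		return;
	/* Tear down per-request driver state first ... */
	dev->ctx_table[tag].req = NULL;
	/* ... and only then end the request; this also frees the tag. */
	__blk_end_request_all(req, status);
}

Ending the request strictly after the driver state has been torn down is
the same ordering this patch enforces in skd_isr_completion_posted() and
skd_recover_requests().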
 drivers/block/skd_main.c | 267 +++++++++++++----------------------------------
 1 file changed, 73 insertions(+), 194 deletions(-)

diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 392c898d86e2..35343fbf4144 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
@@ -154,11 +155,6 @@ enum skd_req_state {
 	SKD_REQ_STATE_TIMEOUT,
 };
 
-enum skd_fit_msg_state {
-	SKD_MSG_STATE_IDLE,
-	SKD_MSG_STATE_BUSY,
-};
-
 enum skd_check_status_action {
 	SKD_CHECK_STATUS_REPORT_GOOD,
 	SKD_CHECK_STATUS_REPORT_SMART_ALERT,
@@ -173,12 +169,7 @@ struct skd_msg_buf {
 };
 
 struct skd_fitmsg_context {
-	enum skd_fit_msg_state state;
-
-	struct skd_fitmsg_context *next;
-
 	u32 id;
-	u16 outstanding;
 
 	u32 length;
 
@@ -189,8 +180,6 @@ struct skd_fitmsg_context {
 struct skd_request_context {
 	enum skd_req_state state;
 
-	struct skd_request_context *next;
-
 	u16 id;
 	u32 fitmsg_id;
 
@@ -264,10 +253,8 @@ struct skd_device {
 	u32 timeout_slot[SKD_N_TIMEOUT_SLOT];
 	u32 timeout_stamp;
 
-	struct skd_fitmsg_context *skmsg_free_list;
 	struct skd_fitmsg_context *skmsg_table;
 
-	struct skd_request_context *skreq_free_list;
 	struct skd_request_context *skreq_table;
 
 	struct skd_special_context internal_skspcl;
@@ -387,8 +374,8 @@ static void skd_send_fitmsg(struct skd_device *skdev,
 static void skd_send_special_fitmsg(struct skd_device *skdev,
 				    struct skd_special_context *skspcl);
 static void skd_request_fn(struct request_queue *rq);
-static void skd_end_request(struct skd_device *skdev,
-		struct skd_request_context *skreq, blk_status_t status);
+static void skd_end_request(struct skd_device *skdev, struct request *req,
+			    blk_status_t status);
 static bool skd_preop_sg_list(struct skd_device *skdev,
 			      struct skd_request_context *skreq);
 static void skd_postop_sg_list(struct skd_device *skdev,
@@ -405,8 +392,6 @@ static void skd_soft_reset(struct skd_device *skdev);
 const char *skd_drive_state_to_str(int state);
 const char *skd_skdev_state_to_str(enum skd_drvr_state state);
 static void skd_log_skdev(struct skd_device *skdev, const char *event);
-static void skd_log_skmsg(struct skd_device *skdev,
-			  struct skd_fitmsg_context *skmsg, const char *event);
 static void skd_log_skreq(struct skd_device *skdev,
 			  struct skd_request_context *skreq, const char *event);
 
@@ -424,7 +409,7 @@ static void skd_fail_all_pending(struct skd_device *skdev)
 		req = blk_peek_request(q);
 		if (req == NULL)
 			break;
-		blk_start_request(req);
+		WARN_ON_ONCE(blk_queue_start_tag(q, req));
 		__blk_end_request_all(req, BLK_STS_IOERR);
 	}
 }
@@ -523,6 +508,7 @@ static void skd_request_fn(struct request_queue *q)
 	u64 cmdctxt;
 	u32 timo_slot;
 	int flush, fua;
+	u32 tag;
 
 	if (skdev->state != SKD_DRVR_STATE_ONLINE) {
 		if (skd_fail_all(q))
 			return;
@@ -531,9 +517,7 @@ static void skd_request_fn(struct request_queue *q)
 	}
 
 	if (blk_queue_stopped(skdev->queue)) {
-		if (skdev->skmsg_free_list == NULL ||
-		    skdev->skreq_free_list == NULL ||
-		    skdev->in_flight >= skdev->queue_low_water_mark)
+		if (skdev->in_flight >= skdev->queue_low_water_mark)
 			/* There is still some kind of shortage */
 			return;
 
@@ -581,27 +565,6 @@ static void skd_request_fn(struct request_queue *q)
 			break;
 		}
 
-		/* Is a skd_request_context available? */
-		skreq = skdev->skreq_free_list;
-		if (skreq == NULL) {
-			dev_dbg(&skdev->pdev->dev, "Out of req=%p\n", q);
-			break;
-		}
-		SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
-		SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
-
-		/* Now we check to see if we can get a fit msg */
-		if (skmsg == NULL) {
-			if (skdev->skmsg_free_list == NULL) {
-				dev_dbg(&skdev->pdev->dev, "Out of msg\n");
-				break;
-			}
-		}
-
-		skreq->flush_cmd = 0;
-		skreq->n_sg = 0;
-		skreq->sg_byte_count = 0;
-
 		/*
 		 * OK to now dequeue request from q.
 		 *
@@ -609,7 +572,22 @@ static void skd_request_fn(struct request_queue *q)
 		 * the native request. Note that skd_request_context is
 		 * available but is still at the head of the free list.
 		 */
-		blk_start_request(req);
+		WARN_ON_ONCE(blk_queue_start_tag(q, req));
+
+		tag = blk_mq_unique_tag(req);
+		WARN_ONCE(tag >= skd_max_queue_depth,
+			  "%#x > %#x (nr_requests = %lu)\n", tag,
+			  skd_max_queue_depth, q->nr_requests);
+
+		skreq = &skdev->skreq_table[tag];
+		SKD_ASSERT(skreq->state == SKD_REQ_STATE_IDLE);
+		SKD_ASSERT((skreq->id & SKD_ID_INCR) == 0);
+
+		skreq->id = tag + SKD_ID_RW_REQUEST;
+		skreq->flush_cmd = 0;
+		skreq->n_sg = 0;
+		skreq->sg_byte_count = 0;
+		skreq->req = req;
 
 		skreq->fitmsg_id = 0;
 
@@ -618,27 +596,13 @@ static void skd_request_fn(struct request_queue *q)
 
 		if (req->bio && !skd_preop_sg_list(skdev, skreq)) {
 			dev_dbg(&skdev->pdev->dev, "error Out\n");
-			skd_end_request(skdev, skreq, BLK_STS_RESOURCE);
+			skd_end_request(skdev, skreq->req, BLK_STS_RESOURCE);
 			continue;
 		}
 
 		/* Either a FIT msg is in progress or we have to start one. */
 		if (skmsg == NULL) {
-			/* Are there any FIT msg buffers available? */
-			skmsg = skdev->skmsg_free_list;
-			if (skmsg == NULL) {
-				dev_dbg(&skdev->pdev->dev,
-					"Out of msg skdev=%p\n",
-					skdev);
-				break;
-			}
-			SKD_ASSERT(skmsg->state == SKD_MSG_STATE_IDLE);
-			SKD_ASSERT((skmsg->id & SKD_ID_INCR) == 0);
-
-			skdev->skmsg_free_list = skmsg->next;
-
-			skmsg->state = SKD_MSG_STATE_BUSY;
-			skmsg->id += SKD_ID_INCR;
+			skmsg = &skdev->skmsg_table[tag];
 
 			/* Initialize the FIT msg header */
 			fmh = &skmsg->msg_buf->fmh;
@@ -673,7 +637,6 @@ static void skd_request_fn(struct request_queue *q)
 			cpu_to_be32(skreq->sg_byte_count);
 
 		/* Complete resource allocations. */
-		skdev->skreq_free_list = skreq->next;
 		skreq->state = SKD_REQ_STATE_BUSY;
 		skreq->id += SKD_ID_INCR;
 
@@ -717,23 +680,22 @@ static void skd_request_fn(struct request_queue *q)
 		blk_stop_queue(skdev->queue);
 }
 
-static void skd_end_request(struct skd_device *skdev,
-		struct skd_request_context *skreq, blk_status_t error)
+static void skd_end_request(struct skd_device *skdev, struct request *req,
+			    blk_status_t error)
 {
 	if (unlikely(error)) {
-		struct request *req = skreq->req;
"read" : "write"; u32 lba = (u32)blk_rq_pos(req); u32 count = blk_rq_sectors(req); dev_err(&skdev->pdev->dev, "Error cmd=%s sect=%u count=%u id=0x%x\n", cmd, lba, - count, skreq->id); + count, req->tag); } else - dev_dbg(&skdev->pdev->dev, "id=0x%x error=%d\n", skreq->id, + dev_dbg(&skdev->pdev->dev, "id=0x%x error=%d\n", req->tag, error); - __blk_end_request_all(skreq->req, error); + __blk_end_request_all(req, error); } static bool skd_preop_sg_list(struct skd_device *skdev, @@ -1346,7 +1308,6 @@ static void skd_send_fitmsg(struct skd_device *skdev, struct skd_fitmsg_context *skmsg) { u64 qcmd; - struct fit_msg_hdr *fmh; dev_dbg(&skdev->pdev->dev, "dma address 0x%llx, busy=%d\n", skmsg->mb_dma_address, skdev->in_flight); @@ -1355,9 +1316,6 @@ static void skd_send_fitmsg(struct skd_device *skdev, qcmd = skmsg->mb_dma_address; qcmd |= FIT_QCMD_QID_NORMAL; - fmh = &skmsg->msg_buf->fmh; - skmsg->outstanding = fmh->num_protocol_cmds_coalesced; - if (unlikely(skdev->dbg_level > 1)) { u8 *bp = (u8 *)skmsg->msg_buf; int i; @@ -1547,19 +1505,20 @@ skd_check_status(struct skd_device *skdev, } static void skd_resolve_req_exception(struct skd_device *skdev, - struct skd_request_context *skreq) + struct skd_request_context *skreq, + struct request *req) { u8 cmp_status = skreq->completion.status; switch (skd_check_status(skdev, cmp_status, &skreq->err_info)) { case SKD_CHECK_STATUS_REPORT_GOOD: case SKD_CHECK_STATUS_REPORT_SMART_ALERT: - skd_end_request(skdev, skreq, BLK_STS_OK); + skd_end_request(skdev, req, BLK_STS_OK); break; case SKD_CHECK_STATUS_BUSY_IMMINENT: skd_log_skreq(skdev, skreq, "retry(busy)"); - blk_requeue_request(skdev->queue, skreq->req); + blk_requeue_request(skdev->queue, req); dev_info(&skdev->pdev->dev, "drive BUSY imminent\n"); skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT; skdev->timer_countdown = SKD_TIMER_MINUTES(20); @@ -1567,16 +1526,16 @@ static void skd_resolve_req_exception(struct skd_device *skdev, break; case SKD_CHECK_STATUS_REQUEUE_REQUEST: - if ((unsigned long) ++skreq->req->special < SKD_MAX_RETRIES) { + if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); - blk_requeue_request(skdev->queue, skreq->req); + blk_requeue_request(skdev->queue, req); break; } /* fall through */ case SKD_CHECK_STATUS_REPORT_ERROR: default: - skd_end_request(skdev, skreq, BLK_STS_IOERR); + skd_end_request(skdev, req, BLK_STS_IOERR); break; } } @@ -1585,44 +1544,8 @@ static void skd_resolve_req_exception(struct skd_device *skdev, static void skd_release_skreq(struct skd_device *skdev, struct skd_request_context *skreq) { - u32 msg_slot; - struct skd_fitmsg_context *skmsg; - u32 timo_slot; - /* - * Reclaim the FIT msg buffer if this is - * the first of the requests it carried to - * be completed. The FIT msg buffer used to - * send this request cannot be reused until - * we are sure the s1120 card has copied - * it to its memory. The FIT msg might have - * contained several requests. As soon as - * any of them are completed we know that - * the entire FIT msg was transferred. - * Only the first completed request will - * match the FIT msg buffer id. The FIT - * msg buffer id is immediately updated. - * When subsequent requests complete the FIT - * msg buffer id won't match, so we know - * quite cheaply that it is already done. 
-	 */
-	msg_slot = skreq->fitmsg_id & SKD_ID_SLOT_MASK;
-	SKD_ASSERT(msg_slot < skdev->num_fitmsg_context);
-
-	skmsg = &skdev->skmsg_table[msg_slot];
-	if (skmsg->id == skreq->fitmsg_id) {
-		SKD_ASSERT(skmsg->state == SKD_MSG_STATE_BUSY);
-		SKD_ASSERT(skmsg->outstanding > 0);
-		skmsg->outstanding--;
-		if (skmsg->outstanding == 0) {
-			skmsg->state = SKD_MSG_STATE_IDLE;
-			skmsg->id += SKD_ID_INCR;
-			skmsg->next = skdev->skmsg_free_list;
-			skdev->skmsg_free_list = skmsg;
-		}
-	}
-
-	/*
 	 * Decrease the number of active requests.
 	 * Also decrements the count in the timeout slot.
 	 */
@@ -1644,8 +1567,20 @@ static void skd_release_skreq(struct skd_device *skdev,
 	 */
 	skreq->state = SKD_REQ_STATE_IDLE;
 	skreq->id += SKD_ID_INCR;
-	skreq->next = skdev->skreq_free_list;
-	skdev->skreq_free_list = skreq;
+}
+
+static struct skd_request_context *skd_skreq_from_rq(struct skd_device *skdev,
+						     struct request *rq)
+{
+	struct skd_request_context *skreq;
+	int i;
+
+	for (i = 0, skreq = skdev->skreq_table; i < skdev->num_fitmsg_context;
+	     i++, skreq++)
+		if (skreq->req == rq)
+			return skreq;
+
+	return NULL;
 }
 
 static int skd_isr_completion_posted(struct skd_device *skdev,
@@ -1654,7 +1589,8 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 	struct fit_completion_entry_v1 *skcmp;
 	struct fit_comp_error_info *skerr;
 	u16 req_id;
-	u32 req_slot;
+	u32 tag;
+	struct request *rq;
 	struct skd_request_context *skreq;
 	u16 cmp_cntxt;
 	u8 cmp_status;
@@ -1702,18 +1638,24 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 		 * r/w request (see skd_start() above) or a special request.
 		 */
 		req_id = cmp_cntxt;
-		req_slot = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
+		tag = req_id & SKD_ID_SLOT_AND_TABLE_MASK;
 
 		/* Is this other than a r/w request? */
-		if (req_slot >= skdev->num_req_context) {
+		if (tag >= skdev->num_req_context) {
 			/*
 			 * This is not a completion for a r/w request.
 			 */
+			WARN_ON_ONCE(blk_map_queue_find_tag(skdev->queue->
+							    queue_tags, tag));
 			skd_complete_other(skdev, skcmp, skerr);
 			continue;
 		}
 
-		skreq = &skdev->skreq_table[req_slot];
+		rq = blk_map_queue_find_tag(skdev->queue->queue_tags, tag);
+		if (WARN(!rq, "No request for tag %#x -> %#x\n", cmp_cntxt,
+			 tag))
+			continue;
+		skreq = skd_skreq_from_rq(skdev, rq);
 
 		/*
 		 * Make sure the request ID for the slot matches.
@@ -1745,26 +1687,16 @@ static int skd_isr_completion_posted(struct skd_device *skdev,
 		if (skreq->n_sg > 0)
 			skd_postop_sg_list(skdev, skreq);
 
-		if (!skreq->req) {
-			dev_dbg(&skdev->pdev->dev,
-				"NULL backptr skdreq %p, req=0x%x req_id=0x%x\n",
-				skreq, skreq->id, req_id);
-		} else {
-			/*
-			 * Capture the outcome and post it back to the
-			 * native request.
-			 */
-			if (likely(cmp_status == SAM_STAT_GOOD))
-				skd_end_request(skdev, skreq, BLK_STS_OK);
-			else
-				skd_resolve_req_exception(skdev, skreq);
-		}
+		/* Mark the FIT msg and timeout slot as free. */
+		skd_release_skreq(skdev, skreq);
 
 		/*
-		 * Release the skreq, its FIT msg (if one), timeout slot,
-		 * and queue depth.
+		 * Capture the outcome and post it back to the native request.
 		 */
-		skd_release_skreq(skdev, skreq);
+		if (likely(cmp_status == SAM_STAT_GOOD))
+			skd_end_request(skdev, rq, BLK_STS_OK);
+		else
+			skd_resolve_req_exception(skdev, skreq, rq);
 
 		/* skd_isr_comp_limit equal zero means no limit */
 		if (limit) {
@@ -2099,44 +2031,26 @@ static void skd_recover_requests(struct skd_device *skdev)
 
 	for (i = 0; i < skdev->num_req_context; i++) {
 		struct skd_request_context *skreq = &skdev->skreq_table[i];
+		struct request *req = skreq->req;
 
 		if (skreq->state == SKD_REQ_STATE_BUSY) {
 			skd_log_skreq(skdev, skreq, "recover");
 
 			SKD_ASSERT((skreq->id & SKD_ID_INCR) != 0);
-			SKD_ASSERT(skreq->req != NULL);
+			SKD_ASSERT(req != NULL);
 
 			/* Release DMA resources for the request. */
 			if (skreq->n_sg > 0)
 				skd_postop_sg_list(skdev, skreq);
 
-			skd_end_request(skdev, skreq, BLK_STS_IOERR);
-
 			skreq->req = NULL;
 			skreq->state = SKD_REQ_STATE_IDLE;
 			skreq->id += SKD_ID_INCR;
-		}
-
-		if (i > 0)
-			skreq[-1].next = skreq;
-		skreq->next = NULL;
-	}
-	skdev->skreq_free_list = skdev->skreq_table;
-
-	for (i = 0; i < skdev->num_fitmsg_context; i++) {
-		struct skd_fitmsg_context *skmsg = &skdev->skmsg_table[i];
-
-		if (skmsg->state == SKD_MSG_STATE_BUSY) {
-			skd_log_skmsg(skdev, skmsg, "salvaged");
-			SKD_ASSERT((skmsg->id & SKD_ID_INCR) != 0);
-			skmsg->state = SKD_MSG_STATE_IDLE;
-			skmsg->id += SKD_ID_INCR;
+			skd_end_request(skdev, req, BLK_STS_IOERR);
 		}
-		if (i > 0)
-			skmsg[-1].next = skmsg;
-		skmsg->next = NULL;
 	}
-	skdev->skmsg_free_list = skdev->skmsg_table;
 
 	for (i = 0; i < SKD_N_TIMEOUT_SLOT; i++)
 		skdev->timeout_slot[i] = 0;
@@ -2876,7 +2790,6 @@ static int skd_cons_skmsg(struct skd_device *skdev)
 
 		skmsg->id = i + SKD_ID_FIT_MSG;
 
-		skmsg->state = SKD_MSG_STATE_IDLE;
 		skmsg->msg_buf = pci_alloc_consistent(skdev->pdev,
 						      SKD_N_FITMSG_BYTES,
 						      &skmsg->mb_dma_address);
@@ -2891,14 +2804,8 @@ static int skd_cons_skmsg(struct skd_device *skdev)
 			"not aligned: msg_buf %p mb_dma_address %#llx\n",
 			skmsg->msg_buf, skmsg->mb_dma_address);
 		memset(skmsg->msg_buf, 0, SKD_N_FITMSG_BYTES);
-
-		skmsg->next = &skmsg[1];
 	}
 
-	/* Free list is in order starting with the 0th entry. */
-	skdev->skmsg_table[i - 1].next = NULL;
-	skdev->skmsg_free_list = skdev->skmsg_table;
-
 err_out:
 	return rc;
 }
@@ -2958,10 +2865,7 @@ static int skd_cons_skreq(struct skd_device *skdev)
 		struct skd_request_context *skreq;
 
 		skreq = &skdev->skreq_table[i];
-
-		skreq->id = i + SKD_ID_RW_REQUEST;
 		skreq->state = SKD_REQ_STATE_IDLE;
-
 		skreq->sg = kcalloc(skdev->sgs_per_request,
 				    sizeof(struct scatterlist), GFP_KERNEL);
 		if (skreq->sg == NULL) {
@@ -2978,14 +2882,8 @@ static int skd_cons_skreq(struct skd_device *skdev)
 			rc = -ENOMEM;
 			goto err_out;
 		}
-
-		skreq->next = &skreq[1];
 	}
 
-	/* Free list is in order starting with the 0th entry. */
-	skdev->skreq_table[i - 1].next = NULL;
-	skdev->skreq_free_list = skdev->skreq_table;
-
 err_out:
 	return rc;
 }
@@ -3061,6 +2959,8 @@ static int skd_cons_disk(struct skd_device *skdev)
 		goto err_out;
 	}
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+	q->nr_requests = skd_max_queue_depth / 2;
+	blk_queue_init_tags(q, skd_max_queue_depth, NULL, BLK_TAG_ALLOC_FIFO);
 
 	skdev->queue = q;
 	disk->queue = q;
@@ -3789,18 +3689,6 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
 	}
 }
 
-static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
-{
-	switch (state) {
-	case SKD_MSG_STATE_IDLE:
-		return "IDLE";
-	case SKD_MSG_STATE_BUSY:
-		return "BUSY";
-	default:
-		return "???";
-	}
-}
-
 static const char *skd_skreq_state_to_str(enum skd_req_state state)
 {
 	switch (state) {
@@ -3832,15 +3720,6 @@ static void skd_log_skdev(struct skd_device *skdev, const char *event)
 		skdev->timeout_stamp, skdev->skcomp_cycle, skdev->skcomp_ix);
 }
 
-static void skd_log_skmsg(struct skd_device *skdev,
-			  struct skd_fitmsg_context *skmsg, const char *event)
-{
-	dev_dbg(&skdev->pdev->dev, "skmsg=%p event='%s'\n", skmsg, event);
-	dev_dbg(&skdev->pdev->dev, " state=%s(%d) id=0x%04x length=%d\n",
-		skd_skmsg_state_to_str(skmsg->state), skmsg->state, skmsg->id,
-		skmsg->length);
-}
-
 static void skd_log_skreq(struct skd_device *skdev,
 			  struct skd_request_context *skreq, const char *event)
 {
-- 
2.14.0