Re: [PATCH v3 02/17] scsi: core: Fix a race between scsi_done() and scsi_times_out()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/1/21 1:43 PM, Keith Busch wrote:
If the the timeout handler successfully sets the state to complete, and
the lld returns BLK_EH_RESET_TIMER, who gets to complete this command?

That's a longstanding problem, isn't it? Anyway, how about replacing this
patch with the two untested patches below?


Subject: [PATCH 1/2] scsi: core: Rename scsi_cmnd.state

Prepare for removal of SCMD_STATE_COMPLETE. This patch does not change
any functionality.

Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxx>
Cc: Keith Busch <kbusch@xxxxxxxxxx>
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 drivers/scsi/hosts.c      |  2 +-
 drivers/scsi/scsi_error.c |  2 +-
 drivers/scsi/scsi_lib.c   | 12 ++++++------
 include/scsi/scsi_cmnd.h  |  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f69b77cbf538..f5869bd13bcf 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -572,7 +572,7 @@ static bool scsi_host_check_in_flight(struct request *rq, void *data,
 	int *count = data;
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);

-	if (test_bit(SCMD_STATE_INFLIGHT, &cmd->state))
+	if (test_bit(SCMD_STATE_INFLIGHT, &cmd->in_flight))
 		(*count)++;

 	return true;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 9cb0f9df621a..7b603b259ae2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -353,7 +353,7 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
 		 * so return RESET_TIMER to allow error handling another shot
 		 * at this command.
 		 */
-		if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
+		if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->in_flight))
 			return BLK_EH_RESET_TIMER;
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 621d841d819a..6bb0e2726d51 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -280,7 +280,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 	unsigned long flags;

 	rcu_read_lock();
-	__clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+	__clear_bit(SCMD_STATE_INFLIGHT, &cmd->in_flight);
 	if (unlikely(scsi_host_in_recovery(shost))) {
 		spin_lock_irqsave(shost->host_lock, flags);
 		if (shost->host_failed || shost->host_eh_scheduled)
@@ -1138,7 +1138,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)

 	jiffies_at_alloc = cmd->jiffies_at_alloc;
 	retries = cmd->retries;
-	in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+	in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->in_flight);
 	/*
 	 * Zero out the cmd, except for the embedded scsi_request. Only clear
 	 * the driver-private command data if the LLD does not supply a
@@ -1158,7 +1158,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 	cmd->jiffies_at_alloc = jiffies_at_alloc;
 	cmd->retries = retries;
 	if (in_flight)
-		__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+		__set_bit(SCMD_STATE_INFLIGHT, &cmd->in_flight);
 	cmd->budget_token = budget_token;

 }
@@ -1367,7 +1367,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 		spin_unlock_irq(shost->host_lock);
 	}

-	__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+	__set_bit(SCMD_STATE_INFLIGHT, &cmd->in_flight);

 	return 1;

@@ -1597,7 +1597,7 @@ void scsi_done(struct scsi_cmnd *cmd)

 	if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q)))
 		return;
-	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
+	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->in_flight)))
 		return;
 	trace_scsi_dispatch_cmd_done(cmd);
 	blk_mq_complete_request(scsi_cmd_to_rq(cmd));
@@ -1691,7 +1691,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
-		clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
+		clear_bit(SCMD_STATE_COMPLETE, &cmd->in_flight);
 	}

 	cmd->flags &= SCMD_PRESERVED_FLAGS;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 477a800a9543..6cbfbfbbb803 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -60,7 +60,7 @@ struct scsi_pointer {
 /* flags preserved across unprep / reprep */
 #define SCMD_PRESERVED_FLAGS	(SCMD_INITIALIZED)

-/* for scmd->state */
+/* for scmd->in_flight */
 #define SCMD_STATE_COMPLETE	0
 #define SCMD_STATE_INFLIGHT	1

@@ -139,7 +139,7 @@ struct scsi_cmnd {

 	int result;		/* Status code from lower level driver */
 	int flags;		/* Command flags */
-	unsigned long state;	/* Command completion state */
+	unsigned long in_flight;/* Command completion state */

 	unsigned int extra_len;	/* length of alignment and padding */
 };




Subject: [PATCH 2/2] scsi: core: Fix a race between scsi_done() and scsi_times_out()

If scsi_done() and scsi_times_out() are called concurrently, make sure
that only one of these two functions proceeds. If scsi_done() is called
while scsi_times_out() is in progress and if scsi_times_out() returns
BLK_EH_RESET_TIMER, complete the command. If scsi_times_out() returns
BLK_EH_RESET_TIMER, allow scsi_done() to complete the command.

Cc: Ming Lei <ming.lei@xxxxxxxxxx>
Cc: Hannes Reinecke <hare@xxxxxxx>
Cc: Keith Busch <kbusch@xxxxxxxxxx>
Fixes: 065990bd198e ("scsi: set timed out out mq requests to complete")
Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
---
 drivers/scsi/scsi_error.c  | 35 +++++++++++++++++++++--------------
 drivers/scsi/scsi_lib.c    | 16 +++++++++++++---
 drivers/scsi/virtio_scsi.c |  4 ++--
 include/scsi/scsi_cmnd.h   | 12 ++++++++++--
 4 files changed, 46 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 7b603b259ae2..42dd967167e6 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -330,6 +330,15 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
 	enum blk_eh_timer_return rtn = BLK_EH_DONE;
 	struct Scsi_Host *host = scmd->device->host;
+	int old = SCMD_STATE_SUBMITTED;
+
+	/*
+	 * scsi_done() may be called concurrently with scsi_times_out(). Only
+	 * one of these two functions should proceed. Hence return early if
+	 * scsi_done() won the race.
+	 */
+	if (!atomic_try_cmpxchg(&scmd->state, &old, SCMD_STATE_TIMED_OUT))
+		return BLK_EH_DONE;

 	trace_scsi_dispatch_cmd_timeout(scmd);
 	scsi_log_completion(scmd, TIMEOUT_ERROR);
@@ -341,26 +350,24 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
 		rtn = host->hostt->eh_timed_out(scmd);

 	if (rtn == BLK_EH_DONE) {
-		/*
-		 * Set the command to complete first in order to prevent a real
-		 * completion from releasing the command while error handling
-		 * is using it. If the command was already completed, then the
-		 * lower level driver beat the timeout handler, and it is safe
-		 * to return without escalating error recovery.
-		 *
-		 * If timeout handling lost the race to a real completion, the
-		 * block layer may ignore that due to a fake timeout injection,
-		 * so return RESET_TIMER to allow error handling another shot
-		 * at this command.
-		 */
-		if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->in_flight))
-			return BLK_EH_RESET_TIMER;
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
 			scsi_eh_scmd_add(scmd);
 		}
+		return rtn;
 	}

+	/* The order of the atomic_try_cmpxchg() calls below is important. */
+	old = SCMD_STATE_TIMED_OUT;
+	if (atomic_try_cmpxchg(&scmd->state, &old, SCMD_STATE_SUBMITTED))
+		return rtn;
+	old = SCMD_STATE_COMPLETE_AFTER_TIMEOUT;
+	WARN_ON_ONCE(!atomic_try_cmpxchg(&scmd->state, &old,
+					 SCMD_STATE_COMPLETED));
+	WARN_ON_ONCE(scmd->submitter != SUBMITTED_BY_BLOCK_LAYER);
+	trace_scsi_dispatch_cmd_done(scmd);
+	blk_mq_complete_request(scsi_cmd_to_rq(scmd));
+
 	return rtn;
 }

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6bb0e2726d51..797ad188e7a2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1586,6 +1586,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req)

 void scsi_done(struct scsi_cmnd *cmd)
 {
+	int old;
+
 	switch (cmd->submitter) {
 	case SUBMITTED_BY_BLOCK_LAYER:
 		break;
@@ -1597,8 +1599,15 @@ void scsi_done(struct scsi_cmnd *cmd)

 	if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q)))
 		return;
-	if (unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->in_flight)))
-		return;
+	for (;;) {
+		old = SCMD_STATE_SUBMITTED;
+		if (atomic_try_cmpxchg(&cmd->state, &old, SCMD_STATE_COMPLETED))
+			break;
+		old = SCMD_STATE_TIMED_OUT;
+		if (atomic_try_cmpxchg(&cmd->state, &old,
+				       SCMD_STATE_COMPLETE_AFTER_TIMEOUT))
+			return;
+	}
 	trace_scsi_dispatch_cmd_done(cmd);
 	blk_mq_complete_request(scsi_cmd_to_rq(cmd));
 }
@@ -1691,7 +1700,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
-		clear_bit(SCMD_STATE_COMPLETE, &cmd->in_flight);
+		BUILD_BUG_ON(SCMD_STATE_SUBMITTED != 0);
+		atomic_set(&cmd->state, SCMD_STATE_SUBMITTED);
 	}

 	cmd->flags &= SCMD_PRESERVED_FLAGS;
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 19f7d7b90625..9ea3ec308ecd 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -620,8 +620,8 @@ static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
 	 * we're using independent interrupts (e.g. MSI).  Poll the
 	 * virtqueues once.
 	 *
-	 * In the abort case, scsi_done() will do nothing, because the
-	 * command timed out and hence SCMD_STATE_COMPLETE has been set.
+	 * In the abort case, scsi_done() will do nothing. See also the
+	 * scsi_done() implementation.
 	 */
 	virtscsi_poll_requests(vscsi);

diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 6cbfbfbbb803..350b47792a4d 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -53,6 +53,14 @@ struct scsi_pointer {
 	volatile int phase;
 };

+enum scsi_cmd_state {
+	SCMD_STATE_SUBMITTED = 0,	/* Owned by device or not submitted. */
+	SCMD_STATE_COMPLETED = 1,	/* scsi_done() is in progress. */
+	SCMD_STATE_TIMED_OUT = 2,	/* Inside timeout handler. */
+	SCMD_STATE_COMPLETE_AFTER_TIMEOUT = 3, /* Complete after timeout
+						* handler finished. */
+} __packed;
+
 /* for scmd->flags */
 #define SCMD_TAGGED		(1 << 0)
 #define SCMD_INITIALIZED	(1 << 1)
@@ -61,8 +69,7 @@ struct scsi_pointer {
 #define SCMD_PRESERVED_FLAGS	(SCMD_INITIALIZED)

 /* for scmd->in_flight */
-#define SCMD_STATE_COMPLETE	0
-#define SCMD_STATE_INFLIGHT	1
+#define SCMD_STATE_INFLIGHT	0

 enum scsi_cmnd_submitter {
 	SUBMITTED_BY_BLOCK_LAYER = 0,
@@ -92,6 +99,7 @@ struct scsi_cmnd {
 	int retries;
 	int allowed;

+	atomic_t state; /* See also enum scsi_cmd_state */
 	unsigned char prot_op;
 	unsigned char prot_type;
 	unsigned char prot_flags;




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux