Re: [PATCH 3/9] scsi: improved eh timeout handler

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, Hannes:

On 07/01/2013 10:24 PM, Hannes Reinecke wrote:
When a command runs into a timeout we need to send an 'ABORT TASK'
TMF. This is typically done by the 'eh_abort_handler' LLDD callback.

Conceptually, however, this function is a normal SCSI command, so
there is no need to enter the error handler.

This patch implements a new scsi_abort_command() function which
invokes an asynchronous function scsi_eh_abort_handler() to
abort the commands via the usual 'eh_abort_handler'.

If abort succeeds the command is either retried or terminated,
depending on the number of allowed retries. However, 'eh_eflags'
records the abort, so if the retry would fail again the
command is pushed onto the error handler without trying to
abort it (again); it'll be cleared up from SCSI EH.

Signed-off-by: Hannes Reinecke<hare@xxxxxxx>
---
  drivers/scsi/scsi.c       |   1 +
  drivers/scsi/scsi_error.c | 139 ++++++++++++++++++++++++++++++++++++++++++----
  drivers/scsi/scsi_priv.h  |   2 +
  include/scsi/scsi_cmnd.h  |   2 +
  4 files changed, 132 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index ebe3b0a..06257cf 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -297,6 +297,7 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)

  		cmd->device = dev;
  		INIT_LIST_HEAD(&cmd->list);
+		INIT_WORK(&cmd->abort_work, scmd_eh_abort_handler);
  		spin_lock_irqsave(&dev->list_lock, flags);
  		list_add_tail(&cmd->list,&dev->cmd_list);
  		spin_unlock_irqrestore(&dev->list_lock, flags);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index e76e895..835f7e4 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -55,6 +55,7 @@ static void scsi_eh_done(struct scsi_cmnd *scmd);
  #define HOST_RESET_SETTLE_TIME  (10)

  static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
+static int scsi_try_to_abort_cmd(struct scsi_host_template *, struct scsi_cmnd *);

  /* called with shost->host_lock held */
  void scsi_eh_wakeup(struct Scsi_Host *shost)
@@ -102,6 +103,111 @@ static int scsi_host_eh_past_deadline(struct Scsi_Host *shost)
  }

  /**
+ * scmd_eh_abort_handler - Handle command aborts
+ * @work:	command to be aborted.
+ */
+void
+scmd_eh_abort_handler(struct work_struct *work)
+{
+	struct scsi_cmnd *scmd =
+		container_of(work, struct scsi_cmnd, abort_work);
+	struct scsi_device *sdev = scmd->device;
+	unsigned long flags;
+	int rtn;
+
+	spin_lock_irqsave(sdev->host->host_lock, flags);
+	if (scsi_host_eh_past_deadline(sdev->host)) {
+		spin_unlock_irqrestore(sdev->host->host_lock, flags);
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_INFO, scmd,
+				    "eh timeout, not aborting\n"));

Command address should be also printed for debugging conveniently:
+"eh timeout, not aborting command %p\n", scmd));


+	} else {
+		spin_unlock_irqrestore(sdev->host->host_lock, flags);
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_INFO, scmd,
+				    "aborting command %p\n", scmd));
+		rtn = scsi_try_to_abort_cmd(sdev->host->hostt, scmd);
+		if (rtn == SUCCESS) {
+			scmd->result |= DID_TIME_OUT<<  16;
+			if (!scsi_noretry_cmd(scmd)&&

I think 'scsi_device_online(scmd->device)' is also necessary here.


+			    (++scmd->retries<= scmd->allowed)) {
+				SCSI_LOG_ERROR_RECOVERY(3,
+					scmd_printk(KERN_WARNING, scmd,
+						    "retry aborted command\n"));

Command address should be also printed here.


+				scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
+			} else {
+				SCSI_LOG_ERROR_RECOVERY(3,
+					scmd_printk(KERN_WARNING, scmd,
+						    "finish aborted command\n"));

Command address should be also printed here.


+				scsi_finish_command(scmd);
+			}
+			return;
+		}
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_INFO, scmd,
+				    "abort command failed, rtn %d\n", rtn));

Command address should be also printed here.


+	}
+
+	if (scsi_eh_scmd_add(scmd, 0)) {
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_WARNING, scmd,
+				    "terminate aborted command\n"));

Command address should be also printed here.


+		scmd->result |= DID_TIME_OUT<<  16;
+		scsi_finish_command(scmd);
+	}
+}
+
+/**
+ * scsi_abort_command - schedule a command abort
+ * @scmd:	scmd to abort.
+ *
+ * We only need to abort commands after a command timeout
+ */
+enum blk_eh_timer_return
+scsi_abort_command(struct scsi_cmnd *scmd)
+{
+	struct scsi_device *sdev = scmd->device;
+	struct Scsi_Host *shost = sdev->host;
+	unsigned long flags;
+
+	if (scmd->eh_eflags&  SCSI_EH_ABORT_SCHEDULED) {
+		/*
+		 * command abort timed out.
+		 */
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_INFO, scmd,
+				    "scmd %p abort timeout\n", scmd));
+		cancel_work_sync(&scmd->abort_work);
+		return BLK_EH_NOT_HANDLED;
+	}
+
+	/*
+	 * Do not try a command abort if
+	 * SCSI EH has already started.
+	 */
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_in_recovery(shost)) {
+		spin_unlock_irqrestore(shost->host_lock, flags);
+		SCSI_LOG_ERROR_RECOVERY(3,
+			scmd_printk(KERN_INFO, scmd,
+				    "host in recovery, not aborting\n"));

Command address should be also printed here.

Thanks,
Ren


+		return BLK_EH_NOT_HANDLED;
+	}
+
+	if (shost->eh_deadline&&  !shost->last_reset)
+		shost->last_reset = jiffies;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED;
+	SCSI_LOG_ERROR_RECOVERY(3,
+		scmd_printk(KERN_INFO, scmd,
+			    "scmd %p abort scheduled\n", scmd));
+	schedule_work(&scmd->abort_work);
+	return BLK_EH_SCHEDULED;
+}
+EXPORT_SYMBOL_GPL(scsi_abort_command);
+
+/**
   * scsi_eh_scmd_add - add scsi cmd to error handling.
   * @scmd:	scmd to run eh on.
   * @eh_flag:	optional SCSI_EH flag.
@@ -127,6 +233,8 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
  		shost->last_reset = jiffies;

  	ret = 1;
+	if (scmd->eh_eflags&  SCSI_EH_ABORT_SCHEDULED)
+		eh_flag&= ~SCSI_EH_CANCEL_CMD;
  	scmd->eh_eflags |= eh_flag;
  	list_add_tail(&scmd->eh_entry,&shost->eh_cmd_q);
  	shost->host_failed++;
@@ -1532,6 +1640,8 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
  	switch (host_byte(scmd->result)) {
  	case DID_OK:
  		break;
+	case DID_TIME_OUT:
+		goto check_type;
  	case DID_BUS_BUSY:
  		return (scmd->request->cmd_flags&  REQ_FAILFAST_TRANSPORT);
  	case DID_PARITY:
@@ -1545,18 +1655,19 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd)
  		return (scmd->request->cmd_flags&  REQ_FAILFAST_DRIVER);
  	}

-	switch (status_byte(scmd->result)) {
-	case CHECK_CONDITION:
-		/*
-		 * assume caller has checked sense and determinted
-		 * the check condition was retryable.
-		 */
-		if (scmd->request->cmd_flags&  REQ_FAILFAST_DEV ||
-		    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
-			return 1;
-	}
+	switch (status_byte(scmd->result) != CHECK_CONDITION)
+		return 0;

-	return 0;
+check_type:
+	/*
+	 * assume caller has checked sense and determinted
+	 * the check condition was retryable.
+	 */
+	if (scmd->request->cmd_flags&  REQ_FAILFAST_DEV ||
+	    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
+		return 1;
+	else
+		return 0;
  }

  /**
@@ -1606,9 +1717,13 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
  		 * looks good.  drop through, and check the next byte.
  		 */
  		break;
+	case DID_ABORT:
+		if (scmd->eh_eflags&  SCSI_EH_ABORT_SCHEDULED) {
+			scmd->result |= DID_TIME_OUT<<  16;
+			return SUCCESS;
+		}
  	case DID_NO_CONNECT:
  	case DID_BAD_TARGET:
-	case DID_ABORT:
  		/*
  		 * note - this means that we just report the status back
  		 * to the top level driver, not that we actually think
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 8f9a0ca..f079a59 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -19,6 +19,7 @@ struct scsi_nl_hdr;
   * Scsi Error Handler Flags
   */
  #define SCSI_EH_CANCEL_CMD	0x0001	/* Cancel this cmd */
+#define SCSI_EH_ABORT_SCHEDULED	0x0002	/* Abort has been scheduled */

  #define SCSI_SENSE_VALID(scmd) \
  	(((scmd)->sense_buffer[0]&  0x70) == 0x70)
@@ -66,6 +67,7 @@ extern int __init scsi_init_devinfo(void);
  extern void scsi_exit_devinfo(void);

  /* scsi_error.c */
+extern void scmd_eh_abort_handler(struct work_struct *work);
  extern enum blk_eh_timer_return scsi_times_out(struct request *req);
  extern int scsi_error_handler(void *host);
  extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index de5f5d8..a2f062e 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -55,6 +55,7 @@ struct scsi_cmnd {
  	struct scsi_device *device;
  	struct list_head list;  /* scsi_cmnd participates in queue lists */
  	struct list_head eh_entry; /* entry for the host eh_cmd_q */
+	struct work_struct abort_work;
  	int eh_eflags;		/* Used by error handlr */

  	/*
@@ -144,6 +145,7 @@ extern void scsi_put_command(struct scsi_cmnd *);
  extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
  			       struct device *);
  extern void scsi_finish_command(struct scsi_cmnd *cmd);
+extern enum blk_eh_timer_return scsi_abort_command(struct scsi_cmnd *cmd);

  extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
  				 size_t *offset, size_t *len);

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux