Re: Infinite retries

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, 19 Oct 2008, Boaz Harrosh wrote:

> Alan Stern wrote:
> > We do have a problem with infinite retry loops.  I'm not sure which 
> > kernels are affected, but there's a good chance 2.6.27 is and an 
> > excellent chance that 2.6.28-rc1 will be.
...

> Do you have the scsi_io_completion patchset on a public git somewhere?
> I would like to re-test them and review them again.

They aren't in any git repositories, so I am including the two patches
as attachments to this message.  The first patch changes the failure
analysis logic in scsi_io_completion() along the lines suggested by
James, and the second gets rid of scsi_end_request().  They are based
roughly on 2.6.27, so they might not apply cleanly after the merge
window.

Neither patch addresses the infinite-retry problem; I wanted to keep 
the issues separate.

> Did you try them with above problem and do they solve the issue?

At this point I can't remember exactly which combinations I tried!  :-)  
However I don't think these patches will have any effect on the retry 
loop.

> Also have you looked farther into the retries/timeout issues from
> block layer?

Not yet.  I'm waiting for 2.6.28-rc1 to appear.

Alan Stern
Index: usb-2.6/drivers/scsi/scsi_lib.c
===================================================================
--- usb-2.6.orig/drivers/scsi/scsi_lib.c
+++ usb-2.6/drivers/scsi/scsi_lib.c
@@ -859,6 +859,8 @@ void scsi_io_completion(struct scsi_cmnd
 	struct scsi_sense_hdr sshdr;
 	int sense_valid = 0;
 	int sense_deferred = 0;
+	enum {ACTION_FAIL, ACTION_REQUEUE, ACTION_RETRY,
+			ACTION_DELAYED_RETRY} action;
 
 	if (result) {
 		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
@@ -909,11 +911,15 @@ void scsi_io_completion(struct scsi_cmnd
 	if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
 		return;
 	this_count = blk_rq_bytes(req);
+	action = ACTION_FAIL;
 
-	/* good_bytes = 0, or (inclusive) there were leftovers and
-	 * result = 0, so scsi_end_request couldn't retry.
-	 */
-	if (sense_valid && !sense_deferred) {
+	if (host_byte(result) == DID_RESET) {
+		/* Third party bus reset or reset for error recovery
+		 * reasons.  Just retry the command and see what
+		 * happens.
+		 */
+		action = ACTION_RETRY;
+	} else if (sense_valid && !sense_deferred) {
 		switch (sshdr.sense_key) {
 		case UNIT_ATTENTION:
 			if (cmd->device->removable) {
@@ -921,16 +927,13 @@ void scsi_io_completion(struct scsi_cmnd
 				 * and quietly refuse further access.
 				 */
 				cmd->device->changed = 1;
-				scsi_end_request(cmd, -EIO, this_count, 1);
-				return;
 			} else {
 				/* Must have been a power glitch, or a
 				 * bus reset.  Could not have been a
 				 * media change, so we just retry the
-				 * request and see what happens.
+				 * command and see what happens.
 				 */
-				scsi_requeue_command(q, cmd);
-				return;
+				action = ACTION_RETRY;
 			}
 			break;
 		case ILLEGAL_REQUEST:
@@ -946,22 +949,13 @@ void scsi_io_completion(struct scsi_cmnd
 			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
 			    (cmd->cmnd[0] == READ_10 ||
 			     cmd->cmnd[0] == WRITE_10)) {
+				/* This will issue a new 6-byte command. */
 				cmd->device->use_10_for_rw = 0;
-				/* This will cause a retry with a
-				 * 6-byte command.
-				 */
-				scsi_requeue_command(q, cmd);
-			} else if (sshdr.asc == 0x10) /* DIX */
-				scsi_end_request(cmd, -EIO, this_count, 0);
-			else
-				scsi_end_request(cmd, -EIO, this_count, 1);
-			return;
-		case ABORTED_COMMAND:
-			if (sshdr.asc == 0x10) { /* DIF */
-				scsi_end_request(cmd, -EIO, this_count, 0);
-				return;
+				action = ACTION_REQUEUE;
 			}
 			break;
+		case ABORTED_COMMAND:
+			break;
 		case NOT_READY:
 			/* If the device is in the process of becoming
 			 * ready, or has a temporary blockage, retry.
@@ -975,19 +969,16 @@ void scsi_io_completion(struct scsi_cmnd
 				case 0x07: /* operation in progress */
 				case 0x08: /* Long write in progress */
 				case 0x09: /* self test in progress */
-					scsi_requeue_command(q, cmd);
-					return;
-				default:
+					action = ACTION_DELAYED_RETRY;
 					break;
 				}
 			}
-			if (!(req->cmd_flags & REQ_QUIET))
+			if (!(req->cmd_flags & REQ_QUIET) &&
+					action == ACTION_FAIL)
 				scsi_cmd_print_sense_hdr(cmd,
 							 "Device not ready",
 							 &sshdr);
-
-			scsi_end_request(cmd, -EIO, this_count, 1);
-			return;
+			break;
 		case VOLUME_OVERFLOW:
 			if (!(req->cmd_flags & REQ_QUIET)) {
 				scmd_printk(KERN_INFO, cmd,
@@ -996,28 +987,38 @@ void scsi_io_completion(struct scsi_cmnd
 				scsi_print_sense("", cmd);
 			}
 			/* See SSC3rXX or current. */
-			scsi_end_request(cmd, -EIO, this_count, 1);
-			return;
-		default:
 			break;
 		}
 	}
-	if (host_byte(result) == DID_RESET) {
-		/* Third party bus reset or reset for error recovery
-		 * reasons.  Just retry the request and see what
-		 * happens.
+
+	switch (action) {
+	case ACTION_FAIL:
+		/* Give up and fail the remainder of the request */
+		if (result) {
+			if (!(req->cmd_flags & REQ_QUIET)) {
+				scsi_print_result(cmd);
+				if (driver_byte(result) & DRIVER_SENSE)
+					scsi_print_sense("", cmd);
+			}
+		}
+		blk_end_request(req, -EIO, blk_rq_bytes(req));
+		scsi_next_command(cmd);
+		break;
+	case ACTION_REQUEUE:
+		/* Put the request back at the head of the queue.
+		 * A new command will be prepared and issued.
 		 */
 		scsi_requeue_command(q, cmd);
-		return;
-	}
-	if (result) {
-		if (!(req->cmd_flags & REQ_QUIET)) {
-			scsi_print_result(cmd);
-			if (driver_byte(result) & DRIVER_SENSE)
-				scsi_print_sense("", cmd);
-		}
+		break;
+	case ACTION_RETRY:
+		/* Retry the same command immediately */
+		scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
+		break;
+	case ACTION_DELAYED_RETRY:
+		/* Retry the same command after a delay */
+		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
+		break;
 	}
-	scsi_end_request(cmd, -EIO, this_count, !result);
 }
 
 static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
Index: usb-2.6/drivers/scsi/scsi_lib.c
===================================================================
--- usb-2.6.orig/drivers/scsi/scsi_lib.c
+++ usb-2.6/drivers/scsi/scsi_lib.c
@@ -642,69 +642,6 @@ void scsi_run_host_queues(struct Scsi_Ho
 		scsi_run_queue(sdev->request_queue);
 }
 
-/*
- * Function:    scsi_end_request()
- *
- * Purpose:     Post-processing of completed commands (usually invoked at end
- *		of upper level post-processing and scsi_io_completion).
- *
- * Arguments:   cmd	 - command that is complete.
- *              error    - 0 if I/O indicates success, < 0 for I/O error.
- *              bytes    - number of bytes of completed I/O
- *		requeue  - indicates whether we should requeue leftovers.
- *
- * Lock status: Assumed that lock is not held upon entry.
- *
- * Returns:     cmd if requeue required, NULL otherwise.
- *
- * Notes:       This is called for block device requests in order to
- *              mark some number of sectors as complete.
- * 
- *		We are guaranteeing that the request queue will be goosed
- *		at some point during this call.
- * Notes:	If cmd was requeued, upon return it will be a stale pointer.
- */
-static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
-					  int bytes, int requeue)
-{
-	struct request_queue *q = cmd->device->request_queue;
-	struct request *req = cmd->request;
-
-	/*
-	 * If there are blocks left over at the end, set up the command
-	 * to queue the remainder of them.
-	 */
-	if (blk_end_request(req, error, bytes)) {
-		int leftover = (req->hard_nr_sectors << 9);
-
-		if (blk_pc_request(req))
-			leftover = req->data_len;
-
-		/* kill remainder if no retrys */
-		if (error && blk_noretry_request(req))
-			blk_end_request(req, error, leftover);
-		else {
-			if (requeue) {
-				/*
-				 * Bleah.  Leftovers again.  Stick the
-				 * leftovers in the front of the
-				 * queue, and goose the queue again.
-				 */
-				scsi_requeue_command(q, cmd);
-				cmd = NULL;
-			}
-			return cmd;
-		}
-	}
-
-	/*
-	 * This will goose the queue request function at the end, so we don't
-	 * need to worry about launching another command.
-	 */
-	scsi_next_command(cmd);
-	return NULL;
-}
-
 static inline unsigned int scsi_sgtable_index(unsigned short nents)
 {
 	unsigned int index;
@@ -852,7 +789,6 @@ static void scsi_end_bidi_request(struct
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
-	int this_count;
 	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = cmd->request;
 	int error = 0;
@@ -903,22 +839,28 @@ void scsi_io_completion(struct scsi_cmnd
 	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
 				      "%d bytes done.\n",
 				      req->nr_sectors, good_bytes));
-
-	/* A number of bytes were successfully read.  If there
-	 * are leftovers and there is some kind of error
-	 * (result != 0), retry the rest.
-	 */
-	if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
+	if (blk_end_request(req, error, good_bytes) == 0) {
+		/* This request is completely finished; start the next one */
+		scsi_next_command(cmd);
 		return;
-	this_count = blk_rq_bytes(req);
+	}
+
+	/* The request isn't finished yet.  Figure out what to do next. */
 	action = ACTION_FAIL;
+	if (result == 0) {
+		/* No error, so carry out the remainder of the request. */
+		action = ACTION_REQUEUE;
 
-	if (host_byte(result) == DID_RESET) {
+	} else if (error && blk_noretry_request(req)) {
+		/* Retrys are disallowed, so kill the remainder. */
+
+	} else if (host_byte(result) == DID_RESET) {
 		/* Third party bus reset or reset for error recovery
 		 * reasons.  Just retry the command and see what
 		 * happens.
 		 */
 		action = ACTION_RETRY;
+
 	} else if (sense_valid && !sense_deferred) {
 		switch (sshdr.sense_key) {
 		case UNIT_ATTENTION:

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux