On Sun, 19 Oct 2008, Boaz Harrosh wrote: > Alan Stern wrote: > > We do have a problem with infinite retry loops. I'm not sure which > > kernels are affected, but there's a good chance 2.6.27 is and an > > excellent chance that 2.6.28-rc1 will be. ... > Do you have the scsi_io_completion patchset on a public git somewhere? > I would like to re-test them and review them again. They aren't in any git repositories, so I am including the two patches as attachments to this message. The first patch changes the failure analysis logic in scsi_io_completion() along the lines suggested by James, and the second gets rid of scsi_end_request(). They are based roughly on 2.6.27, so they might not apply cleanly after the merge window. Neither patch addresses the infinite-retry problem; I wanted to keep the issues separate. > Did you try them with above problem and do they solve the issue? At this point I can't remember exactly which combinations I tried! :-) However I don't think these patches will have any effect on the retry loop. > Also have you looked farther into the retries/timeout issues from > block layer? Not yet. I'm waiting for 2.6.28-rc1 to appear. Alan Stern
Index: usb-2.6/drivers/scsi/scsi_lib.c =================================================================== --- usb-2.6.orig/drivers/scsi/scsi_lib.c +++ usb-2.6/drivers/scsi/scsi_lib.c @@ -859,6 +859,8 @@ void scsi_io_completion(struct scsi_cmnd struct scsi_sense_hdr sshdr; int sense_valid = 0; int sense_deferred = 0; + enum {ACTION_FAIL, ACTION_REQUEUE, ACTION_RETRY, + ACTION_DELAYED_RETRY} action; if (result) { sense_valid = scsi_command_normalize_sense(cmd, &sshdr); @@ -909,11 +911,15 @@ void scsi_io_completion(struct scsi_cmnd if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL) return; this_count = blk_rq_bytes(req); + action = ACTION_FAIL; - /* good_bytes = 0, or (inclusive) there were leftovers and - * result = 0, so scsi_end_request couldn't retry. - */ - if (sense_valid && !sense_deferred) { + if (host_byte(result) == DID_RESET) { + /* Third party bus reset or reset for error recovery + * reasons. Just retry the command and see what + * happens. + */ + action = ACTION_RETRY; + } else if (sense_valid && !sense_deferred) { switch (sshdr.sense_key) { case UNIT_ATTENTION: if (cmd->device->removable) { @@ -921,16 +927,13 @@ void scsi_io_completion(struct scsi_cmnd * and quietly refuse further access. */ cmd->device->changed = 1; - scsi_end_request(cmd, -EIO, this_count, 1); - return; } else { /* Must have been a power glitch, or a * bus reset. Could not have been a * media change, so we just retry the - * request and see what happens. + * command and see what happens. */ - scsi_requeue_command(q, cmd); - return; + action = ACTION_RETRY; } break; case ILLEGAL_REQUEST: @@ -946,22 +949,13 @@ void scsi_io_completion(struct scsi_cmnd sshdr.asc == 0x20 && sshdr.ascq == 0x00) && (cmd->cmnd[0] == READ_10 || cmd->cmnd[0] == WRITE_10)) { + /* This will issue a new 6-byte command. */ cmd->device->use_10_for_rw = 0; - /* This will cause a retry with a - * 6-byte command. - */ - scsi_requeue_command(q, cmd); - } else if (sshdr.asc == 0x10) /* DIX */ - scsi_end_request(cmd, -EIO, this_count, 0); - else - scsi_end_request(cmd, -EIO, this_count, 1); - return; - case ABORTED_COMMAND: - if (sshdr.asc == 0x10) { /* DIF */ - scsi_end_request(cmd, -EIO, this_count, 0); - return; + action = ACTION_REQUEUE; } break; + case ABORTED_COMMAND: + break; case NOT_READY: /* If the device is in the process of becoming * ready, or has a temporary blockage, retry. @@ -975,19 +969,16 @@ void scsi_io_completion(struct scsi_cmnd case 0x07: /* operation in progress */ case 0x08: /* Long write in progress */ case 0x09: /* self test in progress */ - scsi_requeue_command(q, cmd); - return; - default: + action = ACTION_DELAYED_RETRY; break; } } - if (!(req->cmd_flags & REQ_QUIET)) + if (!(req->cmd_flags & REQ_QUIET) && + action == ACTION_FAIL) scsi_cmd_print_sense_hdr(cmd, "Device not ready", &sshdr); - - scsi_end_request(cmd, -EIO, this_count, 1); - return; + break; case VOLUME_OVERFLOW: if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, @@ -996,28 +987,38 @@ void scsi_io_completion(struct scsi_cmnd scsi_print_sense("", cmd); } /* See SSC3rXX or current. */ - scsi_end_request(cmd, -EIO, this_count, 1); - return; - default: break; } } - if (host_byte(result) == DID_RESET) { - /* Third party bus reset or reset for error recovery - * reasons. Just retry the request and see what - * happens. + + switch (action) { + case ACTION_FAIL: + /* Give up and fail the remainder of the request */ + if (result) { + if (!(req->cmd_flags & REQ_QUIET)) { + scsi_print_result(cmd); + if (driver_byte(result) & DRIVER_SENSE) + scsi_print_sense("", cmd); + } + } + blk_end_request(req, -EIO, blk_rq_bytes(req)); + scsi_next_command(cmd); + break; + case ACTION_REQUEUE: + /* Put the request back at the head of the queue. + * A new command will be prepared and issued. */ scsi_requeue_command(q, cmd); - return; - } - if (result) { - if (!(req->cmd_flags & REQ_QUIET)) { - scsi_print_result(cmd); - if (driver_byte(result) & DRIVER_SENSE) - scsi_print_sense("", cmd); - } + break; + case ACTION_RETRY: + /* Retry the same command immediately */ + scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY); + break; + case ACTION_DELAYED_RETRY: + /* Retry the same command after a delay */ + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); + break; } - scsi_end_request(cmd, -EIO, this_count, !result); } static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
Index: usb-2.6/drivers/scsi/scsi_lib.c =================================================================== --- usb-2.6.orig/drivers/scsi/scsi_lib.c +++ usb-2.6/drivers/scsi/scsi_lib.c @@ -642,69 +642,6 @@ void scsi_run_host_queues(struct Scsi_Ho scsi_run_queue(sdev->request_queue); } -/* - * Function: scsi_end_request() - * - * Purpose: Post-processing of completed commands (usually invoked at end - * of upper level post-processing and scsi_io_completion). - * - * Arguments: cmd - command that is complete. - * error - 0 if I/O indicates success, < 0 for I/O error. - * bytes - number of bytes of completed I/O - * requeue - indicates whether we should requeue leftovers. - * - * Lock status: Assumed that lock is not held upon entry. - * - * Returns: cmd if requeue required, NULL otherwise. - * - * Notes: This is called for block device requests in order to - * mark some number of sectors as complete. - * - * We are guaranteeing that the request queue will be goosed - * at some point during this call. - * Notes: If cmd was requeued, upon return it will be a stale pointer. - */ -static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, - int bytes, int requeue) -{ - struct request_queue *q = cmd->device->request_queue; - struct request *req = cmd->request; - - /* - * If there are blocks left over at the end, set up the command - * to queue the remainder of them. - */ - if (blk_end_request(req, error, bytes)) { - int leftover = (req->hard_nr_sectors << 9); - - if (blk_pc_request(req)) - leftover = req->data_len; - - /* kill remainder if no retrys */ - if (error && blk_noretry_request(req)) - blk_end_request(req, error, leftover); - else { - if (requeue) { - /* - * Bleah. Leftovers again. Stick the - * leftovers in the front of the - * queue, and goose the queue again. - */ - scsi_requeue_command(q, cmd); - cmd = NULL; - } - return cmd; - } - } - - /* - * This will goose the queue request function at the end, so we don't - * need to worry about launching another command. - */ - scsi_next_command(cmd); - return NULL; -} - static inline unsigned int scsi_sgtable_index(unsigned short nents) { unsigned int index; @@ -852,7 +789,6 @@ static void scsi_end_bidi_request(struct void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - int this_count; struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; int error = 0; @@ -903,22 +839,28 @@ void scsi_io_completion(struct scsi_cmnd SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, " "%d bytes done.\n", req->nr_sectors, good_bytes)); - - /* A number of bytes were successfully read. If there - * are leftovers and there is some kind of error - * (result != 0), retry the rest. - */ - if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL) + if (blk_end_request(req, error, good_bytes) == 0) { + /* This request is completely finished; start the next one */ + scsi_next_command(cmd); return; - this_count = blk_rq_bytes(req); + } + + /* The request isn't finished yet. Figure out what to do next. */ action = ACTION_FAIL; + if (result == 0) { + /* No error, so carry out the remainder of the request. */ + action = ACTION_REQUEUE; - if (host_byte(result) == DID_RESET) { + } else if (error && blk_noretry_request(req)) { + /* Retrys are disallowed, so kill the remainder. */ + + } else if (host_byte(result) == DID_RESET) { /* Third party bus reset or reset for error recovery * reasons. Just retry the command and see what * happens. */ action = ACTION_RETRY; + } else if (sense_valid && !sense_deferred) { switch (sshdr.sense_key) { case UNIT_ATTENTION: