CRQ sends that fail with H_CLOSED should return SCSI_MLQUEUE_HOST_BUSY until firmware alerts the client of a CRQ transport event. The transport event will either reinitialize and requeue the requests, or fail and return I/O with DID_ERROR. To avoid failing the eh_* functions while re-attaching to the server adapter, the eh_* functions retry the send for a period of time, for as long as ibmvscsi_send_srp_event returns SCSI_MLQUEUE_HOST_BUSY. Signed-off-by: Robert Jennings <rcj@xxxxxxxxxxxxxxxxxx> Signed-off-by: Brian King <brking@xxxxxxxxxxxxxxxxxx> --- drivers/scsi/ibmvscsi/ibmvscsi.c | 59 ++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 11 deletions(-) Index: linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c =================================================================== --- linux-2.6.orig/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-11-09 08:53:02.000000000 -0600 +++ linux-2.6/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-11-09 08:53:36.000000000 -0600 @@ -629,6 +629,16 @@ list_del(&evt_struct->list); del_timer(&evt_struct->timer); + /* If send_crq returns H_CLOSED, return SCSI_MLQUEUE_HOST_BUSY. + * Firmware will send a CRQ with a transport event (0xFF) to + * tell this client what has happened to the transport. This + * will be handled in ibmvscsi_handle_crq() + */ + if (rc == H_CLOSED) { + dev_warn(hostdata->dev, "send warning. 
" + "Receive queue closed, will retry.\n"); + goto send_busy; + } dev_err(hostdata->dev, "send error %d\n", rc); atomic_inc(&hostdata->request_limit); goto send_error; @@ -976,6 +986,7 @@ int rsp_rc; unsigned long flags; u16 lun = lun_from_dev(cmd->device); + unsigned long wait_switch = 0; /* First, find this command in our sent list so we can figure * out the correct tag @@ -1019,15 +1030,30 @@ tsk_mgmt->lun, tsk_mgmt->task_tag); evt->sync_srp = &srp_rsp; - init_completion(&evt->comp); - rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); - spin_unlock_irqrestore(hostdata->host->host_lock, flags); + + wait_switch = jiffies + (init_timeout * HZ); + do { + init_completion(&evt->comp); + rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); + + if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY) + break; + + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + msleep(10); + spin_lock_irqsave(hostdata->host->host_lock, flags); + } while (time_before(jiffies, wait_switch)); + if (rsp_rc != 0) { + free_event_struct(&found_evt->hostdata->pool, found_evt); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); sdev_printk(KERN_ERR, cmd->device, "failed to send abort() event. 
rc=%d\n", rsp_rc); return FAILED; } + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + wait_for_completion(&evt->comp); /* make sure we got a good response */ @@ -1099,6 +1125,7 @@ int rsp_rc; unsigned long flags; u16 lun = lun_from_dev(cmd->device); + unsigned long wait_switch = 0; spin_lock_irqsave(hostdata->host->host_lock, flags); evt = get_event_struct(&hostdata->pool); @@ -1125,9 +1152,20 @@ tsk_mgmt->lun); evt->sync_srp = &srp_rsp; - init_completion(&evt->comp); - rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); - spin_unlock_irqrestore(hostdata->host->host_lock, flags); + + wait_switch = jiffies + (init_timeout * HZ); + do { + init_completion(&evt->comp); + rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + + if (rsp_rc != SCSI_MLQUEUE_HOST_BUSY) + break; + + msleep(10); + spin_lock_irqsave(hostdata->host->host_lock, flags); + } while (time_before(jiffies, wait_switch)); + if (rsp_rc != 0) { sdev_printk(KERN_ERR, cmd->device, "failed to send reset event. rc=%d\n", rsp_rc); - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html