From: Kevin Barnett <kevin.barnett@xxxxxxxxxxxxx>

Implement aborts as resets.

Avoid I/O stalls across all devices attached to a controller when device
I/O requests time out.

Add an eh_abort_handler that does not abort the individual command.
Instead, it schedules a per-LUN work item that runs the same LUN reset
path as eh_device_reset_handler and then waits until the driver has
completed the timed-out command.

While a LUN is being reset, only new requests for that LUN are rejected
with SCSI_MLQUEUE_HOST_BUSY. The controller-wide request block is now
lifted once the inbound queues have drained, before the LUN reset itself
is issued, so other devices on the controller keep running during the
reset.

The "resetting scsi ..." message reports the opcode handed in by the
caller (captured when the abort or reset was requested).

Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@xxxxxxxxxxxxx>
Reviewed-by: Scott Teel <scott.teel@xxxxxxxxxxxxx>
Reviewed-by: Scott Benesh <scott.benesh@xxxxxxxxxxxxx>
Reviewed-by: Mike McGowen <mike.mcgowen@xxxxxxxxxxxxx>
Signed-off-by: Kevin Barnett <kevin.barnett@xxxxxxxxxxxxx>
Signed-off-by: Don Brace <don.brace@xxxxxxxxxxxxx>
---
 drivers/scsi/smartpqi/smartpqi.h      |  14 ++-
 drivers/scsi/smartpqi/smartpqi_init.c | 173 ++++++++++++++++++++------
 2 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index e392eaf5b2bf..e560d99efa95 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1085,7 +1085,16 @@ struct pqi_stream_data {
 	u32	last_accessed;
 };

-#define PQI_MAX_LUNS_PER_DEVICE	256
+#define PQI_MAX_LUNS_PER_DEVICE		256
+
+struct pqi_tmf_work {
+	struct work_struct work_struct;
+	struct scsi_cmnd *scmd;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	u8	lun;
+	u8	scsi_opcode;
+};

 struct pqi_scsi_dev {
 	int	devtype;		/* as reported by INQUIRY command */
@@ -1111,6 +1120,7 @@ struct pqi_scsi_dev {
 	u8	erase_in_progress : 1;
 	bool	aio_enabled;		/* only valid for physical disks */
 	bool	in_remove;
+	bool	in_reset[PQI_MAX_LUNS_PER_DEVICE];
 	bool	device_offline;
 	u8	vendor[8];		/* bytes 8-15 of inquiry data */
 	u8	model[16];		/* bytes 16-31 of inquiry data */
@@ -1149,6 +1159,8 @@ struct pqi_scsi_dev {
 	struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
 	atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE];
 	unsigned int raid_bypass_cnt;
+
+	struct pqi_tmf_work tmf_work[PQI_MAX_LUNS_PER_DEVICE];
 };

 /* VPD inquiry pages */
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 4486259f85ab..ec36896eb08e 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -48,6 +48,8 @@
 #define PQI_POST_RESET_DELAY_SECS			5
 #define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS	10

+#define PQI_NO_COMPLETION	((void *)-1)
+
 MODULE_AUTHOR("Microchip");
 MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
 	DRIVER_VERSION);
@@ -96,6 +98,7 @@ static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
 static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs);
 static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info);
+static void pqi_tmf_worker(struct work_struct *work);

 /* for flags argument to pqi_submit_raid_request_synchronous() */
 #define PQI_SYNC_FLAGS_INTERRUPTABLE	0x1
@@ -455,6 +458,21 @@ static inline bool pqi_device_in_remove(struct pqi_scsi_dev *device)
 	return device->in_remove;
 }

+static inline void pqi_device_reset_start(struct pqi_scsi_dev *device, u8 lun)
+{
+	device->in_reset[lun] = true;
+}
+
+static inline void pqi_device_reset_done(struct pqi_scsi_dev *device, u8 lun)
+{
+	device->in_reset[lun] = false;
+}
+
+static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device, u8 lun)
+{
+	return device->in_reset[lun];
+}
+
 static inline int pqi_event_type_to_event_index(unsigned int event_type)
 {
 	int index;
@@ -2122,6 +2140,15 @@ static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
 	return device->sdev != NULL;
 }

+static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
+{
+	unsigned int lun;
+	struct pqi_tmf_work *tmf_work;
+
+	for (lun = 0, tmf_work = device->tmf_work; lun < PQI_MAX_LUNS_PER_DEVICE; lun++, tmf_work++)
+		INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
+}
+
 static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
 {
@@ -2202,6 +2229,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
 		list_add_tail(&device->add_list_entry, &add_list);
 		/* To prevent this device structure from being freed later. */
 		device->keep_device = true;
+		pqi_init_device_tmf_work(device);
 	}

 	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -5623,6 +5651,7 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
 void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 {
 	struct pqi_scsi_dev *device;
+	struct completion *wait;

 	if (!scmd->device) {
 		set_host_byte(scmd, DID_NO_CONNECT);
@@ -5636,6 +5665,10 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
 	}

 	atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+
+	wait = (struct completion *)xchg(&scmd->host_scribble, NULL);
+	if (wait != PQI_NO_COMPLETION)
+		complete(wait);
 }

 static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5719,6 +5752,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	u16 hw_queue;
 	struct pqi_queue_group *queue_group;
 	bool raid_bypassed;
+	u8 lun;
+
+	scmd->host_scribble = PQI_NO_COMPLETION;

 	device = scmd->device->hostdata;

@@ -5728,7 +5764,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 		return 0;
 	}

-	atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]);
+	lun = (u8)scmd->device->lun;
+
+	atomic_inc(&device->scsi_cmds_outstanding[lun]);

 	ctrl_info = shost_to_hba(shost);

@@ -5738,7 +5776,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 		return 0;
 	}

-	if (pqi_ctrl_blocked(ctrl_info)) {
+	if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device, lun)) {
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto out;
 	}
@@ -5773,8 +5811,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
 	}

 out:
-	if (rc)
-		atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
+	if (rc) {
+		scmd->host_scribble = NULL;
+		atomic_dec(&device->scsi_cmds_outstanding[lun]);
+	}

 	return rc;
 }
@@ -5868,7 +5908,7 @@ static int pqi_wait_until_inbound_queues_empty(struct pqi_ctrl_info *ctrl_info)
 }

 static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
-	struct pqi_scsi_dev *device)
+	struct pqi_scsi_dev *device, u8 lun)
 {
 	unsigned int i;
 	unsigned int path;
@@ -5894,6 +5934,9 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
 				if (scsi_device != device)
 					continue;

+				if ((u8)scmd->device->lun != lun)
+					continue;
+
 				list_del(&io_request->request_list_entry);
 				set_host_byte(scmd, DID_RESET);
 				pqi_free_io_request(io_request);
@@ -5990,15 +6033,13 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,

 #define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS	30

-static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int rc;
 	struct pqi_io_request *io_request;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct pqi_task_management_request *request;
-	struct pqi_scsi_dev *device;

-	device = scmd->device->hostdata;
 	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 	io_request->io_complete_callback = pqi_lun_reset_complete;
 	io_request->context = &wait;
@@ -6011,14 +6052,14 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 	put_unaligned_le16(io_request->index, &request->request_id);
 	memcpy(request->lun_number, device->scsi3addr, sizeof(request->lun_number));
 	if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
-		request->ml_device_lun_number = (u8)scmd->device->lun;
+		request->ml_device_lun_number = lun;
 	request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
 	if (ctrl_info->tmf_iu_timeout_supported)
 		put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);

 	pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH, io_request);

-	rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait);
+	rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, lun, &wait);
 	if (rc == 0)
 		rc = io_request->status;

@@ -6032,18 +6073,16 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 #define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS		(10 * 60 * 1000)
 #define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS	(2 * 60 * 1000)

-static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int reset_rc;
 	int wait_rc;
 	unsigned int retries;
 	unsigned long timeout_msecs;
-	struct pqi_scsi_dev *device;

-	device = scmd->device->hostdata;
 	for (retries = 0;;) {
-		reset_rc = pqi_lun_reset(ctrl_info, scmd);
-		if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES)
+		reset_rc = pqi_lun_reset(ctrl_info, device, lun);
+		if (reset_rc == 0 || reset_rc == -ENODEV || reset_rc == -ENXIO || ++retries > PQI_LUN_RESET_RETRIES)
 			break;
 		msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
 	}
@@ -6051,60 +6090,53 @@ static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct sc
 	timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
 		PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;

-	wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs);
+	wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, timeout_msecs);
 	if (wait_rc && reset_rc == 0)
 		reset_rc = wait_rc;

 	return reset_rc == 0 ? SUCCESS : FAILED;
 }

-static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
+static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
 {
 	int rc;
-	struct pqi_scsi_dev *device;

-	device = scmd->device->hostdata;
 	pqi_ctrl_block_requests(ctrl_info);
 	pqi_ctrl_wait_until_quiesced(ctrl_info);
-	pqi_fail_io_queued_for_device(ctrl_info, device);
+	pqi_fail_io_queued_for_device(ctrl_info, device, lun);
 	rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
+	pqi_device_reset_start(device, lun);
+	pqi_ctrl_unblock_requests(ctrl_info);
 	if (rc)
 		rc = FAILED;
 	else
-		rc = pqi_lun_reset_with_retries(ctrl_info, scmd);
-	pqi_ctrl_unblock_requests(ctrl_info);
+		rc = pqi_lun_reset_with_retries(ctrl_info, device, lun);
+	pqi_device_reset_done(device, lun);

 	return rc;
 }

-static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+static int pqi_device_reset_handler(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun, struct scsi_cmnd *scmd, u8 scsi_opcode)
 {
 	int rc;
-	struct Scsi_Host *shost;
-	struct pqi_ctrl_info *ctrl_info;
-	struct pqi_scsi_dev *device;
-
-	shost = scmd->device->host;
-	ctrl_info = shost_to_hba(shost);
-	device = scmd->device->hostdata;

 	mutex_lock(&ctrl_info->lun_reset_mutex);

 	dev_err(&ctrl_info->pci_dev->dev,
 		"resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
-		shost->host_no,
-		device->bus, device->target, (u32)scmd->device->lun,
-		scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
+		ctrl_info->scsi_host->host_no,
+		device->bus, device->target, lun,
+		scsi_opcode);

 	pqi_check_ctrl_health(ctrl_info);
 	if (pqi_ctrl_offline(ctrl_info))
 		rc = FAILED;
 	else
-		rc = pqi_device_reset(ctrl_info, scmd);
+		rc = pqi_device_reset(ctrl_info, device, lun);

 	dev_err(&ctrl_info->pci_dev->dev,
-		"reset of scsi %d:%d:%d:%d: %s\n",
-		shost->host_no, device->bus, device->target, (u32)scmd->device->lun,
+		"reset of scsi %d:%d:%d:%u: %s\n",
+		ctrl_info->scsi_host->host_no, device->bus, device->target, lun,
 		rc == SUCCESS ? "SUCCESS" : "FAILED");

 	mutex_unlock(&ctrl_info->lun_reset_mutex);
@@ -6112,6 +6144,74 @@ static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
 	return rc;
 }

+static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
+{
+	struct Scsi_Host *shost;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	u8 scsi_opcode;
+
+	shost = scmd->device->host;
+	ctrl_info = shost_to_hba(shost);
+	device = scmd->device->hostdata;
+	scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+
+	return pqi_device_reset_handler(ctrl_info, device, (u8)scmd->device->lun, scmd, scsi_opcode);
+}
+
+static void pqi_tmf_worker(struct work_struct *work)
+{
+	struct pqi_tmf_work *tmf_work;
+	struct scsi_cmnd *scmd;
+
+	tmf_work = container_of(work, struct pqi_tmf_work, work_struct);
+	scmd = (struct scsi_cmnd *)xchg(&tmf_work->scmd, NULL);
+
+	pqi_device_reset_handler(tmf_work->ctrl_info, tmf_work->device, tmf_work->lun, scmd, tmf_work->scsi_opcode);
+}
+
+static int pqi_eh_abort_handler(struct scsi_cmnd *scmd)
+{
+	struct Scsi_Host *shost;
+	struct pqi_ctrl_info *ctrl_info;
+	struct pqi_scsi_dev *device;
+	struct pqi_tmf_work *tmf_work;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	shost = scmd->device->host;
+	ctrl_info = shost_to_hba(shost);
+
+	dev_err(&ctrl_info->pci_dev->dev,
+		"attempting TASK ABORT on SCSI cmd at %p\n", scmd);
+
+	if (cmpxchg(&scmd->host_scribble, PQI_NO_COMPLETION, (void *)&wait) == NULL) {
+		dev_err(&ctrl_info->pci_dev->dev,
+			"SCSI cmd at %p already completed\n", scmd);
+		scmd->result = DID_RESET << 16;
+		goto out;
+	}
+
+	device = scmd->device->hostdata;
+	tmf_work = &device->tmf_work[(u8)scmd->device->lun];
+
+	if (cmpxchg(&tmf_work->scmd, NULL, scmd) == NULL) {
+		tmf_work->ctrl_info = ctrl_info;
+		tmf_work->device = device;
+		tmf_work->lun = (u8)scmd->device->lun;
+		tmf_work->scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
+		schedule_work(&tmf_work->work_struct);
+	}
+
+	wait_for_completion(&wait);
+
+	dev_err(&ctrl_info->pci_dev->dev,
+		"TASK ABORT on SCSI cmd at %p: SUCCESS\n", scmd);
+
+out:
+
+	return SUCCESS;
+}
+
 static int pqi_slave_alloc(struct scsi_device *sdev)
 {
 	struct pqi_scsi_dev *device;
@@ -7106,6 +7206,7 @@ static const struct scsi_host_template pqi_driver_template = {
 	.scan_finished = pqi_scan_finished,
 	.this_id = -1,
 	.eh_device_reset_handler = pqi_eh_device_reset_handler,
+	.eh_abort_handler = pqi_eh_abort_handler,
 	.ioctl = pqi_ioctl,
 	.slave_alloc = pqi_slave_alloc,
 	.slave_configure = pqi_slave_configure,
--
2.42.0.rc2