From: Stephen M. Cameron <scameron@xxxxxxxxxxxxxxxxxx> Do not expose drives that are undergoing a format immediately to the OS, instead wait until they are ready before bringing them online. This is so that logical drives created with "rapid parity initialization" do not get immediately kicked off the system for being unresponsive. Signed-off-by: Stephen M. Cameron <scameron@xxxxxxxxxxxxxxxxxx> --- drivers/scsi/hpsa.c | 311 +++++++++++++++++++++++++++++++++++++++++++---- drivers/scsi/hpsa.h | 9 + drivers/scsi/hpsa_cmd.h | 15 ++ 3 files changed, 311 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index cb984c2..a0fcaa6 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -1133,6 +1133,9 @@ static int hpsa_scsi_find_entry(struct hpsa_scsi_dev_t *needle, return DEVICE_UPDATED; return DEVICE_SAME; } else { + /* Keep offline devices offline */ + if (needle->volume_offline) + return DEVICE_NOT_FOUND; return DEVICE_CHANGED; } } @@ -1141,6 +1144,110 @@ static int hpsa_scsi_find_entry(struct hpsa_scsi_dev_t *needle, return DEVICE_NOT_FOUND; } +static void hpsa_monitor_offline_device(struct ctlr_info *h, + unsigned char scsi3addr[]) +{ + struct offline_device_entry *device; + unsigned long flags; + + /* Check to see if device is already on the list */ + spin_lock_irqsave(&h->offline_device_lock, flags); + list_for_each_entry(device, &h->offline_device_list, offline_list) { + if (memcmp(device->scsi3addr, scsi3addr, + sizeof(device->scsi3addr)) == 0) { + spin_unlock_irqrestore(&h->offline_device_lock, flags); + return; + } + } + spin_unlock_irqrestore(&h->offline_device_lock, flags); + + /* Device is not on the list, add it. */ + device = kmalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + dev_warn(&h->pdev->dev, "out of memory in %s\n", __func__); + return; + } + memcpy(device->scsi3addr, scsi3addr, sizeof(device->scsi3addr)); + spin_lock_irqsave(&h->offline_device_lock, flags); + list_add_tail(&device->offline_list, &h->offline_device_list); + spin_unlock_irqrestore(&h->offline_device_lock, flags); +} + +/* Print a message explaining various offline volume states */ +static void hpsa_show_volume_status(struct ctlr_info *h, + struct hpsa_scsi_dev_t *sd) +{ + if (sd->volume_offline == HPSA_VPD_LV_STATUS_UNSUPPORTED) + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume status is not available through vital product data pages.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + switch (sd->volume_offline) { + case HPSA_LV_OK: + break; + case HPSA_LV_UNDERGOING_ERASE: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is undergoing background erase process.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_UNDERGOING_RPI: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is undergoing rapid parity initialization process.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_PENDING_RPI: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is queued for rapid parity initialization process.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_ENCRYPTED_NO_KEY: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is encrypted and cannot be accessed because key is not present.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_PLAINTEXT_IN_ENCRYPT_ONLY_CONTROLLER: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is not encrypted and cannot be accessed because controller is in encryption-only mode.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_UNDERGOING_ENCRYPTION: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is undergoing encryption process.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_UNDERGOING_ENCRYPTION_REKEYING: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is undergoing encryption re-keying process.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_ENCRYPTED_IN_NON_ENCRYPTED_CONTROLLER: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is encrypted and cannot be accessed because controller does not have encryption enabled.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_PENDING_ENCRYPTION: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is pending migration to encrypted state, but process has not started.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + case HPSA_LV_PENDING_ENCRYPTION_REKEYING: + dev_info(&h->pdev->dev, + "C%d:B%d:T%d:L%d Volume is encrypted and is pending encryption rekeying.\n", + h->scsi_host->host_no, + sd->bus, sd->target, sd->lun); + break; + } +} + static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno, struct hpsa_scsi_dev_t *sd[], int nsds) { @@ -1205,6 +1312,20 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno, for (i = 0; i < nsds; i++) { if (!sd[i]) /* if already added above. */ continue; + + /* Don't add devices which are NOT READY, FORMAT IN PROGRESS + * as the SCSI mid-layer does not handle such devices well. + * It relentlessly loops sending TUR at 3Hz, then READ(10) + * at 160Hz, and prevents the system from coming up. + */ + if (sd[i]->volume_offline) { + hpsa_show_volume_status(h, sd[i]); + dev_info(&h->pdev->dev, "c%db%dt%dl%d: temporarily offline\n", + h->scsi_host->host_no, + sd[i]->bus, sd[i]->target, sd[i]->lun); + continue; + } + device_change = hpsa_scsi_find_entry(sd[i], h->dev, h->ndevices, &entry); if (device_change == DEVICE_NOT_FOUND) { @@ -1223,6 +1344,17 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno, } spin_unlock_irqrestore(&h->devlock, flags); + /* Monitor devices which are in one of several NOT READY states to be + * brought online later. This must be done without holding h->devlock, + * so don't touch h->dev[] + */ + for (i = 0; i < nsds; i++) { + if (!sd[i]) /* if already added above. */ + continue; + if (sd[i]->volume_offline) + hpsa_monitor_offline_device(h, sd[i]->scsi3addr); + } + /* Don't notify scsi mid layer of any changes the first time through * (or if there are no changes) scsi_scan_host will do it later the * first time through. @@ -2326,6 +2458,117 @@ static inline void hpsa_set_bus_target_lun(struct hpsa_scsi_dev_t *device, device->lun = lun; } +/* Use VPD inquiry to get details of volume status */ +static int hpsa_get_volume_status(struct ctlr_info *h, + unsigned char scsi3addr[]) +{ + int rc; + int status; + int size; + unsigned char *buf; + + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return HPSA_VPD_LV_STATUS_UNSUPPORTED; + + /* Does controller have VPD for logical volume status? */ + if (!hpsa_vpd_page_supported(h, scsi3addr, HPSA_VPD_LV_STATUS)) { + dev_warn(&h->pdev->dev, "Logical volume status VPD page is unsupported.\n"); + goto exit_failed; + } + + /* Get the size of the VPD return buffer */ + rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS, + buf, HPSA_VPD_HEADER_SZ); + if (rc != 0) { + dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n"); + goto exit_failed; + } + size = buf[3]; + + /* Now get the whole VPD buffer */ + rc = hpsa_scsi_do_inquiry(h, scsi3addr, VPD_PAGE | HPSA_VPD_LV_STATUS, + buf, size + HPSA_VPD_HEADER_SZ); + if (rc != 0) { + dev_warn(&h->pdev->dev, "Logical volume status VPD inquiry failed.\n"); + goto exit_failed; + } + status = buf[4]; /* status byte */ + + kfree(buf); + return status; +exit_failed: + kfree(buf); + return HPSA_VPD_LV_STATUS_UNSUPPORTED; +} + +/* Determine offline status of a volume. + * Return either: + * 0 (not offline) + * -1 (offline for unknown reasons) + * # (integer code indicating one of several NOT READY states + * describing why a volume is to be kept offline) + */ +static unsigned char hpsa_volume_offline(struct ctlr_info *h, + unsigned char scsi3addr[]) +{ + struct CommandList *c; + unsigned char *sense, sense_key, asc, ascq; + int ldstat = 0; + u16 cmd_status; + u8 scsi_status; +#define ASC_LUN_NOT_READY 0x04 +#define ASCQ_LUN_NOT_READY_FORMAT_IN_PROGRESS 0x04 +#define ASCQ_LUN_NOT_READY_INITIALIZING_CMD_REQ 0x02 + + c = cmd_alloc(h); + if (!c) + return 0; + (void) fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, scsi3addr, TYPE_CMD); + hpsa_scsi_do_simple_cmd_core(h, c); + sense = c->err_info->SenseInfo; + sense_key = sense[2]; + asc = sense[12]; + ascq = sense[13]; + cmd_status = c->err_info->CommandStatus; + scsi_status = c->err_info->ScsiStatus; + cmd_free(h, c); + /* Is the volume 'not ready'? */ + if (cmd_status != CMD_TARGET_STATUS || + scsi_status != SAM_STAT_CHECK_CONDITION || + sense_key != NOT_READY || + asc != ASC_LUN_NOT_READY) { + return 0; + } + + /* Determine the reason for not ready state */ + ldstat = hpsa_get_volume_status(h, scsi3addr); + + /* Keep volume offline in certain cases: */ + switch (ldstat) { + case HPSA_LV_UNDERGOING_ERASE: + case HPSA_LV_UNDERGOING_RPI: + case HPSA_LV_PENDING_RPI: + case HPSA_LV_ENCRYPTED_NO_KEY: + case HPSA_LV_PLAINTEXT_IN_ENCRYPT_ONLY_CONTROLLER: + case HPSA_LV_UNDERGOING_ENCRYPTION: + case HPSA_LV_UNDERGOING_ENCRYPTION_REKEYING: + case HPSA_LV_ENCRYPTED_IN_NON_ENCRYPTED_CONTROLLER: + return ldstat; + case HPSA_VPD_LV_STATUS_UNSUPPORTED: + /* If VPD status page isn't available, + * use ASC/ASCQ to determine state + */ + if ((ascq == ASCQ_LUN_NOT_READY_FORMAT_IN_PROGRESS) || + (ascq == ASCQ_LUN_NOT_READY_INITIALIZING_CMD_REQ)) + return ldstat; + break; + default: + break; + } + return 0; +} + static int hpsa_update_device_info(struct ctlr_info *h, unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device, unsigned char *is_OBDR_device) @@ -2368,10 +2611,13 @@ static int hpsa_update_device_info(struct ctlr_info *h, hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); + this_device->volume_offline = + hpsa_volume_offline(h, scsi3addr); } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; this_device->offload_enabled = 0; + this_device->volume_offline = 0; } if (is_OBDR_device) { @@ -6442,7 +6688,7 @@ static void detect_controller_lockup(struct ctlr_info *h) h->last_heartbeat_timestamp = now; } -static int hpsa_kickoff_rescan(struct ctlr_info *h) +static void hpsa_ack_ctlr_events(struct ctlr_info *h) { int i; char *event_type; @@ -6485,42 +6731,49 @@ static int hpsa_kickoff_rescan(struct ctlr_info *h) hpsa_wait_for_mode_change_ack(h); #endif } - - /* Something in the device list may have changed to trigger - * the event, so do a rescan. - */ - hpsa_scan_start(h->scsi_host); - /* release reference taken on scsi host in check_controller_events */ - scsi_host_put(h->scsi_host); - return 0; + return; } /* Check a register on the controller to see if there are configuration * changes (added/changed/removed logical drives, etc.) which mean that * we should rescan the controller for devices. * Also check flag for driver-initiated rescan. - * If either flag or controller event indicate rescan, add the controller - * to the list of controllers needing to be rescanned, and gets a - * reference to the associated scsi_host. */ -static void hpsa_ctlr_needs_rescan(struct ctlr_info *h) +static int hpsa_ctlr_needs_rescan(struct ctlr_info *h) { + if (h->drv_req_rescan) + return 1; + if (!(h->fw_support & MISC_FW_EVENT_NOTIFY)) - return; + return 0; h->events = readl(&(h->cfgtable->event_notify)); - if (!(h->events & RESCAN_REQUIRED_EVENT_BITS) && !h->drv_req_rescan) - return; + return h->events & RESCAN_REQUIRED_EVENT_BITS; +} - /* - * Take a reference on scsi host for the duration of the scan - * Release in hpsa_kickoff_rescan(). No lock needed for scan_list - * as only a single thread accesses this list. - */ - scsi_host_get(h->scsi_host); - hpsa_kickoff_rescan(h); +/* + * Check if any of the offline devices have become ready + */ +static int hpsa_offline_devices_ready(struct ctlr_info *h) +{ + unsigned long flags; + struct offline_device_entry *d; + struct list_head *this, *tmp; + + spin_lock_irqsave(&h->offline_device_lock, flags); + list_for_each_safe(this, tmp, &h->offline_device_list) { + d = list_entry(this, struct offline_device_entry, + offline_list); + spin_unlock_irqrestore(&h->offline_device_lock, flags); + if (!hpsa_volume_offline(h, d->scsi3addr)) + return 1; + spin_lock_irqsave(&h->offline_device_lock, flags); + } + spin_unlock_irqrestore(&h->offline_device_lock, flags); + return 0; } + static void hpsa_monitor_ctlr_worker(struct work_struct *work) { unsigned long flags; @@ -6529,7 +6782,15 @@ static void hpsa_monitor_ctlr_worker(struct work_struct *work) detect_controller_lockup(h); if (h->lockup_detected) return; - hpsa_ctlr_needs_rescan(h); + + if (hpsa_ctlr_needs_rescan(h) || hpsa_offline_devices_ready(h)) { + scsi_host_get(h->scsi_host); + h->drv_req_rescan = 0; + hpsa_ack_ctlr_events(h); + hpsa_scan_start(h->scsi_host); + scsi_host_put(h->scsi_host); + } + spin_lock_irqsave(&h->lock, flags); if (h->remove_in_progress) { spin_unlock_irqrestore(&h->lock, flags); @@ -6579,7 +6840,9 @@ reinit_after_soft_reset: h->intr_mode = hpsa_simple_mode ? SIMPLE_MODE_INT : PERF_MODE_INT; INIT_LIST_HEAD(&h->cmpQ); INIT_LIST_HEAD(&h->reqQ); + INIT_LIST_HEAD(&h->offline_device_list); spin_lock_init(&h->lock); + spin_lock_init(&h->offline_device_lock); spin_lock_init(&h->scan_lock); spin_lock_init(&h->passthru_count_lock); rc = hpsa_pci_init(h); diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index c4a81f0..ae8c592 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -46,6 +46,7 @@ struct hpsa_scsi_dev_t { unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ unsigned char model[16]; /* bytes 16-31 of inquiry data */ unsigned char raid_level; /* from inquiry page 0xC1 */ + unsigned char volume_offline; /* discovered via TUR or VPD */ u32 ioaccel_handle; int offload_config; /* I/O accel RAID offload configured */ int offload_enabled; /* I/O accel RAID offload enabled */ @@ -197,10 +198,18 @@ struct ctlr_info { CTLR_STATE_CHANGE_EVENT_REDUNDANT_CNTRL | \ CTLR_STATE_CHANGE_EVENT_AIO_ENABLED_DISABLED | \ CTLR_STATE_CHANGE_EVENT_AIO_CONFIG_CHANGE) + spinlock_t offline_device_lock; + struct list_head offline_device_list; int acciopath_status; int drv_req_rescan; /* flag for driver to request rescan event */ int raid_offload_debug; }; + +struct offline_device_entry { + unsigned char scsi3addr[8]; + struct list_head offline_list; +}; + #define HPSA_ABORT_MSG 0 #define HPSA_DEVICE_RESET_MSG 1 #define HPSA_RESET_TYPE_CONTROLLER 0x00 diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index eaa7fda..8026d2e 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -147,8 +147,23 @@ #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 #define HPSA_VPD_LV_IOACCEL_STATUS 0xC2 +#define HPSA_VPD_LV_STATUS 0xC3 #define HPSA_VPD_HEADER_SZ 4 +/* Logical volume states */ +#define HPSA_VPD_LV_STATUS_UNSUPPORTED -1 +#define HPSA_LV_OK 0x0 +#define HPSA_LV_UNDERGOING_ERASE 0x0F +#define HPSA_LV_UNDERGOING_RPI 0x12 +#define HPSA_LV_PENDING_RPI 0x13 +#define HPSA_LV_ENCRYPTED_NO_KEY 0x14 +#define HPSA_LV_PLAINTEXT_IN_ENCRYPT_ONLY_CONTROLLER 0x15 +#define HPSA_LV_UNDERGOING_ENCRYPTION 0x16 +#define HPSA_LV_UNDERGOING_ENCRYPTION_REKEYING 0x17 +#define HPSA_LV_ENCRYPTED_IN_NON_ENCRYPTED_CONTROLLER 0x18 +#define HPSA_LV_PENDING_ENCRYPTION 0x19 +#define HPSA_LV_PENDING_ENCRYPTION_REKEYING 0x1A + struct vals32 { u32 lower; u32 upper; -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html