Mike Miller wrote: > Patch 1 of 1 > > This patch provides the better "kick-in-the-pants" on driver load in a > kexec'ed environment. > > I've successfully sanity tested the port in my lab. Randy, please apply and > test. You seem to be able to bring out the worst in the driver. ;-) Hi Mike, I've booted this (new kernel through kexec) 4-5 times successfully, which doesn't prove a whole lot since the failure is intermittent. Anyway, I'll continue to apply this patch in my daily kernel testing... Thanks. > Author: Chip Coldwell <coldwell@xxxxxxxxxx> > > CCISS: Use PCI power management to reset the controller > > The kexec kernel resets the CCISS hardware in three steps: > > 1. Use PCI power management states to reset the controller > in the kexec kernel. > 2. Clear the MSI/MSI-X bits in PCI configuration space so > that MSI initialization in the kexec kernel doesn't fail. > 3. Use the CCISS "No-op" message to determine when the > controller firmware has recovered from the PCI PM reset. > > Signed-off-by: Mike Miller <mike.miller@xxxxxx> > > ------------------------------------------------------------------------------- > diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c > index 01e6938..ff4a105 100644 > --- a/drivers/block/cciss.c > +++ b/drivers/block/cciss.c > @@ -3390,6 +3390,205 @@ static void free_hba(int i) > kfree(p); > } > > +/* Send a message CDB to the firmware. 
*/ > +static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type) > +{ > + typedef struct { > + CommandListHeader_struct CommandHeader; > + RequestBlock_struct Request; > + ErrDescriptor_struct ErrorDescriptor; > + } Command; > + static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct); > + Command *cmd; > + dma_addr_t paddr64; > + uint32_t paddr32, tag; > + void __iomem *vaddr; > + int i, err; > + > + vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); > + if (vaddr == NULL) > + return -ENOMEM; > + > + /* The Inbound Post Queue only accepts 32-bit physical addresses for the > + CCISS commands, so they must be allocated from the lower 4GiB of > + memory. */ > + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); > + if (err) { > + iounmap(vaddr); > + return -ENOMEM; > + } > + > + cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64); > + if (cmd == NULL) { > + iounmap(vaddr); > + return -ENOMEM; > + } > + > + /* This must fit, because of the 32-bit consistent DMA mask. Also, > + although there's no guarantee, we assume that the address is at > + least 4-byte aligned (most likely, it's page-aligned). 
*/ > + paddr32 = paddr64; > + > + cmd->CommandHeader.ReplyQueue = 0; > + cmd->CommandHeader.SGList = 0; > + cmd->CommandHeader.SGTotal = 0; > + cmd->CommandHeader.Tag.lower = paddr32; > + cmd->CommandHeader.Tag.upper = 0; > + memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8); > + > + cmd->Request.CDBLen = 16; > + cmd->Request.Type.Type = TYPE_MSG; > + cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE; > + cmd->Request.Type.Direction = XFER_NONE; > + cmd->Request.Timeout = 0; /* Don't time out */ > + cmd->Request.CDB[0] = opcode; > + cmd->Request.CDB[1] = type; > + memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */ > + > + cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command); > + cmd->ErrorDescriptor.Addr.upper = 0; > + cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct); > + > + writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET); > + > + for (i = 0; i < 10; i++) { > + tag = readl(vaddr + SA5_REPLY_PORT_OFFSET); > + if ((tag & ~3) == paddr32) > + break; > + schedule_timeout_uninterruptible(HZ); > + } > + > + iounmap(vaddr); > + > + /* we leak the DMA buffer here ... no choice since the controller could > + still complete the command. */ > + if (i == 10) { > + printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n", > + opcode, type); > + return -ETIMEDOUT; > + } > + > + pci_free_consistent(pdev, cmd_sz, cmd, paddr64); > + > + if (tag & 2) { > + printk(KERN_ERR "cciss: controller message %02x:%02x failed\n", > + opcode, type); > + return -EIO; > + } > + > + printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n", > + opcode, type); > + return 0; > +} > + > +#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0) > +#define cciss_noop(p) cciss_message(p, 3, 0) > + > +static __devinit int cciss_reset_msi(struct pci_dev *pdev) > +{ > +/* the #defines are stolen from drivers/pci/msi.h. 
*/ > +#define msi_control_reg(base) (base + PCI_MSI_FLAGS) > +#define PCI_MSIX_FLAGS_ENABLE (1 << 15) > + > + int pos; > + u16 control = 0; > + > + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI); > + if (pos) { > + pci_read_config_word(pdev, msi_control_reg(pos), &control); > + if (control & PCI_MSI_FLAGS_ENABLE) { > + printk(KERN_INFO "cciss: resetting MSI\n"); > + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE); > + } > + } > + > + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX); > + if (pos) { > + pci_read_config_word(pdev, msi_control_reg(pos), &control); > + if (control & PCI_MSIX_FLAGS_ENABLE) { > + printk(KERN_INFO "cciss: resetting MSI-X\n"); > + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE); > + } > + } > + > + return 0; > +} > + > +/* This does a hard reset of the controller using PCI power management > + * states. */ > +static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev) > +{ > + u16 pmcsr, saved_config_space[32]; > + int i, pos; > + > + printk(KERN_INFO "cciss: using PCI PM to reset controller\n"); > + > + /* This is very nearly the same thing as > + > + pci_save_state(pci_dev); > + pci_set_power_state(pci_dev, PCI_D3hot); > + pci_set_power_state(pci_dev, PCI_D0); > + pci_restore_state(pci_dev); > + > + but we can't use these nice canned kernel routines on > + kexec, because they also check the MSI/MSI-X state in PCI > + configuration space and do the wrong thing when it is > + set/cleared. Also, the pci_save/restore_state functions > + violate the ordering requirements for restoring the > + configuration space from the CCISS document (see the > + comment below). So we roll our own .... 
*/ > + > + for (i = 0; i < 32; i++) > + pci_read_config_word(pdev, 2*i, &saved_config_space[i]); > + > + pos = pci_find_capability(pdev, PCI_CAP_ID_PM); > + if (pos == 0) { > + printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n"); > + return -ENODEV; > + } > + > + /* Quoting from the Open CISS Specification: "The Power > + * Management Control/Status Register (CSR) controls the power > + * state of the device. The normal operating state is D0, > + * CSR=00h. The software off state is D3, CSR=03h. To reset > + * the controller, place the interface device in D3 then to > + * D0, this causes a secondary PCI reset which will reset the > + * controller." */ > + > + /* enter the D3hot power management state */ > + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr); > + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; > + pmcsr |= PCI_D3hot; > + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); > + > + set_current_state(TASK_UNINTERRUPTIBLE); > + schedule_timeout(HZ >> 1); > + > + /* enter the D0 power management state */ > + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; > + pmcsr |= PCI_D0; > + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr); > + > + set_current_state(TASK_UNINTERRUPTIBLE); > + schedule_timeout(HZ >> 1); > + > + /* Restore the PCI configuration space. The Open CISS > + * Specification says, "Restore the PCI Configuration > + * Registers, offsets 00h through 60h. It is important to > + * restore the command register, 16-bits at offset 04h, > + * last. Do not restore the configuration status register, > + * 16-bits at offset 06h." Note that the offset is 2*i. */ > + for (i = 0; i < 32; i++) { > + if (i == 2 || i == 3) > + continue; > + pci_write_config_word(pdev, 2*i, saved_config_space[i]); > + } > + wmb(); > + pci_write_config_word(pdev, 4, saved_config_space[2]); > + > + return 0; > +} > + > /* > * This is it. Find all the controllers and register them. I really hate > * stealing all these major device numbers. 
> @@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, > int dac, return_code; > InquiryData_struct *inq_buff = NULL; > > + if (reset_devices) { > + /* Reset the controller with a PCI power-cycle */ > + if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev)) > + return -ENODEV; > + > + /* Some devices (notably the HP Smart Array 5i Controller) > + need a little pause here */ > + schedule_timeout_uninterruptible(30*HZ); > + > + /* Now try to get the controller to respond to a no-op */ > + for (i=0; i<12; i++) { > + if (cciss_noop(pdev) == 0) > + break; > + else > + printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : "")); > + } > + } > + > i = alloc_cciss_hba(); > if (i < 0) > return -1; -- ~Randy -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html