On Mon, 2014-03-03 at 17:37 -0800, gregkh@xxxxxxxxxxxxxxxxxxx wrote:
> The patch below does not apply to the 3.13-stable tree.
> If someone wants it applied there, or to any other stable or longterm
> tree, then please email the backport, including the original git commit
> id to <stable@xxxxxxxxxxxxxxx>.

Gavin, can you send a backport please ?

Cheers,
Ben.

> thanks,
>
> greg k-h
>
> ------------------ original commit in Linus's tree ------------------
>
> >From 947166043732b69878123bf31f51933ad0316080 Mon Sep 17 00:00:00 2001
> From: Gavin Shan <shangw@xxxxxxxxxxxxxxxxxx>
> Date: Tue, 25 Feb 2014 15:28:37 +0800
> Subject: [PATCH] powerpc/powernv: Dump PHB diag-data immediately
>
> The PHB diag-data is important to help locating the root cause for
> EEH errors such as frozen PE or fenced PHB. However, the EEH core
> enables IO path by clearing part of HW registers before collecting
> this data causing it to be corrupted.
>
> This patch fixes this by dumping the PHB diag-data immediately when
> frozen/fenced state on PE or PHB is detected for the first time in
> eeh_ops::get_state() or next_error() backend.
>
> Signed-off-by: Gavin Shan <shangw@xxxxxxxxxxxxxxxxxx>
> CC: <stable@xxxxxxxxxxxxxxx>
> Signed-off-by: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
>
> diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
> index f51474336460..253fefe3d1a0 100644
> --- a/arch/powerpc/platforms/powernv/eeh-ioda.c
> +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
> @@ -114,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
>                          ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
>  #endif /* CONFIG_DEBUG_FS */
>  
> +
>  /**
>   * ioda_eeh_post_init - Chip dependent post initialization
>   * @hose: PCI controller
> @@ -221,6 +222,22 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
>          return ret;
>  }
>  
> +static void ioda_eeh_phb_diag(struct pci_controller *hose)
> +{
> +        struct pnv_phb *phb = hose->private_data;
> +        long rc;
> +
> +        rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
> +                                         PNV_PCI_DIAG_BUF_SIZE);
> +        if (rc != OPAL_SUCCESS) {
> +                pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
> +                           __func__, hose->global_number, rc);
> +                return;
> +        }
> +
> +        pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
> +}
> +
>  /**
>   * ioda_eeh_get_state - Retrieve the state of PE
>   * @pe: EEH PE
> @@ -272,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
>                  result |= EEH_STATE_DMA_ACTIVE;
>                  result |= EEH_STATE_MMIO_ENABLED;
>                  result |= EEH_STATE_DMA_ENABLED;
> +        } else if (!(pe->state & EEH_PE_ISOLATED)) {
> +                eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
> +                ioda_eeh_phb_diag(hose);
>          }
>  
>          return result;
> @@ -315,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
>                          __func__, fstate, hose->global_number, pe_no);
>          }
>  
> +        /* Dump PHB diag-data for frozen PE */
> +        if (result != EEH_STATE_NOT_SUPPORT &&
> +            (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
> +            (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
> +            !(pe->state & EEH_PE_ISOLATED)) {
> +                eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
> +                ioda_eeh_phb_diag(hose);
> +        }
> +
>          return result;
>  }
>  
> @@ -530,42 +559,6 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
>  }
>  
>  /**
> - * ioda_eeh_get_log - Retrieve error log
> - * @pe: EEH PE
> - * @severity: Severity level of the log
> - * @drv_log: buffer to store the log
> - * @len: space of the log buffer
> - *
> - * The function is used to retrieve error log from P7IOC.
> - */
> -static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
> -                            char *drv_log, unsigned long len)
> -{
> -        s64 ret;
> -        unsigned long flags;
> -        struct pci_controller *hose = pe->phb;
> -        struct pnv_phb *phb = hose->private_data;
> -
> -        spin_lock_irqsave(&phb->lock, flags);
> -
> -        ret = opal_pci_get_phb_diag_data2(phb->opal_id,
> -                        phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
> -        if (ret) {
> -                spin_unlock_irqrestore(&phb->lock, flags);
> -                pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
> -                           __func__, hose->global_number, pe->addr, ret);
> -                return -EIO;
> -        }
> -
> -        /* The PHB diag-data is always indicative */
> -        pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
> -
> -        spin_unlock_irqrestore(&phb->lock, flags);
> -
> -        return 0;
> -}
> -
> -/**
>   * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
>   * @pe: EEH PE
>   *
> @@ -646,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
>          }
>  }
>  
> -static void ioda_eeh_phb_diag(struct pci_controller *hose)
> -{
> -        struct pnv_phb *phb = hose->private_data;
> -        long rc;
> -
> -        rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
> -                                         PNV_PCI_DIAG_BUF_SIZE);
> -        if (rc != OPAL_SUCCESS) {
> -                pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
> -                           __func__, hose->global_number, rc);
> -                return;
> -        }
> -
> -        pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
> -}
> -
>  static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
>                                 struct eeh_pe **pe)
>  {
> @@ -835,6 +812,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
>          }
>  
>          /*
> +         * EEH core will try recover from fenced PHB or
> +         * frozen PE. In the time for frozen PE, EEH core
> +         * enable IO path for that before collecting logs,
> +         * but it ruins the site. So we have to dump the
> +         * log in advance here.
> +         */
> +        if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
> +             ret == EEH_NEXT_ERR_FENCED_PHB) &&
> +            !((*pe)->state & EEH_PE_ISOLATED)) {
> +                eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
> +                ioda_eeh_phb_diag(hose);
> +        }
> +
> +        /*
>           * If we have no errors on the specific PHB or only
>           * informative error there, we continue poking it.
>           * Otherwise, we need actions to be taken by upper
> @@ -852,7 +843,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
>          .set_option = ioda_eeh_set_option,
>          .get_state = ioda_eeh_get_state,
>          .reset = ioda_eeh_reset,
> -        .get_log = ioda_eeh_get_log,
>          .configure_bridge = ioda_eeh_configure_bridge,
>          .next_error = ioda_eeh_next_error
>  };

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
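[Editor's note] For anyone preparing the backport by hand, the essence of the change is a "dump once, at first detection" guard: the diag-data must be captured the first time a frozen/fenced condition is observed, because the recovery path that follows re-enables the IO path and rewrites the registers the data comes from. The following is a minimal standalone sketch of that guard in plain userspace C; the names (fake_pe, dump_diag_once, recover, PE_ISOLATED) are invented for illustration and are not the kernel API from the patch above.

/*
 * Standalone illustration (userspace C, hypothetical names):
 * capture diagnostic data once, before recovery clobbers it.
 */
#include <stdio.h>
#include <string.h>

#define PE_ISOLATED  0x1                /* stands in for EEH_PE_ISOLATED */

struct fake_pe {
        unsigned int state;             /* state flags */
        char diag[64];                  /* stands in for phb->diag.blob */
};

/* Capture and print the diagnostic data exactly once per freeze. */
static void dump_diag_once(struct fake_pe *pe)
{
        if (pe->state & PE_ISOLATED)
                return;                 /* already dumped for this error */
        pe->state |= PE_ISOLATED;       /* mirrors eeh_pe_state_mark() */
        printf("diag-data: %s\n", pe->diag);
}

/* Recovery re-enables the IO path, which overwrites the diag registers. */
static void recover(struct fake_pe *pe)
{
        strcpy(pe->diag, "<clobbered by recovery>");
        pe->state &= ~PE_ISOLATED;
}

int main(void)
{
        struct fake_pe pe = { .state = 0 };

        strcpy(pe.diag, "frozen PE: root cause details");
        dump_diag_once(&pe);            /* first detection: data still intact */
        dump_diag_once(&pe);            /* duplicate detection: guard skips it */
        recover(&pe);                   /* after this, the data would be useless */
        return 0;
}

The same guard appears three times in the patch (both branches of ioda_eeh_get_state() and in ioda_eeh_next_error()), which is why the old get_log hook, invoked only after the EEH core had already touched the hardware, could be removed.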