Hi, Here is a patch to fix power fault interrupt storm problem reported by Jens Axboe. Thanks, Kenji Kaneshige Enabling power fault detected event notification in current pciehp might cause power fault interrupt storm on some machines. On those machines. On those machines, power fault detected bit in the slot status register was set again immediately when it is cleared in the interrupt service routine, and next power fault detected interrupt was notified again. Therefore, disable power fault detected event notification for now. This patch also removes unnecessary handling for power fault cleared event because this event is not supported by PCIe spec. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@xxxxxxxxxxxxxx> --- drivers/pci/hotplug/pciehp_ctrl.c | 27 ++++++--------------------- drivers/pci/hotplug/pciehp_hpc.c | 26 +++++++++++--------------- 2 files changed, 17 insertions(+), 36 deletions(-) Index: 20091110/drivers/pci/hotplug/pciehp_ctrl.c =================================================================== --- 20091110.orig/drivers/pci/hotplug/pciehp_ctrl.c +++ 20091110/drivers/pci/hotplug/pciehp_ctrl.c @@ -142,23 +142,9 @@ u8 pciehp_handle_power_fault(struct slot /* power fault */ ctrl_dbg(ctrl, "Power fault interrupt received\n"); - - if (!pciehp_query_power_fault(p_slot)) { - /* - * power fault Cleared - */ - ctrl_info(ctrl, "Power fault cleared on Slot(%s)\n", - slot_name(p_slot)); - event_type = INT_POWER_FAULT_CLEAR; - } else { - /* - * power fault - */ - ctrl_info(ctrl, "Power fault on Slot(%s)\n", slot_name(p_slot)); - event_type = INT_POWER_FAULT; - ctrl_info(ctrl, "Power fault bit %x set\n", 0); - } - + ctrl_err(ctrl, "Power fault on slot %s\n", slot_name(p_slot)); + event_type = INT_POWER_FAULT; + ctrl_info(ctrl, "Power fault bit %x set\n", 0); queue_interrupt_event(p_slot, event_type); return 1; @@ -224,13 +210,12 @@ static int board_added(struct slot *p_sl retval = pciehp_check_link_status(ctrl); if (retval) { ctrl_err(ctrl, "Failed to check link status\n"); - set_slot_off(ctrl, p_slot); - return retval; + goto err_exit; } /* Check for a power fault */ - if (pciehp_query_power_fault(p_slot)) { - ctrl_dbg(ctrl, "Power fault detected\n"); + if (ctrl->power_fault_detected || pciehp_query_power_fault(p_slot)) { + ctrl_err(ctrl, "Power fault on slot %s\n", slot_name(p_slot)); retval = -EIO; goto err_exit; } Index: 20091110/drivers/pci/hotplug/pciehp_hpc.c =================================================================== --- 20091110.orig/drivers/pci/hotplug/pciehp_hpc.c +++ 20091110/drivers/pci/hotplug/pciehp_hpc.c @@ -511,15 +511,10 @@ int pciehp_power_on_slot(struct slot * s return retval; } } + ctrl->power_fault_detected = 0; slot_cmd = POWER_ON; cmd_mask = PCI_EXP_SLTCTL_PCC; - if (!pciehp_poll_mode) { - /* Enable power fault detection turned off at power off time */ - slot_cmd |= PCI_EXP_SLTCTL_PFDE; - cmd_mask |= PCI_EXP_SLTCTL_PFDE; - } - retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); if (retval) { ctrl_err(ctrl, "Write %x command failed!\n", slot_cmd); @@ -528,7 +523,6 @@ int pciehp_power_on_slot(struct slot * s ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd); - ctrl->power_fault_detected = 0; return retval; } @@ -541,12 +535,6 @@ int pciehp_power_off_slot(struct slot * slot_cmd = POWER_OFF; cmd_mask = PCI_EXP_SLTCTL_PCC; - if (!pciehp_poll_mode) { - /* Disable power fault detection */ - slot_cmd &= ~PCI_EXP_SLTCTL_PFDE; - cmd_mask |= PCI_EXP_SLTCTL_PFDE; - } - retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); if (retval) { ctrl_err(ctrl, "Write command failed!\n"); @@ -790,11 +778,19 @@ int pcie_enable_notification(struct cont { u16 cmd, mask; + /* + * TBD: Power fault detected software notification support. + * + * Power fault detected software notification is not enabled + * now, because it caused power fault detected interrupt storm + * on some machines. On those machines, power fault detected + * bit in the slot status register was set again immediately + * when it is cleared in the interrupt service routine, and + * next power fault detected interrupt was notified again. + */ cmd = PCI_EXP_SLTCTL_PDCE; if (ATTN_BUTTN(ctrl)) cmd |= PCI_EXP_SLTCTL_ABPE; - if (POWER_CTRL(ctrl)) - cmd |= PCI_EXP_SLTCTL_PFDE; if (MRL_SENS(ctrl)) cmd |= PCI_EXP_SLTCTL_MRLSCE; if (!pciehp_poll_mode) -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html