From: Anatoli Antonovitch <anatoli.antonovitch@xxxxxxx> It is to avoid any potential issues when S3 resume but at the same time we want to hot-unplug. Need to do some more testing to see if it is still necessary. To fix the race between pciehp and AER reported in https://bugzilla.kernel.org/show_bug.cgi?id=215590 INFO: task irq/26-aerdrv:104 blocked for more than 120 seconds. Tainted: G W 6.1.0-rc5-custom-master-nov14+ #2 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:irq/26-aerdrv state:D stack:0 pid:104 ppid:2 flags:0x00004000 Call Trace: <TASK> __schedule+0x39c/0xe90 ? rcu_read_lock_sched_held+0x25/0x80 schedule+0x6b/0xf0 rwsem_down_write_slowpath+0x3b2/0x9c0 down_write_nested+0x16b/0x210 pciehp_reset_slot+0x63/0x160 pci_reset_hotplug_slot+0x44/0x70 pci_slot_reset+0x10d/0x190 pci_bus_error_reset+0xb2/0xe0 aer_root_reset+0x144/0x190 pcie_do_recovery+0x15a/0x270 ? aer_dev_correctable_show+0xc0/0xc0 aer_process_err_devices+0xcf/0xea aer_isr.cold+0x52/0xa1 ? __kmem_cache_free+0x36a/0x3b0 ? irq_thread+0xb0/0x1e0 ? irq_thread+0xb0/0x1e0 irq_thread_fn+0x28/0x70 irq_thread+0x106/0x1e0 ? irq_forced_thread_fn+0xb0/0xb0 ? wake_threads_waitq+0x40/0x40 ? irq_thread_check_affinity+0xf0/0xf0 kthread+0x10a/0x130 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 </TASK> INFO: task irq/26-pciehp:105 blocked for more than 120 seconds. Tainted: G W 6.1.0-rc5-custom-master-nov14+ #2 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:irq/26-pciehp state:D stack:0 pid:105 ppid:2 flags:0x00004000 Call Trace: <TASK> __schedule+0x39c/0xe90 schedule+0x6b/0xf0 schedule_preempt_disabled+0x18/0x30 __mutex_lock+0x685/0xf60 ? rcu_read_lock_sched_held+0x25/0x80 ? lock_release+0x24d/0x410 ? __device_driver_lock+0x2d/0x50 mutex_lock_nested+0x1b/0x30 ? mutex_lock_nested+0x1b/0x30 __device_driver_lock+0x2d/0x50 device_release_driver_internal+0x1f/0x170 device_release_driver+0x12/0x20 pci_stop_bus_device+0x74/0xa0 pci_stop_bus_device+0x30/0xa0 pci_stop_and_remove_bus_device+0x13/0x30 pciehp_unconfigure_device+0x80/0x140 pciehp_disable_slot+0x6e/0x110 pciehp_handle_presence_or_link_change+0xf1/0x310 pciehp_ist+0x1a0/0x1b0 ? irq_thread+0xb0/0x1e0 irq_thread_fn+0x28/0x70 irq_thread+0x106/0x1e0 ? irq_forced_thread_fn+0xb0/0xb0 ? wake_threads_waitq+0x40/0x40 ? irq_thread_check_affinity+0xf0/0xf0 kthread+0x10a/0x130 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 </TASK> Signed-off-by: Anatoli Antonovitch <anatoli.antonovitch@xxxxxxx> --- drivers/pci/hotplug/pciehp_hpc.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 10e9670eea0b..b1084e67f798 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -27,6 +27,8 @@ #include "../pci.h" #include "pciehp.h" +static DECLARE_RWSEM(hotplug_slot_rwsem); + static const struct dmi_system_id inband_presence_disabled_dmi_table[] = { /* * Match all Dell systems, as some Dell systems have inband @@ -911,7 +913,10 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) if (probe) return 0; - down_write_nested(&ctrl->reset_lock, ctrl->depth); + if (ctrl->depth > 0) + down_write_nested(&ctrl->reset_lock, ctrl->depth); + else + down_write(&hotplug_slot_rwsem); if (!ATTN_BUTTN(ctrl)) { ctrl_mask |= PCI_EXP_SLTCTL_PDCE; @@ -931,7 +936,11 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); - up_write(&ctrl->reset_lock); + if (ctrl->depth > 0) + up_write(&ctrl->reset_lock); + else + up_write(&hotplug_slot_rwsem); + return rc; } -- 2.25.1