Runtime PM transitions can happen simultaneously with AER error handling. Avoid that by calling pm_runtime_get_sync() just after pci_dev_get() and pm_runtime_put() just before pci_dev_put() in the AER recovery procedures.

I'm not sure about the DPC case, since I do not see a get/put pair there. It just calls pcie_do_recovery() from the threaded IRQ handler dpc_handler(). I think the pm_runtime_*() calls should be added to this handler as well.

pm_runtime_get_sync() increments the dev->power.usage_count counter to prevent any runtime PM activity. If a suspend is pending, it waits for it to complete and then performs a runtime resume. I am not sure whether that is a problem; in my testing I did not encounter any issues with it.

I tested with an igc device by running aer_inject simultaneously with runtime PM suspend/resume via /sys/bus/pci/devices/PCI_ID/power/control, and can reproduce the following failure:

[ 853.253938] igc 0000:02:00.0: not ready 65535ms after bus reset; giving up
[ 853.253973] pcieport 0000:00:1c.2: AER: Root Port link has been reset (-25)
[ 853.253996] pcieport 0000:00:1c.2: AER: subordinate device reset failed
[ 853.254099] pcieport 0000:00:1c.2: AER: device recovery failed
[ 853.254178] igc 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible

The problem disappears when this patch is applied.

Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@xxxxxxxxxxxxxxx> --- drivers/pci/pcie/aer.c | 8 ++++++++ drivers/pci/pcie/edr.c | 3 +++ 2 files changed, 11 insertions(+) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 42a3bd35a3e1..9b56460edc76 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -23,6 +23,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/pm.h> +#include <linux/pm_runtime.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/delay.h> @@ -813,6 +814,7 @@ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); + pm_runtime_get_sync(&dev->dev); e_info->error_dev_num++; return 0; } @@ -1111,6 +1113,8 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) { cxl_rch_handle_error(dev, info); pci_aer_handle_error(dev, info); + + pm_runtime_put(&dev->dev); pci_dev_put(dev); } @@ -1143,6 +1147,8 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } + pm_runtime_get_sync(&pdev->dev); + pci_print_aer(pdev, entry.severity, entry.regs); /* * Memory for aer_capability_regs(entry.regs) is being allocated from the @@ -1159,6 +1165,8 @@ static void aer_recover_work_func(struct work_struct *work) else if (entry.severity == AER_FATAL) pcie_do_recovery(pdev, pci_channel_io_frozen, aer_root_reset); + + pm_runtime_put(&pdev->dev); pci_dev_put(pdev); } } diff --git a/drivers/pci/pcie/edr.c b/drivers/pci/pcie/edr.c index 5f4914d313a1..bd96babd7249 100644 --- a/drivers/pci/pcie/edr.c +++ b/drivers/pci/pcie/edr.c @@ -10,6 +10,7 @@ #include <linux/pci.h> #include <linux/pci-acpi.h> +#include <linux/pm_runtime.h> #include "portdrv.h" #include "../pci.h" @@ -169,6 +170,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data) return; } + 
pm_runtime_get_sync(&edev->dev); pci_dbg(pdev, "Reported EDR dev: %s\n", pci_name(edev)); /* If port does not support DPC, just send the OST */ @@ -209,6 +211,7 @@ static void edr_handle_event(acpi_handle handle, u32 event, void *data) acpi_send_edr_status(pdev, edev, EDR_OST_FAILED); } + pm_runtime_put(&edev->dev); pci_dev_put(edev); } -- 2.34.1