Signed-off-by: Ben Skeggs <bskeggs@xxxxxxxxxx>
---
 .../gpu/drm/nouveau/include/nvkm/core/pci.h   |  2 +
 drivers/gpu/drm/nouveau/nouveau_drm.c         | 61 ------------------
 drivers/gpu/drm/nouveau/nouveau_drv.h         |  2 -
 drivers/gpu/drm/nouveau/nvkm/device/pci.c     | 62 +++++++++++++++++++
 4 files changed, 64 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h
index 0797225ab038..95deea8c65ff 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/pci.h
@@ -7,6 +7,8 @@
 struct nvkm_device_pci {
 	struct nvkm_device device;
 	struct pci_dev *pdev;
+	u8 old_pm_cap;
+
 	struct dev_pm_domain vga_pm_domain;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 4bcfc2291c4d..76eddf172bb5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -731,63 +731,6 @@ nouveau_drm_device_new(const struct drm_driver *drm_driver, struct device *paren
 	return ret ? ERR_PTR(ret) : drm;
 }
 
-/*
- * On some Intel PCIe bridge controllers doing a
- * D0 -> D3hot -> D3cold -> D0 sequence causes Nvidia GPUs to not reappear.
- * Skipping the intermediate D3hot step seems to make it work again. This is
- * probably caused by not meeting the expectation the involved AML code has
- * when the GPU is put into D3hot state before invoking it.
- *
- * This leads to various manifestations of this issue:
- * - AML code execution to power on the GPU hits an infinite loop (as the
- *   code waits on device memory to change).
- * - kernel crashes, as all PCI reads return -1, which most code isn't able
- *   to handle well enough.
- *
- * In all cases dmesg will contain at least one line like this:
- * 'nouveau 0000:01:00.0: Refused to change power state, currently in D3'
- * followed by a lot of nouveau timeouts.
- *
- * In the \_SB.PCI0.PEG0.PG00._OFF code deeper down writes bit 0x80 to the not
- * documented PCI config space register 0x248 of the Intel PCIe bridge
- * controller (0x1901) in order to change the state of the PCIe link between
- * the PCIe port and the GPU. There are alternative code paths using other
- * registers, which seem to work fine (executed pre Windows 8):
- *  - 0xbc bit 0x20 (publicly available documentation claims 'reserved')
- *  - 0xb0 bit 0x10 (link disable)
- * Changing the conditions inside the firmware by poking into the relevant
- * addresses does resolve the issue, but it seemed to be ACPI private memory
- * and not any device accessible memory at all, so there is no portable way of
- * changing the conditions.
- * On a XPS 9560 that means bits [0,3] on \CPEX need to be cleared.
- *
- * The only systems where this behavior can be seen are hybrid graphics laptops
- * with a secondary Nvidia Maxwell, Pascal or Turing GPU. It's unclear whether
- * this issue only occurs in combination with listed Intel PCIe bridge
- * controllers and the mentioned GPUs or other devices as well.
- *
- * documentation on the PCIe bridge controller can be found in the
- * "7th Generation Intel® Processor Families for H Platforms Datasheet Volume 2"
- * Section "12 PCI Express* Controller (x16) Registers"
- */
-
-static void quirk_broken_nv_runpm(struct pci_dev *pdev)
-{
-	struct nouveau_drm *drm = pci_get_drvdata(pdev);
-	struct pci_dev *bridge = pci_upstream_bridge(pdev);
-
-	if (!bridge || bridge->vendor != PCI_VENDOR_ID_INTEL)
-		return;
-
-	switch (bridge->device) {
-	case 0x1901:
-		drm->old_pm_cap = pdev->pm_cap;
-		pdev->pm_cap = 0;
-		NV_INFO(drm, "Disabling PCI power management to avoid bug\n");
-		break;
-	}
-}
-
 static int nouveau_drm_probe(struct pci_dev *pdev,
 			     const struct pci_device_id *pent)
 {
@@ -822,7 +765,6 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	else
 		drm_fbdev_ttm_setup(drm->dev, 32);
 
-	quirk_broken_nv_runpm(pdev);
 	return 0;
 
 fail_drm:
@@ -846,9 +788,6 @@ nouveau_drm_remove(struct pci_dev *pdev)
 {
 	struct nouveau_drm *drm = pci_get_drvdata(pdev);
 
-	/* revert our workaround */
-	if (drm->old_pm_cap)
-		pdev->pm_cap = drm->old_pm_cap;
 	nouveau_drm_device_remove(drm);
 	nvkm_device_pci_driver.remove(pdev);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b44f0d408ccc..9ca0f6ab4359 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -218,8 +218,6 @@ struct nouveau_drm {
 	 */
 	struct mutex clients_lock;
 
-	u8 old_pm_cap;
-
 	struct {
 		struct agp_bridge_data *bridge;
 		u32 base;
diff --git a/drivers/gpu/drm/nouveau/nvkm/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/device/pci.c
index d9b8e3bc4169..d454d56a7909 100644
--- a/drivers/gpu/drm/nouveau/nvkm/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/device/pci.c
@@ -1598,6 +1598,11 @@ static void *
 nvkm_device_pci_dtor(struct nvkm_device *device)
 {
 	struct nvkm_device_pci *pdev = nvkm_device_pci(device);
+
+	/* revert our workaround */
+	if (pdev->old_pm_cap)
+		pdev->pdev->pm_cap = pdev->old_pm_cap;
+
 	pci_disable_device(pdev->pdev);
 	return pdev;
 }
@@ -1624,6 +1629,62 @@ nvkm_device_pci_remove(struct pci_dev *dev)
 	nvkm_device_del(&device);
 }
 
+/*
+ * On some Intel PCIe bridge controllers doing a
+ * D0 -> D3hot -> D3cold -> D0 sequence causes Nvidia GPUs to not reappear.
+ * Skipping the intermediate D3hot step seems to make it work again. This is
+ * probably caused by not meeting the expectation the involved AML code has
+ * when the GPU is put into D3hot state before invoking it.
+ *
+ * This leads to various manifestations of this issue:
+ * - AML code execution to power on the GPU hits an infinite loop (as the
+ *   code waits on device memory to change).
+ * - kernel crashes, as all PCI reads return -1, which most code isn't able
+ *   to handle well enough.
+ *
+ * In all cases dmesg will contain at least one line like this:
+ * 'nouveau 0000:01:00.0: Refused to change power state, currently in D3'
+ * followed by a lot of nouveau timeouts.
+ *
+ * Deeper down, the \_SB.PCI0.PEG0.PG00._OFF code writes bit 0x80 to the
+ * undocumented PCI config space register 0x248 of the Intel PCIe bridge
+ * controller (0x1901) in order to change the state of the PCIe link between
+ * the PCIe port and the GPU. There are alternative code paths using other
+ * registers, which seem to work fine (executed pre Windows 8):
+ *  - 0xbc bit 0x20 (publicly available documentation claims 'reserved')
+ *  - 0xb0 bit 0x10 (link disable)
+ * Changing the conditions inside the firmware by poking the relevant
+ * addresses does resolve the issue, but they appear to be in ACPI-private
+ * memory rather than in any device-accessible memory, so there is no
+ * portable way of changing them.
+ * On an XPS 9560 that means bits [0,3] of \CPEX need to be cleared.
+ *
+ * The only systems where this behavior can be seen are hybrid graphics laptops
+ * with a secondary Nvidia Maxwell, Pascal or Turing GPU. It's unclear whether
+ * this issue only occurs in combination with the listed Intel PCIe bridge
+ * controller and the mentioned GPUs, or with other devices as well.
+ *
+ * Documentation on the PCIe bridge controller can be found in the
+ * "7th Generation Intel® Processor Families for H Platforms Datasheet Volume 2",
+ * Section "12 PCI Express* Controller (x16) Registers".
+ */
+
+static void quirk_broken_nv_runpm(struct nvkm_device_pci *pdev)
+{
+	struct pci_dev *bridge = pci_upstream_bridge(pdev->pdev);
+
+	if (!bridge || bridge->vendor != PCI_VENDOR_ID_INTEL)
+		return;
+
+	switch (bridge->device) {
+	case 0x1901:
+		pdev->old_pm_cap = pdev->pdev->pm_cap;
+		pdev->pdev->pm_cap = 0;
+		nvdev_info(&pdev->device, "Disabling PCI power management to avoid bug\n");
+		break;
+	}
+}
+
 static int
 nvkm_device_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 {
@@ -1701,6 +1762,7 @@ nvkm_device_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 		pdev->device.mmu->dma_bits = 32;
 	}
 
+	quirk_broken_nv_runpm(pdev);
 done:
 	if (ret) {
 		nvkm_device_del(&device);
-- 
2.44.0
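
[Editor's note, not part of the patch: for reviewers unfamiliar with the quirk,
the save/disable/restore pattern this patch relocates boils down to the sketch
below. It is illustrative only; demo_gpu, demo_apply_runpm_quirk and
demo_revert_runpm_quirk are made-up names, not symbols from this series.]

/*
 * Distilled sketch of the quirk's mechanism: clearing pci_dev->pm_cap makes
 * the PCI core treat the device as having no PM capability, so it never puts
 * the GPU into D3hot, and the broken D0 -> D3hot -> D3cold -> D0 sequence is
 * avoided entirely.
 */
#include <linux/pci.h>

struct demo_gpu {
	struct pci_dev *pdev;
	u8 old_pm_cap;	/* 0 means the quirk was not applied */
};

/* Called at probe/ctor time, before runtime PM is enabled. */
static void demo_apply_runpm_quirk(struct demo_gpu *gpu)
{
	struct pci_dev *bridge = pci_upstream_bridge(gpu->pdev);

	/* Only the Intel 0x1901 PCIe bridge is known to misbehave. */
	if (!bridge || bridge->vendor != PCI_VENDOR_ID_INTEL ||
	    bridge->device != 0x1901)
		return;

	gpu->old_pm_cap = gpu->pdev->pm_cap;
	gpu->pdev->pm_cap = 0;	/* hide the PM capability from the PCI core */
}

/* Called at dtor time, mirroring nvkm_device_pci_dtor() above. */
static void demo_revert_runpm_quirk(struct demo_gpu *gpu)
{
	if (gpu->old_pm_cap)
		gpu->pdev->pm_cap = gpu->old_pm_cap;
}

Moving the revert from nouveau_drm_remove() into nvkm_device_pci_dtor() pairs
the restore with the object that stored old_pm_cap, so the workaround can no
longer outlive the nvkm_device_pci instance that applied it.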