Hi Jörg, On Tue, May 03, 2022 at 12:17:40AM +0200, Jörg-Volker Peetz wrote: > May 2 21:50:27 xxx kernel: WARNING: CPU: 0 PID: 1 at > drivers/iommu/amd/init.c:851 amd_iommu_enable_interrupts+0x312/0x3f0 Are you sure you tested the right kernel? My patch removes that warning, so it can't trigger anymore. It also adds a new warning, but in a different file and line. > In 'kern.log' I also found this: > > May 2 21:53:27 xxx kernel: [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx > timeout, signaled seq=16, emitted seq=17 These are GPU errors; it is hard to say what triggered them. Can you please send me your exact motherboard and CPU model? There is a chance this is firmware-related. Besides that, I learned that on some systems this warning only triggers on resume, so increasing the timeout seems to be the only viable fix. Can you please try the attached diff? It also prints the time it took to enable the GA log. Regards, Joerg
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 7bfe37e52e21..12eb83d22019 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -22,6 +22,7 @@ #include <linux/kmemleak.h> #include <linux/cc_platform.h> #include <linux/iopoll.h> +#include <linux/ktime.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/apic.h> @@ -84,7 +85,7 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 100000 +#define LOOP_TIMEOUT 10000000 /* * ACPI table definitions * @@ -816,6 +817,7 @@ static void free_ga_log(struct amd_iommu *iommu) static int iommu_ga_log_enable(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP + ktime_t start, end; u32 status, i; u64 entry; @@ -841,15 +843,20 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); + start = ktime_get(); for (i = 0; i < LOOP_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } + end = ktime_get(); if (WARN_ON(i >= LOOP_TIMEOUT)) return -EINVAL; + + pr_info("Enabling GA log took %lld ms\n", ktime_to_ms(ktime_sub(end, start))); + #endif /* CONFIG_IRQ_REMAP */ return 0; }