If the IH ring buffer overflows, it's possible that fence signal events were lost. Check each ring for progress to prevent job timeouts/GPU hangs due to the fences staying unsignaled despite the work being done. Cc: Joshua Ashton <joshua@xxxxxxxxx> Cc: Alex Deucher <alexander.deucher@xxxxxxx> Cc: Christian König <christian.koenig@xxxxxxx> Cc: stable@xxxxxxxxxxxxxxx Signed-off-by: Friedrich Vock <friedrich.vock@xxxxxx> --- v2: Set ih->overflow to false after processing fence v3: Move everything related to fence processing on overflow to patch 2 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 2 ++ drivers/gpu/drm/amd/amdgpu/cik_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/cz_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 1 + drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/si_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 + drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 1 + 12 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3b0aaf3ebc6..4e061f7741d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -209,6 +209,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { unsigned int count; u32 wptr; + int i; if (!ih->enabled || adev->shutdown) return IRQ_NONE; @@ -227,6 +228,21 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) ih->rptr &= ih->ptr_mask; } + /* If the ring buffer overflowed, we might have lost some fence + * signal interrupts. Check if there was any activity so the signal + * doesn't get lost. + */ + if (ih->overflow) { + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->fence_drv.initialized) + continue; + amdgpu_fence_process(ring); + } + ih->overflow = false; + } + amdgpu_ih_set_rptr(adev, ih); wake_up_all(&ih->wait_process); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..6041ec727f06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -69,6 +69,8 @@ struct amdgpu_ih_ring { unsigned rptr; struct amdgpu_ih_regs ih_regs; + bool overflow; + /* For waiting on IH processing at checkpoint. */ wait_queue_head_t wait_process; uint64_t processed_timestamp; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index f24e34dc33d1..bbadf2e530b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -210,6 +210,7 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, */ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + ih->overflow = true; } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c19681492efa..e5c4ed44bad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -221,6 +221,7 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32(mmIH_RB_CNTL, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2c02ae69883d..075e5c1a5549 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -220,6 +220,7 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32(mmIH_RB_CNTL, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index ad4ad39f128f..d0a5a08edd55 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -424,6 +424,7 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index b8da0fc29378..6bf4f210ef74 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -424,6 +424,7 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index de93614726c9..cdbe7d01490e 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -448,6 +448,7 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index cada9f300a7f..398fbc296cac 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -125,6 +125,7 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, */ tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + ih->overflow = true; } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 450b6e831509..1d1e064be7d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -224,6 +224,7 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32(mmIH_RB_CNTL, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index bf68e18e3824..619087a4c4ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -378,6 +378,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index db66e6cccaf2..f42f8e5dbe23 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -426,6 +426,7 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, */ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + ih->overflow = true; out: return (wptr & ih->ptr_mask); -- 2.43.0