This is a note to let you know that I've just added the patch titled drm/amdgpu: Add check to prevent IH overflow to the 5.11-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: drm-amdgpu-add-check-to-prevent-ih-overflow.patch and it can be found in the queue-5.11 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit 1a70d5468d892fb960a34db8b299b97c556d86c5 Author: Defang Bo <bodefang@xxxxxxx> Date: Wed Jan 6 00:06:39 2021 +0800 drm/amdgpu: Add check to prevent IH overflow [ Upstream commit e4180c4253f3f2da09047f5139959227f5cf1173 ] Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2"). When an ring buffer overflow happens the appropriate bit is set in the WPTR register which is also written back to memory. But clearing the bit in the WPTR doesn't trigger another memory writeback. So what can happen is that we end up processing the buffer overflow over and over again because the bit is never cleared. Resulting in a random system lockup because of an infinite loop in an interrupt handler. Reviewed-by: Christian König <christian.koenig@xxxxxxx> Signed-off-by: Defang Bo <bodefang@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index da37f8a900af..307c01301c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -194,19 +194,30 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 37d8b6ca4dab..cc957471f31e 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -194,19 +194,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index ce3319993b4b..249fcbee7871 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -196,19 +196,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + +out: return (wptr & ih->ptr_mask); }