On Tue, Jan 5, 2021 at 11:49 AM Christian König <christian.koenig@xxxxxxx> wrote: > > Am 05.01.21 um 17:06 schrieb Defang Bo: > > Similar to commit <b82175750131>("drm/amdgpu: fix IH overflow on Vega10 v2"). > > When an ring buffer overflow happens the appropriate bit is set in the WPTR > > register which is also written back to memory. But clearing the bit in the > > WPTR doesn't trigger another memory writeback. > > > > So what can happen is that we end up processing the buffer overflow over and > > over again because the bit is never cleared. Resulting in a random system > > lockup because of an infinite loop in an interrupt handler. > > > > Signed-off-by: Defang Bo <bodefang@xxxxxxx> > > Reviewed-by: Christian König <christian.koenig@xxxxxxx> > > Thanks for the help, > Christian. Applied. Thanks! Alex > > > --- > > Changes since v1: > > - Modify the subject and replace the wrong register. > > --- > > --- > > drivers/gpu/drm/amd/amdgpu/cz_ih.c | 39 +++++++++++++++++++++------------ > > drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 36 +++++++++++++++++++----------- > > drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 37 ++++++++++++++++++++----------- > > 3 files changed, 72 insertions(+), 40 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c > > index 1dca0cabc326..65361afb21ab 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c > > +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c > > @@ -190,22 +190,33 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, > > struct amdgpu_ih_ring *ih) > > { > > u32 wptr, tmp; > > - > > + > > wptr = le32_to_cpu(*ih->wptr_cpu); > > > > - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { > > - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > - /* When a ring buffer overflow happen start parsing interrupt > > - * from the last not overwritten vector (wptr + 16). Hopefully > > - * this should allow us to catchup. > > - */ > > - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > - ih->rptr = (wptr + 16) & ih->ptr_mask; > > - tmp = RREG32(mmIH_RB_CNTL); > > - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > - WREG32(mmIH_RB_CNTL, tmp); > > - } > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + /* Double check that the overflow wasn't already cleared. */ > > + wptr = RREG32(mmIH_RB_WPTR); > > + > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > + > > + /* When a ring buffer overflow happen start parsing interrupt > > + * from the last not overwritten vector (wptr + 16). Hopefully > > + * this should allow us to catchup. > > + */ > > + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > + ih->rptr = (wptr + 16) & ih->ptr_mask; > > + tmp = RREG32(mmIH_RB_CNTL); > > + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > + WREG32(mmIH_RB_CNTL, tmp); > > + > > + > > +out: > > return (wptr & ih->ptr_mask); > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c > > index a13dd9a51149..8e4dae8addb9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c > > +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c > > @@ -193,19 +193,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, > > > > wptr = le32_to_cpu(*ih->wptr_cpu); > > > > - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { > > - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > - /* When a ring buffer overflow happen start parsing interrupt > > - * from the last not overwritten vector (wptr + 16). Hopefully > > - * this should allow us to catchup. > > - */ > > - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > - ih->rptr = (wptr + 16) & ih->ptr_mask; > > - tmp = RREG32(mmIH_RB_CNTL); > > - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > - WREG32(mmIH_RB_CNTL, tmp); > > - } > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + /* Double check that the overflow wasn't already cleared. */ > > + wptr = RREG32(mmIH_RB_WPTR); > > + > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > + /* When a ring buffer overflow happen start parsing interrupt > > + * from the last not overwritten vector (wptr + 16). Hopefully > > + * this should allow us to catchup. > > + */ > > + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > + ih->rptr = (wptr + 16) & ih->ptr_mask; > > + tmp = RREG32(mmIH_RB_CNTL); > > + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > + WREG32(mmIH_RB_CNTL, tmp); > > + > > + > > +out: > > return (wptr & ih->ptr_mask); > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c > > index e40140bf6699..2ba1ce323b6d 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c > > +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c > > @@ -195,19 +195,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, > > > > wptr = le32_to_cpu(*ih->wptr_cpu); > > > > - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { > > - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > - /* When a ring buffer overflow happen start parsing interrupt > > - * from the last not overwritten vector (wptr + 16). Hopefully > > - * this should allow us to catchup. > > - */ > > - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > - ih->rptr = (wptr + 16) & ih->ptr_mask; > > - tmp = RREG32(mmIH_RB_CNTL); > > - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > - WREG32(mmIH_RB_CNTL, tmp); > > - } > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + /* Double check that the overflow wasn't already cleared. */ > > + wptr = RREG32(mmIH_RB_WPTR); > > + > > + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) > > + goto out; > > + > > + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); > > + > > + /* When a ring buffer overflow happen start parsing interrupt > > + * from the last not overwritten vector (wptr + 16). Hopefully > > + * this should allow us to catchup. > > + */ > > + > > + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", > > + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); > > + ih->rptr = (wptr + 16) & ih->ptr_mask; > > + tmp = RREG32(mmIH_RB_CNTL); > > + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); > > + WREG32(mmIH_RB_CNTL, tmp); > > + > > +out: > > return (wptr & ih->ptr_mask); > > } > > > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx