On Thu, Nov 4, 2021 at 2:20 AM Evan Quan <evan.quan@xxxxxxx> wrote: > > As part of the IB padding process, accessing the RLC_SPM_* registers may > trigger a gfx hang, since gfxoff may already have kicked in during that period. > To address that, we manually disable gfxoff before the RLC_SPM_* > register access and re-enable it afterwards. > > This resolves the gfx hang issue observed when running Talos with RDP launched > in parallel. > > Signed-off-by: Evan Quan <evan.quan@xxxxxxx> > Change-Id: Ifae152e8151fecd25a238ebe87dffb3b17cdb540 Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +++++ > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 4 ++++ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++++ > 4 files changed, 17 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index fa03db34aec4..10fc9197602e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -8388,6 +8388,9 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, > static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > { > u32 reg, data; > + > + amdgpu_gfx_off_ctrl(adev, false); > + > /* not for *_SOC15 */ > reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); > if (amdgpu_sriov_is_pp_one_vf(adev)) > @@ -8402,6 +8405,8 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); > else > WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); > + > + amdgpu_gfx_off_ctrl(adev, true); > } > > static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev, > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index 37b4a3db6360..d17a6f399347 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -3575,12 +3575,16 @@ static 
void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > { > u32 data; > > + amdgpu_gfx_off_ctrl(adev, false); > + > data = RREG32(mmRLC_SPM_VMID); > > data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; > data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; > > WREG32(mmRLC_SPM_VMID, data); > + > + amdgpu_gfx_off_ctrl(adev, true); > } > > static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index aefae5b1ff7b..1a476de20d08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -5727,6 +5727,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > { > u32 data; > > + amdgpu_gfx_off_ctrl(adev, false); > + > if (amdgpu_sriov_is_pp_one_vf(adev)) > data = RREG32_NO_KIQ(mmRLC_SPM_VMID); > else > @@ -5739,6 +5741,8 @@ static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > WREG32_NO_KIQ(mmRLC_SPM_VMID, data); > else > WREG32(mmRLC_SPM_VMID, data); > + > + amdgpu_gfx_off_ctrl(adev, true); > } > > static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 08e91e7245df..d9367747fed3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -5218,6 +5218,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > { > u32 reg, data; > > + amdgpu_gfx_off_ctrl(adev, false); > + > reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); > if (amdgpu_sriov_is_pp_one_vf(adev)) > data = RREG32_NO_KIQ(reg); > @@ -5231,6 +5233,8 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) > WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); > else > WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); > + > + amdgpu_gfx_off_ctrl(adev, true); > } 
> > static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev, > -- > 2.29.0 >