On Tue, Feb 13, 2024 at 2:03 PM Victor Lu <victorchengchi.lu@xxxxxxx> wrote:
>
> The current error detection only looks for a timeout.
> This should be changed to also check scratch_reg1 for any errors
> returned from RLCG.
>
> v2: remove new error value
>
> Signed-off-by: Victor Lu <victorchengchi.lu@xxxxxxx>

Acked-by: Alex Deucher <alexander.deucher@xxxxxxx>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 +
>  2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 6ff7d3fb2008..7a4eae36778a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -979,7 +979,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
>  		 * SCRATCH_REG0 = read/write value
>  		 * SCRATCH_REG1[30:28] = command
>  		 * SCRATCH_REG1[19:0] = address in dword
> -		 * SCRATCH_REG1[26:24] = Error reporting
> +		 * SCRATCH_REG1[27:24] = Error reporting
>  		 */
>  		writel(v, scratch_reg0);
>  		writel((offset | flag), scratch_reg1);
> @@ -993,7 +993,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
>  			udelay(10);
>  		}
>
> -		if (i >= timeout) {
> +		tmp = readl(scratch_reg1);
> +		if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
>  			if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
>  				if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
>  					dev_err(adev->dev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index fa7be5f277b9..3f59b7b5523f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -45,6 +45,7 @@
>  #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000
>
>  #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF
> +#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000
>
>  /* all asic after AI use this offset */
>  #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5
> --
> 2.34.1
>