The current error detection only checks for a timeout. Change it to also check scratch_reg1 for any error returned by RLCG. Also add a new error value, AMDGPU_RLCG_INVALID_XCD_ACCESS (bit 27), which widens the SCRATCH_REG1 error field to bits [27:24]. Signed-off-by: Victor Lu <victorchengchi.lu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0dcff2889e25..3cd085569515 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1022,7 +1022,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f * SCRATCH_REG0 = read/write value * SCRATCH_REG1[30:28] = command * SCRATCH_REG1[19:0] = address in dword - * SCRATCH_REG1[26:24] = Error reporting + * SCRATCH_REG1[27:24] = Error reporting */ writel(v, scratch_reg0); writel((offset | flag), scratch_reg1); @@ -1036,7 +1036,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f udelay(10); } - if (i >= timeout) { + tmp = readl(scratch_reg1); + if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) { if (amdgpu_sriov_rlcg_error_report_enabled(adev)) { if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) { dev_err(adev->dev, @@ -1047,6 +1048,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) { dev_err(adev->dev, "register is not in range, rlcg failed to program reg: 0x%05x\n", offset); + } else if (tmp & AMDGPU_RLCG_INVALID_XCD_ACCESS) { + dev_err(adev->dev, + "invalid xcd access, rlcg failed to program reg: 0x%05x\n", offset); } else { dev_err(adev->dev, "unknown error type, rlcg failed to program reg: 0x%05x\n", offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index d4207e44141f..447af2e4aef0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -40,11 +40,13 @@ #define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28) /* error code for indirect register access path supported by rlcg for sriov */ +#define AMDGPU_RLCG_INVALID_XCD_ACCESS 0x8000000 #define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000 #define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000 #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF +#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 -- 2.34.1