[AMD Official Use Only - General] This Patch looks good to me. Reviewed-By: Frank.Min <Frank.Min@xxxxxxx> -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Hawking Zhang Sent: Monday, January 29, 2024 10:35 PM To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx> Subject: [PATCH] drm/amdgpu: Update boot time errors polling sequence Update boot time errors polling seqeunce to align with the latest firmware change. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 14 +++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5 +++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9e67355d4718..9b7a5c1c9af5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -4122,6 +4122,18 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, u32 reg_data; int retry_loop; + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) { + *boot_error = AMDGPU_RAS_BOOT_SUCEESS; + return 0; + } + msleep(1); + } + /* The pattern for smn addressing in other SOC could be different from * the one for aqua_vanjaram. We should revisit the code if the pattern * is changed. In such case, replace the aqua_vanjaram implementation @@ -4129,7 +4141,7 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + aqua_vanjaram_encode_ext_smn_addressing(instance); - for (retry_loop = 0; retry_loop < 1000; retry_loop++) { + for (retry_loop = 0; retry_loop < +AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) { *boot_error = reg_data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0b6ffae1e8bb..d10e5bb0e52f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -46,6 +46,11 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) #define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) +#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 +#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA +#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF +#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000 + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) /* position of instance value in sub_block_index of * ta_ras_trigger_error_input, the sub block uses lower 12 bits -- 2.17.1