[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx>

> -----Original Message-----
> From: Hawking Zhang <Hawking.Zhang@xxxxxxx>
> Sent: Thursday, August 1, 2024 1:55 PM
> To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhou1, Tao <Tao.Zhou1@xxxxxxx>
> Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>
> Subject: [PATCH] drm/amdgpu: Add more types for boot time error reporting
>
> Data abort exception and unknown errors are supported.
>
> Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 ++
> 2 files changed, 12 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 12ab48f26bd5..7aff6150898b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -4769,6 +4769,16 @@ static void
> amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
> dev_info(adev->dev,
> "socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm
> bist test failed\n",
> socket_id, aid_id, hbm_id, fw_status);
> +
> + if (AMDGPU_RAS_GPU_ERR_DATA_ABORT(boot_error))
> + dev_info(adev->dev,
> + "socket: %d, aid: %d, fw_status: 0x%x, data abort
> exception\n",
> + socket_id, aid_id, fw_status);
> +
> + if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error))
> + dev_info(adev->dev,
> + "socket: %d, aid: %d, fw_status: 0x%x, unknown boot
> time errors\n",
> + socket_id, aid_id, fw_status);
> }
>
> static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> index 7ddd13d5c06b..0d49b74bfe5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
> @@ -46,6 +46,8 @@ struct amdgpu_iv_entry;
> #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x)
> AMDGPU_GET_REG_FIELD(x, 10, 8)
> #define AMDGPU_RAS_GPU_ERR_AID_ID(x)
> AMDGPU_GET_REG_FIELD(x, 12, 11)
> #define AMDGPU_RAS_GPU_ERR_HBM_ID(x)
> AMDGPU_GET_REG_FIELD(x, 14, 13)
> +#define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x)
> AMDGPU_GET_REG_FIELD(x, 29, 29)
> +#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x)
> AMDGPU_GET_REG_FIELD(x, 30, 30)
>
> #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
> #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
> --
> 2.17.1