[AMD Official Use Only - General] Simiar as patch #9 and #10, let's use macro to define the magic numbers + case 0x36430400: /* SMNAID XCD 0 */ + case 0x38430400: /* SMNAID XCD 1 */ + case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ Regards, Hawking -----Original Message----- From: Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx> Sent: Wednesday, January 3, 2024 16:02 To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx>; Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx> Subject: [PATCH 08/14] drm/amdgpu: add gfx v9.4.3 ACA support add gfx v9.4.3 ACA driver support Signed-off-by: Yang Wang <kevinyang.wang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 85 +++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 131cddbdda0d..18b8e4dbe9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -38,6 +38,7 @@ #include "gfx_v9_4_3.h" #include "amdgpu_xcp.h" +#include "amdgpu_aca.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); @@ -675,6 +676,67 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; +static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle, + struct aca_bank *bank, enum aca_error_type type, + struct aca_bank_report *report, void *data) { + u64 status, misc0; + u32 instlo; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if ((type == ACA_ERROR_TYPE_UE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) || + (type == ACA_ERROR_TYPE_CE && + ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) { + + ret = aca_bank_info_decode(bank, &report->info); + if (ret) + return ret; + + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + report->info.die_id = instlo == 0x36430400 ? 0 : 1; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + report->count = ACA_REG__MISC0__ERRCNT(misc0); + report->type = type; + } + + return 0; +} + +static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_error_type type, void *data) { + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + switch (instlo) { + case 0x36430400: /* SMNAID XCD 0 */ + case 0x38430400: /* SMNAID XCD 1 */ + case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ + return true; + default: + return false; + } + + return false; +} + +static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { + .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report, + .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, }; + +static const struct aca_info gfx_v9_4_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &gfx_v9_4_3_aca_bank_ops, +}; + static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -4242,9 +4304,32 @@ struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, }; +static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct +ras_common_if *ras_block) { + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX, + &gfx_v9_4_3_aca_info, + NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_gfx_ras gfx_v9_4_3_ras = { .ras_block = { .hw_ops = &gfx_v9_4_3_ras_ops, + .ras_late_init = &gfx_v9_4_3_ras_late_init, }, .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer, }; -- 2.34.1