RAS error address translation algorithm is common across dGPU and A + A platform as along as the SOC integrates the same generation of UMC IP. UMC RAS is managed by x86 MCA on A + A platform, umc_ras in GPU driver is not initialized at all on A + A platform. In such case, any umc_ras callback implemented for dGPU config shouldn't be invoked from A + A specific callback. The change moves convert_error_address out of dGPU umc_ras structure and makes it share between A + A and dGPU config. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Reviewed-by: Stanley Yang <Stanley.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 3 --- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 4 +++- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 75f1402101f4..41b3081fe415 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,6 +36,7 @@ #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "atom.h" #include "amdgpu_reset.h" +#include "umc_v6_7.h" #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -2885,10 +2886,17 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, /* * Translate UMC channel address to Physical address */ - if (adev->umc.ras && - adev->umc.ras->convert_ras_error_address) - adev->umc.ras->convert_ras_error_address(adev, - &err_data, m->addr, ch_inst, umc_inst); + switch (adev->ip_versions[UMC_HWIP][0]) { + case IP_VERSION(6, 7, 0): + umc_v6_7_convert_error_address(adev, + &err_data, m->addr, ch_inst, umc_inst); + break; + default: + dev_warn(adev->dev, + "UMC address to Physical address translation is not supported\n"); + kfree(err_data.err_addr); + return NOTIFY_DONE; + } if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index e46439274f3a..3629d8f292ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -51,9 +51,6 @@ struct amdgpu_umc_ras { struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); - void (*convert_ras_error_address)(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 5d5d031c9e7d..72fd963f178b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, } } -static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst) +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst) { uint32_t channel_index; uint64_t soc_pa, retired_page, column; @@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = { .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, - .convert_ras_error_address = umc_v6_7_convert_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index fe41ed2f5945..105245d5b6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -71,5 +71,7 @@ extern const uint32_t umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; - +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst); #endif -- 2.17.1