1.Modify gmc block to fit for the unified ras block data and ops 2.Implement .ras_block_match function pointer for gmc block to identify itself. 3.Change amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and the corresponding variable name remove _funcs suffix. 4.Remove the const flag of gmc ras variable so that gmc ras block can be able to be insertted into amdgpu device ras block link list. 5.Invoke amdgpu_ras_register_ras_block function to register gmc ras block into amdgpu device ras block link list. 6.Remove the redundant code about gmc in amdgpu_ras.c after using the unified ras block. Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 18 ++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 11 +++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 31 +++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 4 +-- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 83f26bca7dac..3ba2f0f1f1b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -448,12 +448,14 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) return r; } - if (!adev->gmc.xgmi.connected_to_cpu) - adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs; + if (!adev->gmc.xgmi.connected_to_cpu) { + adev->gmc.xgmi.ras = &xgmi_ras; + amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + } - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->ras_late_init) { - r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev); + if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ops && + adev->gmc.xgmi.ras->ras_block.ops->ras_late_init) { + r = adev->gmc.xgmi.ras->ras_block.ops->ras_late_init(adev); if (r) return r; } @@ -499,9 +501,9 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) adev->mmhub.ras_funcs->ras_fini) adev->mmhub.ras_funcs->ras_fini(adev); - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->ras_fini) - adev->gmc.xgmi.ras_funcs->ras_fini(adev); + if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ops && + adev->gmc.xgmi.ras->ras_block.ops->ras_fini) + adev->gmc.xgmi.ras->ras_block.ops->ras_fini(adev); if (adev->hdp.ras_funcs && adev->hdp.ras_funcs->ras_fini) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e55201134a01..923db5ff5859 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -29,6 +29,7 @@ #include <linux/types.h> #include "amdgpu_irq.h" +#include "amdgpu_ras.h" /* VA hole for 48bit addresses on Vega10 */ #define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL @@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs { unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); }; -struct amdgpu_xgmi_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - int (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); +struct amdgpu_xgmi_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_xgmi { @@ -159,7 +156,7 @@ struct amdgpu_xgmi { struct ras_common_if *ras_if; bool connected_to_cpu; bool pending_reset; - const struct amdgpu_xgmi_ras_funcs *ras_funcs; + struct amdgpu_xgmi_ras *ras; }; struct amdgpu_gmc { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 190a4a4e9d7a..a6a2f928c6ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -970,9 +970,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data); break; case AMDGPU_RAS_BLOCK__XGMI_WAFL: - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->query_ras_error_count) - adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data); + if (!block_obj || !block_obj->ops) { + dev_info(adev->dev, "%s don't config ras function \n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + if (block_obj->ops->query_ras_error_count) + block_obj->ops->query_ras_error_count(adev, &err_data); break; case AMDGPU_RAS_BLOCK__HDP: if (adev->hdp.ras_funcs && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0d149f5f000e..da541c7b1ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -739,7 +739,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) adev->gmc.xgmi.num_physical_nodes == 0) return 0; - adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); + adev->gmc.xgmi.ras->ras_block.ops->reset_ras_error_count(adev); if (!adev->gmc.xgmi.ras_if) { adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); @@ -859,7 +859,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -868,7 +868,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, uint32_t ue_cnt = 0, ce_cnt = 0; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) - return -EINVAL; + return ; err_data->ue_count = 0; err_data->ce_count = 0; @@ -934,17 +934,36 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, break; } - adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev); + adev->gmc.xgmi.ras->ras_block.ops->reset_ras_error_count(adev); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; +} - return 0; +static int amdgpu_xgmi_ras_block_match(struct amdgpu_ras_block_object* block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index) +{ + if(!block_obj) + return -EINVAL; + + if(block_obj->block == block) { + return 0; + } + + return -EINVAL; } -const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = { +struct amdgpu_ras_block_ops xgmi_ras_ops = { + .ras_block_match = amdgpu_xgmi_ras_block_match, .ras_late_init = amdgpu_xgmi_ras_late_init, .ras_fini = amdgpu_xgmi_ras_fini, .query_ras_error_count = amdgpu_xgmi_query_ras_error_count, .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count, }; + +struct amdgpu_xgmi_ras xgmi_ras = { + .ras_block = { + .name = "xgmi", + .block = AMDGPU_RAS_BLOCK__XGMI_WAFL, + .ops = &xgmi_ras_ops, + } +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index d2189bf7d428..0afca51c3c0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -24,7 +24,7 @@ #include <drm/task_barrier.h> #include "amdgpu_psp.h" - +#include "amdgpu_ras.h" struct amdgpu_hive_info { struct kobject kobj; @@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field { uint32_t pcs_err_shift; }; -extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs; +extern struct amdgpu_xgmi_ras xgmi_ras; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); -- 2.25.1