1. Modify the sdma block to fit the unified ras block data and ops. 2. Rename amdgpu_sdma_ras_funcs to amdgpu_sdma_ras, and remove the _funcs suffix from the corresponding variable names. 3. Remove the const qualifier from the sdma ras variables so that the sdma ras block can be inserted into the amdgpu device ras block linked list. 4. Invoke amdgpu_ras_register_ras_block to register the sdma ras block into the amdgpu device ras block linked list. 5. Remove the redundant sdma code in amdgpu_ras.c now that the unified ras block is used. 6. Fill the unified ras block's .name, .block, .ras_late_init and .ras_fini for all sdma versions. If .ras_late_init and .ras_fini are defined by the selected sdma version, those functions take effect; otherwise they default to amdgpu_sdma_ras_late_init and amdgpu_sdma_ras_fini. Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 12 ++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 58 ++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c | 25 ++++++++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h | 2 +- 5 files changed, 71 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 5a8fccfdb0bb..4ca51f623751 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -953,12 +953,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, block_obj->hw_ops->query_ras_error_address(adev, &err_data); break; case AMDGPU_RAS_BLOCK__SDMA: - if (adev->sdma.funcs->query_ras_error_count) { - for (i = 0; i < adev->sdma.num_instances; i++) - adev->sdma.funcs->query_ras_error_count(adev, i, - &err_data); - } - break; case AMDGPU_RAS_BLOCK__GFX: case AMDGPU_RAS_BLOCK__MMHUB: if (!block_obj || !block_obj->hw_ops) { @@ -1064,9 +1058,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, 
block_obj->hw_ops->reset_ras_error_status(adev); break; case AMDGPU_RAS_BLOCK__SDMA: - if (adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); - break; case AMDGPU_RAS_BLOCK__HDP: if (!block_obj || !block_obj->hw_ops) { dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index f8fb755e3aa6..eaee12ab6518 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -23,6 +23,7 @@ #ifndef __AMDGPU_SDMA_H__ #define __AMDGPU_SDMA_H__ +#include "amdgpu_ras.h" /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 8 @@ -50,13 +51,8 @@ struct amdgpu_sdma_instance { bool burst_nop; }; -struct amdgpu_sdma_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev, - void *ras_ih_info); - void (*ras_fini)(struct amdgpu_device *adev); - int (*query_ras_error_count)(struct amdgpu_device *adev, - uint32_t instance, void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); +struct amdgpu_sdma_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_sdma { @@ -73,7 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; - const struct amdgpu_sdma_ras_funcs *funcs; + struct amdgpu_sdma_ras *ras; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 69c9e460c1eb..5500f93f6ecd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1898,13 +1898,13 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.funcs && - adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); + if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && + 
adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) + adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); } - if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) - return adev->sdma.funcs->ras_late_init(adev, &ih_info); + if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init) + return adev->sdma.ras->ras_block.ras_late_init(adev, &ih_info); else return 0; } @@ -2007,8 +2007,9 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) - adev->sdma.funcs->ras_fini(adev); + if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && + adev->sdma.ras->ras_block.ras_fini) + adev->sdma.ras->ras_block.ras_fini(adev); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); @@ -2745,7 +2746,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value, } } -static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, +static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -2767,6 +2768,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, return 0; }; +static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + int i = 0; + for (i = 0; i < adev->sdma.num_instances; i++) { + if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) + { + dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i); + return; + } + } +} + static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) { int i; @@ -2778,26 +2791,45 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) } } -static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { - .ras_late_init = amdgpu_sdma_ras_late_init, - .ras_fini = amdgpu_sdma_ras_fini, +const struct 
amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = { .query_ras_error_count = sdma_v4_0_query_ras_error_count, .reset_ras_error_count = sdma_v4_0_reset_ras_error_count, }; +static struct amdgpu_sdma_ras sdma_v4_0_ras = { + .ras_block = { + .hw_ops = &sdma_v4_0_ras_hw_ops, + }, +}; + static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA20: case CHIP_ARCTURUS: - adev->sdma.funcs = &sdma_v4_0_ras_funcs; + adev->sdma.ras = &sdma_v4_0_ras; break; case CHIP_ALDEBARAN: - adev->sdma.funcs = &sdma_v4_4_ras_funcs; + adev->sdma.ras = &sdma_v4_4_ras; break; default: break; } + + if (adev->sdma.ras) { + amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); + + strcpy(adev->sdma.ras->ras_block.name,"sdma"); + adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->sdma.ras->ras_block.ras_late_init) + adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; + + /* If don't define special ras_fini function, use default ras_fini */ + if (!adev->sdma.ras->ras_block.ras_fini) + adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini; + } } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c index bf95007f0843..5c1ba1116e5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c @@ -188,7 +188,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev, } } -static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, +static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status) { @@ -245,9 +245,26 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev) } } -const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = { - .ras_late_init = amdgpu_sdma_ras_late_init, - 
.ras_fini = amdgpu_sdma_ras_fini, +static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + int i = 0; + for (i = 0; i < adev->sdma.num_instances; i++) { + if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) + { + dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i); + return; + } + } + +} + +const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = { .query_ras_error_count = sdma_v4_4_query_ras_error_count, .reset_ras_error_count = sdma_v4_4_reset_ras_error_count, }; + +struct amdgpu_sdma_ras sdma_v4_4_ras = { + .ras_block = { + .hw_ops = &sdma_v4_4_ras_hw_ops, + }, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h index 74a6e5b5e949..a9f0c68359e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h @@ -23,6 +23,6 @@ #ifndef __SDMA_V4_4_H__ #define __SDMA_V4_4_H__ -extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs; +extern struct amdgpu_sdma_ras sdma_v4_4_ras; #endif -- 2.25.1