Move each block error inject function from amdgpu_ras.c to each block.

amdgpu_ras_error_inject() no longer switches on the RAS block id.  It now
requires the matched block object to provide ops and dispatches through
ops->ras_error_inject: GFX is handed the original ras_inject_if, every
other block is handed the converted ta_ras_trigger_error_input.

- The XGMI/WAFL helper moves to amdgpu_xgmi.c with the same call sequence;
  it now takes the trigger input as a void pointer.
- UMC, SDMA, MMHUB, PCIE_BIF (nbio) and MCA each gain a per-version helper
  that rejects NULL arguments and calls psp_ras_trigger_error() while
  holding adev->grbm_idx_mutex.  The old common path called
  psp_ras_trigger_error() without taking that mutex.
- A block whose ops has no ras_error_inject callback still gets the
  "error injection is not supported yet" message and -EINVAL, as the old
  default case did.

Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx>
---
Review notes:
 - The NULL-argument path of the new helpers logs with pr_err(), because
   adev may be the NULL pointer being reported.
 - The per-block helpers differ only in name; a single shared helper could
   replace them in a follow-up.
 - The index lines still carry the blob ids of the originally posted patch.

 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  | 60 +++++-------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 +++++++++++
 drivers/gpu/drm/amd/amdgpu/mca_v3_0.c    | 18 +++++++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c  | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c  | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c   | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c    | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c    | 16 ++++++
 drivers/gpu/drm/amd/amdgpu/umc_v8_7.c    | 16 ++++++
 12 files changed, 202 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2e38bd3d3d45..87b625d305c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1032,31 +1032,7 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 	return 0;
 }
 
-/* Trigger XGMI/WAFL error */
-static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
-				 struct ta_ras_trigger_error_input *block_info)
-{
-	int ret;
-
-	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
-		dev_warn(adev->dev, "Failed to disallow df cstate");
 
-	if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
-		dev_warn(adev->dev, "Failed to disallow XGMI power down");
-
-	ret = psp_ras_trigger_error(&adev->psp, block_info);
-
-	if (amdgpu_ras_intr_triggered())
-		return ret;
-
-	if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
-		dev_warn(adev->dev, "Failed to allow XGMI power down");
-
-	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
-		dev_warn(adev->dev, "Failed to allow df cstate");
-
-	return ret;
-}
 
 /* wrapper of psp_ras_trigger_error */
 int amdgpu_ras_error_inject(struct amdgpu_device *adev,
@@ -1076,41 +1052,29 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 	if (!obj)
 		return -EINVAL;
 
+	if (!block_obj || !block_obj->ops) {
+		dev_info(adev->dev, "%s don't config ras function \n", get_ras_block_str(&info->head));
+		return -EINVAL;
+	}
+
 	/* Calculate XGMI relative offset */
 	if (adev->gmc.xgmi.num_physical_nodes > 1) {
-		block_info.address =
-			amdgpu_xgmi_get_relative_phy_addr(adev,
-							  block_info.address);
+		block_info.address = amdgpu_xgmi_get_relative_phy_addr(adev, block_info.address);
 	}
 
-	switch (info->head.block) {
-	case AMDGPU_RAS_BLOCK__GFX:
-		if (!block_obj || !block_obj->ops) {
-			dev_info(adev->dev, "%s don't config ras function \n", get_ras_block_str(&info->head));
-			return -EINVAL;
-		}
-		if (block_obj->ops->ras_error_inject)
+	if (block_obj->ops->ras_error_inject) {
+		if (info->head.block == AMDGPU_RAS_BLOCK__GFX)
 			ret = block_obj->ops->ras_error_inject(adev, info);
-		break;
-	case AMDGPU_RAS_BLOCK__UMC:
-	case AMDGPU_RAS_BLOCK__SDMA:
-	case AMDGPU_RAS_BLOCK__MMHUB:
-	case AMDGPU_RAS_BLOCK__PCIE_BIF:
-	case AMDGPU_RAS_BLOCK__MCA:
-		ret = psp_ras_trigger_error(&adev->psp, &block_info);
-		break;
-	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-		ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
-		break;
-	default:
+		else
+			ret = block_obj->ops->ras_error_inject(adev, &block_info);
+	} else {
 		dev_info(adev->dev, "%s error injection is not supported yet\n",
 			 get_ras_block_str(&info->head));
 		ret = -EINVAL;
 	}
 
 	if (ret)
-		dev_err(adev->dev, "ras inject %s failed %d\n",
-				get_ras_block_str(&info->head), ret);
+		dev_err(adev->dev, "ras inject %s failed %d\n", get_ras_block_str(&info->head), ret);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index da541c7b1ec2..298742afba99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -940,6 +940,33 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 	err_data->ce_count += ce_cnt;
 }
 
+/* Trigger XGMI/WAFL error */
+static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
+				 void *inject_if)
+{
+	int ret = 0;
+	struct ta_ras_trigger_error_input *block_info = inject_if;
+
+	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
+		dev_warn(adev->dev, "Failed to disallow df cstate");
+
+	if (amdgpu_dpm_allow_xgmi_power_down(adev, false))
+		dev_warn(adev->dev, "Failed to disallow XGMI power down");
+
+	ret = psp_ras_trigger_error(&adev->psp, block_info);
+
+	if (amdgpu_ras_intr_triggered())
+		return ret;
+
+	if (amdgpu_dpm_allow_xgmi_power_down(adev, true))
+		dev_warn(adev->dev, "Failed to allow XGMI power down");
+
+	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
+		dev_warn(adev->dev, "Failed to allow df cstate");
+
+	return ret;
+}
+
 static int amdgpu_xgmi_ras_block_match(struct amdgpu_ras_block_object* block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index)
 {
 	if(!block_obj)
@@ -958,6 +985,7 @@ struct amdgpu_ras_block_ops xgmi_ras_ops = {
 	.ras_fini = amdgpu_xgmi_ras_fini,
 	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
 	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
+	.ras_error_inject = amdgpu_ras_error_inject_xgmi,
 };
 
 struct amdgpu_xgmi_ras xgmi_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
index 99edc75ed4ec..ce6841967b05 100644
--- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c
@@ -60,12 +60,28 @@ static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object* block_obj, e
 	return -EINVAL;
 }
 
+static int mca_v3_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops mca_v3_0_mp0_ops = {
 	.ras_block_match = mca_v3_0_ras_block_match,
 	.ras_late_init = mca_v3_0_mp0_ras_late_init,
 	.ras_fini = mca_v3_0_mp0_ras_fini,
 	.query_ras_error_count = mca_v3_0_mp0_query_ras_error_count,
 	.query_ras_error_address = NULL,
+	.ras_error_inject = mca_v3_0_ras_error_inject,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = {
@@ -101,6 +117,7 @@ const struct amdgpu_ras_block_ops mca_v3_0_mp1_ops = {
 	.ras_fini = mca_v3_0_mp1_ras_fini,
 	.query_ras_error_count = mca_v3_0_mp1_query_ras_error_count,
 	.query_ras_error_address = NULL,
+	.ras_error_inject = mca_v3_0_ras_error_inject,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = {
@@ -136,6 +153,7 @@ const struct amdgpu_ras_block_ops mca_v3_0_mpio_ops = {
 	.ras_fini = mca_v3_0_mpio_ras_fini,
 	.query_ras_error_count = mca_v3_0_mpio_query_ras_error_count,
 	.query_ras_error_address = NULL,
+	.ras_error_inject = mca_v3_0_ras_error_inject,
 };
 
 struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index da505314802a..7cca86c504e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -786,12 +786,28 @@ static int mmhub_v1_0_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int mmhub_v1_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 struct amdgpu_ras_block_ops mmhub_v1_0_ras_ops = {
 	.ras_block_match = mmhub_v1_0_ras_block_match,
 	.ras_late_init = amdgpu_mmhub_ras_late_init,
 	.ras_fini = amdgpu_mmhub_ras_fini,
 	.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
 	.reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
+	.ras_error_inject = mmhub_v1_0_ras_error_inject,
 };
 
 struct amdgpu_mmhub_ras mmhub_v1_0_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 829d14ee87d3..79a9995caef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -1333,6 +1333,21 @@ static int mmhub_v1_7_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int mmhub_v1_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
 	.ras_block_match = mmhub_v1_7_ras_block_match,
 	.ras_late_init = amdgpu_mmhub_ras_late_init,
@@ -1341,6 +1356,7 @@ struct amdgpu_ras_block_ops mmhub_v1_7_ras_ops = {
 	.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
 	.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
 	.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
+	.ras_error_inject = mmhub_v1_7_ras_error_inject,
 };
 
 struct amdgpu_mmhub_ras mmhub_v1_7_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 1edc98e5bcbb..eaed556b9551 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -1667,6 +1667,21 @@ static int mmhub_v9_4_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int mmhub_v9_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
 	.ras_block_match = mmhub_v9_4_ras_block_match,
 	.ras_late_init = amdgpu_mmhub_ras_late_init,
@@ -1674,6 +1689,7 @@ const struct amdgpu_ras_block_ops mmhub_v9_4_ras_ops = {
 	.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
 	.reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
 	.query_ras_error_status = mmhub_v9_4_query_ras_error_status,
+	.ras_error_inject = mmhub_v9_4_ras_error_inject,
 };
 
 struct amdgpu_mmhub_ras mmhub_v9_4_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 14f7265d954e..8e62e2ffabe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -650,11 +650,27 @@ static int nbio_v7_4_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int nbio_v7_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops nbio_v7_4_ras_ops = {
 	.ras_block_match = nbio_v7_4_ras_block_match,
 	.query_ras_error_count = nbio_v7_4_query_ras_error_count,
 	.ras_late_init = amdgpu_nbio_ras_late_init,
 	.ras_fini = amdgpu_nbio_ras_fini,
+	.ras_error_inject = nbio_v7_4_ras_error_inject,
 };
 
 struct amdgpu_nbio_ras nbio_v7_4_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 30a651613776..578ee40cc0d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2803,11 +2803,27 @@ static int sdma_v4_0_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int sdma_v4_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops sdma_v4_0_ras_ops = {
 	.ras_block_match = sdma_v4_0_ras_block_match,
 	.ras_fini = amdgpu_sdma_ras_fini,
 	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
 	.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
+	.ras_error_inject = sdma_v4_0_ras_error_inject,
 };
 
 static struct amdgpu_sdma_ras sdma_v4_0_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
index 8c165bcb0ffa..0656c6a7a2c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -270,11 +270,27 @@ static int sdma_v4_4_ras_block_match(struct amdgpu_ras_block_object* block_obj,
 	return -EINVAL;
 }
 
+static int sdma_v4_4_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops sdma_v4_4_ras_ops = {
 	.ras_block_match = sdma_v4_4_ras_block_match,
 	.ras_fini = amdgpu_sdma_ras_fini,
 	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
 	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
+	.ras_error_inject = sdma_v4_4_ras_error_inject,
 };
 
 struct amdgpu_sdma_ras sdma_v4_4_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index ed480c2081a6..2058439b02cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -477,12 +477,28 @@ static int umc_v6_1_ras_block_match(struct amdgpu_ras_block_object* block_obj, e
 	return -EINVAL;
 }
 
+static int umc_v6_1_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops umc_v6_1_ras_ops = {
 	.ras_block_match = umc_v6_1_ras_block_match,
 	.ras_late_init = amdgpu_umc_ras_late_init,
 	.ras_fini = amdgpu_umc_ras_fini,
 	.query_ras_error_count = umc_v6_1_query_ras_error_count,
 	.query_ras_error_address = umc_v6_1_query_ras_error_address,
+	.ras_error_inject = umc_v6_1_ras_error_inject,
 };
 
 struct amdgpu_umc_ras umc_v6_1_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index e26728dbc6e9..2e87e7de4a55 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -333,6 +333,21 @@ static int umc_v6_7_ras_block_match(struct amdgpu_ras_block_object* block_obj, e
 	return -EINVAL;
 }
 
+static int umc_v6_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
 	.ras_block_match = umc_v6_7_ras_block_match,
 	.ras_late_init = amdgpu_umc_ras_late_init,
@@ -340,6 +355,7 @@ const struct amdgpu_ras_block_ops umc_v6_7_ras_pos = {
 	.query_ras_error_count = umc_v6_7_query_ras_error_count,
 	.query_ras_error_address = umc_v6_7_query_ras_error_address,
 	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
+	.ras_error_inject = umc_v6_7_ras_error_inject,
 };
 
 struct amdgpu_umc_ras umc_v6_7_ras = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index 037791e90c24..f7fb653434b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -336,12 +336,28 @@ static int umc_v8_7_ras_block_match(struct amdgpu_ras_block_object* block_obj, e
 	return -EINVAL;
 }
 
+static int umc_v8_7_ras_error_inject(struct amdgpu_device *adev, void *inject_if)
+{
+	int ret;
+
+	if (!adev || !inject_if) {
+		pr_err("%s invalid parameters\n", __func__);
+		return -EINVAL;
+	}
+	mutex_lock(&adev->grbm_idx_mutex);
+	ret = psp_ras_trigger_error(&adev->psp, inject_if);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return ret;
+}
+
 const struct amdgpu_ras_block_ops umc_v8_7_ras_ops = {
 	.ras_block_match = umc_v8_7_ras_block_match,
 	.ras_late_init = amdgpu_umc_ras_late_init,
 	.ras_fini = amdgpu_umc_ras_fini,
 	.query_ras_error_count = umc_v8_7_query_ras_error_count,
 	.query_ras_error_address = umc_v8_7_query_ras_error_address,
+	.ras_error_inject = umc_v8_7_ras_error_inject,
 };
 
 struct amdgpu_umc_ras umc_v8_7_ras = {
-- 
2.25.1