Make the convert_ras_err_addr callback return an error code so that the upper layer can return failure directly if address conversion fails. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 19 +++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 12 ++++++++---- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index c0433e6471f5..3199dca8f1ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -464,11 +464,14 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, addr_out.pa.pa = pa_addr; - if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) - adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL, + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + ret = adev->umc.ras->convert_ras_err_addr(adev, &err_data, NULL, &addr_out, false); - else + if (ret) + goto out; + } else { goto out; + } for (i = 0; i < adev->umc.retire_unit; i++) { if (pos >= len) @@ -490,6 +493,7 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, { struct ta_ras_query_address_input addr_in; struct ta_ras_query_address_output addr_out; + int ret; memset(&addr_in, 0, sizeof(addr_in)); addr_in.ma.err_addr = err_addr; @@ -498,11 +502,14 @@ int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, addr_in.ma.node_inst = node; addr_in.ma.socket_id = socket; - if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) - adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in, + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in, &addr_out, dump_addr); - else + if (ret) + return ret; + } else { return 0; + } *addr = addr_out.pa.pa; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index abde7597bda8..f45408a6ff03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -70,7 +70,7 @@ struct amdgpu_umc_ras { enum amdgpu_mca_error_type type, void *ras_error_status); int (*update_ecc_status)(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); - void (*convert_ras_err_addr)(struct amdgpu_device *adev, + int (*convert_ras_err_addr)(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 9b93ff769b86..ce60fd6675ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -173,7 +173,7 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, +static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, struct ta_ras_query_address_input *addr_in, struct ta_ras_query_address_output *addr_out, @@ -183,6 +183,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, uint64_t soc_pa, retired_page, column, err_addr; struct ta_ras_query_address_output addr_out_tmp; struct ta_ras_query_address_output *paddr_out; + int ret = 0; if (!addr_out) paddr_out = &addr_out_tmp; @@ -193,11 +194,12 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, if (addr_in) { err_addr = addr_in->ma.err_addr; addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, paddr_out)) { + ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out); + if (ret) { dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", err_addr); - return; + return ret; } bank = paddr_out->pa.bank; @@ -209,7 +211,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, soc_pa = paddr_out->pa.pa; if (!err_data && !dump_addr) - 
return; + return ret; col = (err_addr >> 1) & 0x1fULL; /* clear [C3 C2] in soc physical address */ @@ -241,6 +243,8 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); } + + return ret; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, -- 2.34.1