add ACA error query support for umc_v12_0. Signed-off-by: Yang Wang <kevinyang.wang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 18 ++++++++++++++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 352ce16a0963..46b7f0c5cd8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1268,9 +1268,9 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) return 0; } -static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum aca_error_type type, struct ras_err_data *err_data, - struct ras_query_context *qctx) +int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 8d26989c75c8..487548879c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -898,6 +898,10 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, struct aca_handle *handle, char *buf, void *data); +int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx); + void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index f69871902233..9f2c46814a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -317,16 +317,26 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
 static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 					void *ras_error_status)
 {
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
 	struct ras_query_context qctx;
 
 	memset(&qctx, 0, sizeof(qctx));
 	qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ?
 						    RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID);
 
-	amdgpu_mca_smu_log_ras_error(adev,
-		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx);
-	amdgpu_mca_smu_log_ras_error(adev,
-		AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx);
+	if (amdgpu_aca_is_enabled(adev)) {
+		amdgpu_aca_log_ras_error_data(adev, AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_CE,
+					      err_data, &qctx);
+		amdgpu_aca_log_ras_error_data(adev, AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_UE,
+					      err_data, &qctx);
+		amdgpu_aca_log_ras_error_data(adev, AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_DEFERRED,
+					      err_data, &qctx);
+	} else {
+		amdgpu_mca_smu_log_ras_error(adev, AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE,
+					     err_data, &qctx);
+		amdgpu_mca_smu_log_ras_error(adev, AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE,
+					     err_data, &qctx);
+	}
 }
 
 static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
-- 
2.34.1