[AMD Official Use Only - General] > -----Original Message----- > From: Wang, Yang(Kevin) <KevinYang.Wang@xxxxxxx> > Sent: Wednesday, April 17, 2024 11:10 AM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Zhou1, Tao > <Tao.Zhou1@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx> > Subject: [PATCH] drm/amdgpu: add ACA error query support for umc_v12_0 > > add ACA error query support for umc_v12_0. > > Signed-off-by: Yang Wang <kevinyang.wang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 +++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 4 ++++ > drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 18 ++++++++++++++---- > 3 files changed, 21 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 352ce16a0963..46b7f0c5cd8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1268,9 +1268,9 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device > *adev, enum amdgpu_ras_block blk) > return 0; > } > > -static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum > amdgpu_ras_block blk, > - enum aca_error_type type, struct > ras_err_data *err_data, > - struct ras_query_context *qctx) > +int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum > amdgpu_ras_block blk, > + enum aca_error_type type, struct ras_err_data > *err_data, > + struct ras_query_context *qctx) > { > struct ras_manager *obj; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 8d26989c75c8..487548879c49 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -898,6 +898,10 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device > *adev, enum amdgpu_ras_block blk) ssize_t amdgpu_ras_aca_sysfs_read(struct > device *dev, struct device_attribute *attr, > struct aca_handle *handle, char *buf, void > *data); > > +int 
amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum > amdgpu_ras_block blk, > + enum aca_error_type type, struct ras_err_data > *err_data, > + struct ras_query_context *qctx); [Tao] Is amdgpu_aca_log_ras_error_data() actually used anywhere in this patch? The umc_v12_0.c hunk below only calls amdgpu_aca_get_error_data(), so it is unclear why this function needs to be made non-static and declared in amdgpu_ras.h here. > + > void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, > struct ras_err_addr *err_addr); > > diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c > index f69871902233..9f2c46814a4f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c > @@ -317,16 +317,26 @@ static int umc_v12_0_err_cnt_init_per_channel(struct > amdgpu_device *adev, static void > umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, > void *ras_error_status) > { > + struct ras_err_data *err_data = (struct ras_err_data > +*)ras_error_status; > struct ras_query_context qctx; > > memset(&qctx, 0, sizeof(qctx)); > qctx.event_id = amdgpu_ras_acquire_event_id(adev, > amdgpu_ras_intr_triggered() ? > RAS_EVENT_TYPE_ISR : > RAS_EVENT_TYPE_INVALID); > > - amdgpu_mca_smu_log_ras_error(adev, > - AMDGPU_RAS_BLOCK__UMC, > AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx); > - amdgpu_mca_smu_log_ras_error(adev, > - AMDGPU_RAS_BLOCK__UMC, > AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx); > + if (amdgpu_aca_is_enabled(adev)) { > + amdgpu_aca_get_error_data(adev, > AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_CE, > + err_data, &qctx); > + amdgpu_aca_get_error_data(adev, > AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_UE, > + err_data, &qctx); > + amdgpu_aca_get_error_data(adev, > AMDGPU_RAS_BLOCK__UMC, ACA_ERROR_TYPE_DEFERRED, > + err_data, &qctx); > + } else { > + amdgpu_mca_smu_log_ras_error(adev, > AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, > + err_data, &qctx); > + amdgpu_mca_smu_log_ras_error(adev, > AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, > + err_data, &qctx); > + } > } > > static void umc_v12_0_ecc_info_query_ras_error_address(struct > amdgpu_device *adev, > -- > 2.34.1