[AMD Official Use Only - General] > -----Original Message----- > From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Sent: Tuesday, June 13, 2023 3:08 PM > To: Yang, Stanley <Stanley.Yang@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; > Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Cc: Yang, Stanley <Stanley.Yang@xxxxxxx> > Subject: RE: [PATCH Review 1/2] drm/amdgpu: Optimze checking ras > supported > > [AMD Official Use Only - General] > > [Tao] typo in title: Optimze -> Optimize [Stanley]: Thanks Tao, will update before submitting. Regards, Stanley > > > -----Original Message----- > > From: Stanley.Yang <Stanley.Yang@xxxxxxx> > > Sent: Tuesday, June 13, 2023 11:53 AM > > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking > > <Hawking.Zhang@xxxxxxx>; Zhou1, Tao <Tao.Zhou1@xxxxxxx> > > Cc: Yang, Stanley <Stanley.Yang@xxxxxxx> > > Subject: [PATCH Review 1/2] drm/amdgpu: Optimze checking ras supported > > > > Use "is_app_apu" to identify whether the device is in native APU mode or carveout > mode. > > > > Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 +++--- > > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 34 ++++++++++++++----- > ------ > > 3 files changed, 23 insertions(+), 21 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > > index 78bacea951a9..352e958b190a 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > > @@ -1653,7 +1653,7 @@ int psp_ras_initialize(struct psp_context *psp) > > > > if (amdgpu_ras_is_poison_mode_supported(adev)) > > ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; > > - if (!adev->gmc.xgmi.connected_to_cpu) > > + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) > > ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; > > ras_cmd->ras_in_message.init_flags.xcc_mask = > > adev->gfx.xcc_mask; > > diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > index 7a0924469e4f..56bb0db207b9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > @@ -1689,8 +1689,7 @@ static void > > amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * > > } > > } > > > > - if (!adev->gmc.xgmi.connected_to_cpu) > > - amdgpu_umc_poison_handler(adev, false); > > + amdgpu_umc_poison_handler(adev, false); > > > > if (block_obj->hw_ops && block_obj->hw_ops- > > >handle_poison_consumption) > > poison_stat = block_obj->hw_ops- > > >handle_poison_consumption(adev); > > @@ -2458,11 +2457,10 @@ static void > amdgpu_ras_check_supported(struct > > amdgpu_device *adev) { > > adev->ras_hw_enabled = adev->ras_enabled = 0; > > > > - if (!adev->is_atom_fw || > > - !amdgpu_ras_asic_supported(adev)) > > + if (!amdgpu_ras_asic_supported(adev)) > > return; > > > > - if (!adev->gmc.xgmi.connected_to_cpu) { > > + if (!adev->gmc.xgmi.connected_to_cpu && !adev- > > [Tao] the tab should be replaced with space. 
> > > >gmc.is_app_apu) { > > if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { > > dev_info(adev->dev, "MEM ECC is active.\n"); > > adev->ras_hw_enabled |= (1 << > > AMDGPU_RAS_BLOCK__UMC | diff --git > > a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > index 1edf8e6aeb16..db0d94ca4ffc 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c > > @@ -169,27 +169,31 @@ int amdgpu_umc_poison_handler(struct > > amdgpu_device *adev, bool reset) { > > int ret = AMDGPU_RAS_SUCCESS; > > > > - if (!amdgpu_sriov_vf(adev)) { > > - if (!adev->gmc.xgmi.connected_to_cpu) { > > - struct ras_err_data err_data = {0, 0, 0, NULL}; > > - struct ras_common_if head = { > > - .block = AMDGPU_RAS_BLOCK__UMC, > > - }; > > - struct ras_manager *obj = amdgpu_ras_find_obj(adev, > > &head); > > - > > - ret = amdgpu_umc_do_page_retirement(adev, > > &err_data, NULL, reset); > > - > > - if (ret == AMDGPU_RAS_SUCCESS && obj) { > > - obj->err_data.ue_count += err_data.ue_count; > > - obj->err_data.ce_count += err_data.ce_count; > > - } > > - } else if (reset) { > > + if (adev->gmc.xgmi.connected_to_cpu || > > + adev->gmc.is_app_apu) { > > + if (reset) { > > /* MCA poison handler is only responsible for GPU reset, > > * let MCA notifier do page retirement. 
> > */ > > kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); > > amdgpu_ras_reset_gpu(adev); > > } > > + return ret; > > + } > > + > > + if (!amdgpu_sriov_vf(adev)) { > > + struct ras_err_data err_data = {0, 0, 0, NULL}; > > + struct ras_common_if head = { > > + .block = AMDGPU_RAS_BLOCK__UMC, > > + }; > > + struct ras_manager *obj = amdgpu_ras_find_obj(adev, > > + &head); > > + > > + ret = amdgpu_umc_do_page_retirement(adev, &err_data, > > + NULL, > > reset); > > + > > + if (ret == AMDGPU_RAS_SUCCESS && obj) { > > + obj->err_data.ue_count += err_data.ue_count; > > + obj->err_data.ce_count += err_data.ce_count; > > + } > > } else { > > if (adev->virt.ops && adev->virt.ops->ras_poison_handler) > > adev->virt.ops->ras_poison_handler(adev); > > -- > > 2.17.1 >