The KFD part looks good to me, other than the SDMA comment that Guchun pointed out. With that fixed this patch is Acked-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> Thanks, Felix Am 2020-08-24 um 6:33 a.m. schrieb Stanley.Yang: > The ctx->features are new RAS implementation which > is only available for Vega20 and onwards, it is not > available for vega10, vega10 should follow legacy > ECC implementation. > > Changed from V1: > wrap function to initialize kfd node properties > > Changed from V2: > remove wrap funcion, remove SRMA ECC check > > Change-Id: I1e3ff899bf066611fe5775e67104ce2e0bf8b7d0 > Signed-off-by: Stanley.Yang <Stanley.Yang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 16 ++++++++------- > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 24 +++++++++++------------ > 3 files changed, 21 insertions(+), 20 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 1f9d97f61aa5..573e2712df35 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -986,6 +986,7 @@ struct amdgpu_device { > > atomic_t throttling_logging_enabled; > struct ratelimit_state throttling_logging_rs; > + uint32_t ras_features; > }; > > static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index cd1403f83dcf..d462244863f6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -1974,7 +1974,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, > *supported = 0; > > if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || > - (adev->asic_type != CHIP_VEGA20 && > + (adev->asic_type != CHIP_VEGA10 && > + adev->asic_type != CHIP_VEGA20 && > adev->asic_type != CHIP_ARCTURUS && > adev->asic_type != CHIP_SIENNA_CICHLID)) > return; > @@ -1998,6 +1999,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, > > *supported = amdgpu_ras_enable == 0 ? > 0 : *hw_supported & amdgpu_ras_mask; > + adev->ras_features = *supported; > } > > int amdgpu_ras_init(struct amdgpu_device *adev) > @@ -2020,9 +2022,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > > amdgpu_ras_check_supported(adev, &con->hw_supported, > &con->supported); > - if (!con->hw_supported) { > + if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) { > r = 0; > - goto err_out; > + goto release_con; > } > > con->features = 0; > @@ -2033,25 +2035,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > if (adev->nbio.funcs->init_ras_controller_interrupt) { > r = adev->nbio.funcs->init_ras_controller_interrupt(adev); > if (r) > - goto err_out; > + goto release_con; > } > > if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) { > r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev); > if (r) > - goto err_out; > + goto release_con; > } > > if (amdgpu_ras_fs_init(adev)) { > r = -EINVAL; > - goto err_out; > + goto release_con; > } > > dev_info(adev->dev, "RAS INFO: ras initialized successfully, " > "hardware ability[%x] ras_mask[%x]\n", > con->hw_supported, con->supported); > return 0; > -err_out: > +release_con: > amdgpu_ras_set_context(adev, NULL); > kfree(con); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > index f185f6cbc05c..0ba960a17ead 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > @@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) > void *crat_image = NULL; > size_t image_size = 0; > int proximity_domain; > - struct amdgpu_ras *ctx; > + struct amdgpu_device *adev; > > INIT_LIST_HEAD(&temp_topology_device_list); > > @@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu) > dev->node_props.max_waves_per_simd = 10; > } > > - ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd)); > - if (ctx) { > - /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */ > - dev->node_props.capability |= > - (((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) || > - ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ? > - HSA_CAP_SRAM_EDCSUPPORTED : 0; > - dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? > - HSA_CAP_MEM_EDCSUPPORTED : 0; > - > - dev->node_props.capability |= (ctx->features != 0) ? > + adev = (struct amdgpu_device *)(dev->gpu->kgd); > + /* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */ > + dev->node_props.capability |= > + ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? > + HSA_CAP_SRAM_EDCSUPPORTED : 0; > + dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? > + HSA_CAP_MEM_EDCSUPPORTED : 0; > + > + if (adev->asic_type != CHIP_VEGA10) > + dev->node_props.capability |= (adev->ras_features != 0) ? > HSA_CAP_RASEVENTNOTIFY : 0; > - } > > kfd_debug_print_topology(); > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx