[AMD Official Use Only] With the concern in my inline comment below addressed, the series is: Reviewed-by: Tao Zhou <tao.zhou1@xxxxxxx> > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of yipechai > Sent: Wednesday, February 16, 2022 4:08 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Clements, John <John.Clements@xxxxxxx>; > Chai, Thomas <YiPeng.Chai@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx> > Subject: [PATCH V2 6/7] drm/amdgpu: define amdgpu_ras_late_init to call all ras > blocks' .ras_late_init > > Define amdgpu_ras_late_init to call all ras blocks' .ras_late_init. > > Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 44 ---------------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 25 ++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 --- > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +-- > drivers/gpu/drm/amd/amdgpu/soc15.c | 6 +-- > 7 files changed, 34 insertions(+), 59 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index a74a1b74a172..d90388dd5362 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2625,6 +2625,12 @@ static int amdgpu_device_ip_late_init(struct > amdgpu_device *adev) > adev->ip_blocks[i].status.late_initialized = true; > } > > + r = amdgpu_ras_late_init(adev); > + if (r) { > + DRM_ERROR("amdgpu_ras_late_init failed %d", r); [Tao]: amdgpu_ras_late_init already prints an error message when a block's ras_late_init fails, so I think the print here is unnecessary. 
> + return r; > + } > + > amdgpu_ras_set_error_query_ready(adev, true); > > amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index ebf4194b0699..49dd81c0db2d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -449,50 +449,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device > *adev) > > int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { > - int r; > - > - if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) { > - r = adev->umc.ras->ras_block.ras_late_init(adev, adev- > >umc.ras_if); > - if (r) > - return r; > - } > - > - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) { > - r = adev->mmhub.ras->ras_block.ras_late_init(adev, adev- > >mmhub.ras_if); > - if (r) > - return r; > - } > - > - if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) > { > - r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, adev- > >gmc.xgmi.ras_if); > - if (r) > - return r; > - } > - > - if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) { > - r = adev->hdp.ras->ras_block.ras_late_init(adev, adev- > >hdp.ras_if); > - if (r) > - return r; > - } > - > - if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) > { > - r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, adev- > >mca.mp0.ras_if); > - if (r) > - return r; > - } > - > - if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) > { > - r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, adev- > >mca.mp1.ras_if); > - if (r) > - return r; > - } > - > - if (adev->mca.mpio.ras && adev->mca.mpio.ras- > >ras_block.ras_late_init) { > - r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, adev- > >mca.mpio.ras_if); > - if (r) > - return r; > - } > - > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index 
1aff88fcea76..b5286a0d9c8a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -2522,6 +2522,31 @@ void amdgpu_ras_suspend(struct amdgpu_device > *adev) > amdgpu_ras_disable_all_features(adev, 1); } > > +int amdgpu_ras_late_init(struct amdgpu_device *adev) { > + struct amdgpu_ras_block_list *node, *tmp; > + struct amdgpu_ras_block_object *obj; > + int r; > + > + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { > + if (!node->ras_obj) { > + dev_warn(adev->dev, "Warning: abnormal ras list > node.\n"); > + continue; > + } > + obj = node->ras_obj; > + if (obj->ras_late_init) { > + r = obj->ras_late_init(adev, &obj->ras_comm); > + if (r) { > + dev_err(adev->dev, "%s failed to execute > ras_late_init! ret:%d\n", > + obj->ras_comm.name, r); > + return r; > + } > + } > + } > + > + return 0; > +} > + > /* do some fini work before IP fini as dependence */ int > amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 837d1b79a9cb..143a83043d7c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum > amdgpu_ras_error_type error) { > > /* called in ip_init and ip_fini */ > int amdgpu_ras_init(struct amdgpu_device *adev); > +int amdgpu_ras_late_init(struct amdgpu_device *adev); > int amdgpu_ras_fini(struct amdgpu_device *adev); int > amdgpu_ras_pre_fini(struct amdgpu_device *adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index bb40ab83fc22..1997f129db9c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -4791,12 +4791,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) > if (r) > return r; > > - if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_late_init) { > - r = 
adev->gfx.ras->ras_block.ras_late_init(adev, adev- > >gfx.ras_if); > - if (r) > - return r; > - } > - > if (adev->gfx.ras && > adev->gfx.ras->enable_watchdog_timer) > adev->gfx.ras->enable_watchdog_timer(adev); > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index af5a1c93861b..e26c39fcd336 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1894,10 +1894,7 @@ static int sdma_v4_0_late_init(void *handle) > adev->sdma.ras->ras_block.hw_ops- > >reset_ras_error_count(adev); > } > > - if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init) > - return adev->sdma.ras->ras_block.ras_late_init(adev, adev- > >sdma.ras_if); > - else > - return 0; > + return 0; > } > > static int sdma_v4_0_sw_init(void *handle) diff --git > a/drivers/gpu/drm/amd/amdgpu/soc15.c > b/drivers/gpu/drm/amd/amdgpu/soc15.c > index 464d635a0487..ba983398c9d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/soc15.c > +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c > @@ -1186,15 +1186,11 @@ static int soc15_common_early_init(void *handle) > static int soc15_common_late_init(void *handle) { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - int r = 0; > > if (amdgpu_sriov_vf(adev)) > xgpu_ai_mailbox_get_irq(adev); > > - if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_late_init) > - r = adev->nbio.ras->ras_block.ras_late_init(adev, adev- > >nbio.ras_if); > - > - return r; > + return 0; > } > > static int soc15_common_sw_init(void *handle) > -- > 2.25.1