> -----Original Message----- > From: Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Sent: Monday, March 6, 2023 10:32 AM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhou1, Tao <Tao.Zhou1@xxxxxxx>; > Yang, Stanley <Stanley.Yang@xxxxxxx>; Li, Candice <Candice.Li@xxxxxxx>; > Chai, Thomas <YiPeng.Chai@xxxxxxx> > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Subject: [PATCH 09/11] drm/amdgpu: Rework xgmi_wafl_pcs ras sw_init > > To align with other IP blocks. > > Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 9 +++++--- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 28 > +++++++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1 + > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 ++++++ > 4 files changed, 37 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index 524e2c9b3012..d4685d22be60 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -500,9 +500,12 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device > *adev) > > /* xgmi ras block */ > if (amdgpu_ras_is_supported(adev, > AMDGPU_RAS_BLOCK__XGMI_WAFL)) { > - adev->gmc.xgmi.ras = &xgmi_ras; > - amdgpu_ras_register_ras_block(adev, &adev- > >gmc.xgmi.ras->ras_block); > - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras- > >ras_block.ras_comm; > + r = amdgpu_xgmi_ras_sw_init(adev); > + if (r) { > + dev_err(adev->dev, "Failed to initialize > xgmi_wafl_pcs ras block!\n"); > + return r; > + } > + > } > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > index fef1575cd0cf..3fe24348d199 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c > @@ -1048,12 +1048,30 @@ struct amdgpu_ras_block_hw_ops > xgmi_ras_hw_ops = { > > struct amdgpu_xgmi_ras xgmi_ras = { > .ras_block = { > - .ras_comm = { > - .name = "xgmi_wafl", > - .block = 
AMDGPU_RAS_BLOCK__XGMI_WAFL, > - .type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, > - }, > .hw_ops = &xgmi_ras_hw_ops, > .ras_late_init = amdgpu_xgmi_ras_late_init, > }, > }; > + > +int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev) { > + int err; > + struct amdgpu_xgmi_ras *ras; > + > + if (!adev->gmc.xgmi.ras) > + return 0; > + > + ras = adev->gmc.xgmi.ras; > + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register xgmi_wafl_pcs ras > block!\n"); > + return err; > + } > + > + strcpy(ras->ras_block.ras_comm.name, "xgmi_wafl_pcs"); > + ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__XGMI_WAFL; > + ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->gmc.xgmi.ras_if = &ras->ras_block.ras_comm; > + > + return 0; > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > index 30dcc1681b4e..86fbf56938f4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h > @@ -73,5 +73,6 @@ static inline bool amdgpu_xgmi_same_hive(struct > amdgpu_device *adev, > adev->gmc.xgmi.hive_id && > adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); } > +int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev); > > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 67c2a5186b8a..2a8dc9b52c2d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -1381,6 +1381,12 @@ static void gmc_v9_0_set_mca_ras_funcs(struct > amdgpu_device *adev) > } > } > > +static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev) { > + if (!adev->gmc.xgmi.connected_to_cpu) [Stanley]: Can we use if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) instead of if (!adev->gmc.xgmi.connected_to_cpu), to keep the RAS support check uniform across IP blocks? 
Regards, Stanley > + adev->gmc.xgmi.ras = &xgmi_ras; > +} > + > static int gmc_v9_0_early_init(void *handle) { > struct amdgpu_device *adev = (struct amdgpu_device *)handle; > @@ -1404,6 +1410,7 @@ static int gmc_v9_0_early_init(void *handle) > gmc_v9_0_set_gfxhub_funcs(adev); > gmc_v9_0_set_hdp_ras_funcs(adev); > gmc_v9_0_set_mca_ras_funcs(adev); > + gmc_v9_0_set_xgmi_ras_funcs(adev); > > adev->gmc.shared_aperture_start = 0x2000000000000000ULL; > adev->gmc.shared_aperture_end = > -- > 2.17.1