[AMD Official Use Only] Hi yipe, I have one suggestion for this patch; please see my inline comment below. Regards, Stanley > -----邮件原件----- > 发件人: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> 代表 yipechai > 发送时间: Tuesday, March 1, 2022 5:46 PM > 收件人: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > 抄送: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Clements, John <John.Clements@xxxxxxx>; > Chai, Thomas <YiPeng.Chai@xxxxxxx>; Chai, Thomas > <YiPeng.Chai@xxxxxxx> > 主题: [PATCH] drm/amdgpu: Move common initialization operations of each > ras block to one function > > Define amdgpu_ras_sw_init function to initialize all ras blocks. > > Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 - > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 143 > ++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 --- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 --- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 ---- > drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 - > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 --- > 9 files changed, 148 insertions(+), 92 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 6113ddc765a7..72550e9f6058 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct > amdgpu_device *adev) > } > } > > + r = amdgpu_ras_sw_init(adev); > + if (r) { > + DRM_ERROR("amdgpu_ras_early_init failed (%d).\n", r); > + goto init_failed; > + } [Yang, Stanley] : This is the early init of the RAS blocks, so I think it is more reasonable to move amdgpu_ras_sw_init before the amdgpu_ras_init function. 
> + > if (amdgpu_sriov_vf(adev)) > amdgpu_virt_init_data_exchange(adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index ab75e189bc0b..544241f357b2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct > amdgpu_device *adev) { > if (!adev->gmc.xgmi.connected_to_cpu) { > adev->gmc.xgmi.ras = &xgmi_ras; > - amdgpu_ras_register_ras_block(adev, &adev- > >gmc.xgmi.ras->ras_block); > - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras- > >ras_block.ras_comm; > } > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > index d3875618ebf5..89075ab9e82e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) > case CHIP_ALDEBARAN: > if (!adev->gmc.xgmi.connected_to_cpu) { > adev->nbio.ras = &nbio_v7_4_ras; > - amdgpu_ras_register_ras_block(adev, &adev- > >nbio.ras->ras_block); > - adev->nbio.ras_if = &adev->nbio.ras- > >ras_block.ras_comm; > } > break; > default: > @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct > amdgpu_device *adev) > amdgpu_ras_disable_all_features(adev, 1); } > > +int amdgpu_ras_sw_init(struct amdgpu_device *adev) { > + int err = 0; > + > + if (!amdgpu_ras_asic_supported(adev)) > + return 0; > + > + if (adev->nbio.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >nbio.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register nbio ras > block!\n"); > + return err; > + } > + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; > + } > + > + if (adev->gmc.xgmi.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >gmc.xgmi.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register xgmi ras > block!\n"); > + return err; > + } > + adev->gmc.xgmi.ras_if = 
&adev->gmc.xgmi.ras- > >ras_block.ras_comm; > + } > + > + if (adev->gfx.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras- > >ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register gfx ras > block!\n"); > + return err; > + } > + > + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); > + adev->gfx.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__GFX; > + adev->gfx.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; > + > + /* If not define special ras_late_init function, use gfx default > ras_late_init */ > + if (!adev->gfx.ras->ras_block.ras_late_init) > + adev->gfx.ras->ras_block.ras_late_init = > amdgpu_gfx_ras_late_init; > + > + /* If not defined special ras_cb function, use default ras_cb > */ > + if (!adev->gfx.ras->ras_block.ras_cb) > + adev->gfx.ras->ras_block.ras_cb = > amdgpu_gfx_process_ras_data_cb; > + } > + > + if (adev->umc.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >umc.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register umc ras > block!\n"); > + return err; > + } > + > + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); > + adev->umc.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__UMC; > + adev->umc.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; > + > + /* If don't define special ras_late_init function, use default > ras_late_init */ > + if (!adev->umc.ras->ras_block.ras_late_init) > + adev->umc.ras->ras_block.ras_late_init = > amdgpu_umc_ras_late_init; > + > + /* If not defined special ras_cb function, use default ras_cb > */ > + if (!adev->umc.ras->ras_block.ras_cb) > + adev->umc.ras->ras_block.ras_cb = > amdgpu_umc_process_ras_data_cb; > + } > + > + if (adev->mmhub.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >mmhub.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to 
register mmhub ras > block!\n"); > + return err; > + } > + > + strcpy(adev->mmhub.ras->ras_block.ras_comm.name, > "mmhub"); > + adev->mmhub.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__MMHUB; > + adev->mmhub.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->mmhub.ras_if = &adev->mmhub.ras- > >ras_block.ras_comm; > + } > + > + if (adev->hdp.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >hdp.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register hdp ras > block!\n"); > + return err; > + } > + > + adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; > + } > + > + if (adev->mca.mp0.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >mca.mp0.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register mca mp0 ras > block!\n"); > + return err; > + } > + adev->mca.mp0.ras_if = &adev->mca.mp0.ras- > >ras_block.ras_comm; > + } > + > + if (adev->mca.mp1.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >mca.mp1.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register mca mp1 ras > block!\n"); > + return err; > + } > + adev->mca.mp1.ras_if = &adev->mca.mp1.ras- > >ras_block.ras_comm; > + } > + > + if (adev->mca.mpio.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >mca.mpio.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register mca mpio ras > block!\n"); > + return err; > + } > + adev->mca.mpio.ras_if = &adev->mca.mpio.ras- > >ras_block.ras_comm; > + } > + > + if (adev->sdma.ras) { > + err = amdgpu_ras_register_ras_block(adev, &adev- > >sdma.ras->ras_block); > + if (err) { > + dev_err(adev->dev, "Failed to register sdma ras > block!\n"); > + return err; > + } > + > + strcpy(adev->sdma.ras->ras_block.ras_comm.name, > "sdma"); > + adev->sdma.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__SDMA; > + adev->sdma.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > + adev->sdma.ras_if = 
&adev->sdma.ras- > >ras_block.ras_comm; > + > + /* If don't define special ras_late_init function, use default > ras_late_init */ > + if (!adev->sdma.ras->ras_block.ras_late_init) > + adev->sdma.ras->ras_block.ras_late_init = > amdgpu_sdma_ras_late_init; > + > + /* If not defined special ras_cb function, use default ras_cb > */ > + if (!adev->sdma.ras->ras_block.ras_cb) > + adev->sdma.ras->ras_block.ras_cb = > amdgpu_sdma_process_ras_data_cb; > + } > + > + return 0; > +} > + > int amdgpu_ras_late_init(struct amdgpu_device *adev) { > struct amdgpu_ras_block_list *node, *tmp; diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > index 7cddaad90d6d..fc5ed6c47443 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > @@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum > amdgpu_ras_error_type error) { > > /* called in ip_init and ip_fini */ > int amdgpu_ras_init(struct amdgpu_device *adev); > +int amdgpu_ras_sw_init(struct amdgpu_device *adev); > int amdgpu_ras_late_init(struct amdgpu_device *adev); int > amdgpu_ras_fini(struct amdgpu_device *adev); int > amdgpu_ras_pre_fini(struct amdgpu_device *adev); diff --git > a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 8def7f630d4c..e26fc2ae98e1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -2188,27 +2188,6 @@ static int gfx_v9_0_gpu_early_init(struct > amdgpu_device *adev) > break; > } > > - if (adev->gfx.ras) { > - err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras- > >ras_block); > - if (err) { > - DRM_ERROR("Failed to register gfx ras block!\n"); > - return err; > - } > - > - strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); > - adev->gfx.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__GFX; > - adev->gfx.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > - adev->gfx.ras_if = 
&adev->gfx.ras->ras_block.ras_comm; > - > - /* If not define special ras_late_init function, use gfx default > ras_late_init */ > - if (!adev->gfx.ras->ras_block.ras_late_init) > - adev->gfx.ras->ras_block.ras_late_init = > amdgpu_gfx_ras_late_init; > - > - /* If not defined special ras_cb function, use default ras_cb > */ > - if (!adev->gfx.ras->ras_block.ras_cb) > - adev->gfx.ras->ras_block.ras_cb = > amdgpu_gfx_process_ras_data_cb; > - } > - > adev->gfx.config.gb_addr_config = gb_addr_config; > > adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << diff --git > a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index d9353bb99314..5046be86702f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -669,22 +669,6 @@ static void gmc_v10_0_set_umc_funcs(struct > amdgpu_device *adev) > default: > break; > } > - if (adev->umc.ras) { > - amdgpu_ras_register_ras_block(adev, &adev->umc.ras- > >ras_block); > - > - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); > - adev->umc.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__UMC; > - adev->umc.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; > - > - /* If don't define special ras_late_init function, use default > ras_late_init */ > - if (!adev->umc.ras->ras_block.ras_late_init) > - adev->umc.ras->ras_block.ras_late_init = > amdgpu_umc_ras_late_init; > - > - /* If not defined special ras_cb function, use default ras_cb > */ > - if (!adev->umc.ras->ras_block.ras_cb) > - adev->umc.ras->ras_block.ras_cb = > amdgpu_umc_process_ras_data_cb; > - } > } > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 4c3483fbe613..8f6aefb9be08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -1228,23 +1228,6 @@ static void gmc_v9_0_set_umc_funcs(struct > 
amdgpu_device *adev) > default: > break; > } > - > - if (adev->umc.ras) { > - amdgpu_ras_register_ras_block(adev, &adev->umc.ras- > >ras_block); > - > - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); > - adev->umc.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__UMC; > - adev->umc.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; > - > - /* If don't define special ras_late_init function, use default > ras_late_init */ > - if (!adev->umc.ras->ras_block.ras_late_init) > - adev->umc.ras->ras_block.ras_late_init = > amdgpu_umc_ras_late_init; > - > - /* If not defined special ras_cb function, use default ras_cb > */ > - if (!adev->umc.ras->ras_block.ras_cb) > - adev->umc.ras->ras_block.ras_cb = > amdgpu_umc_process_ras_data_cb; > - } > } > > static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) @@ > -1278,15 +1261,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct > amdgpu_device *adev) > /* mmhub ras is not available */ > break; > } > - > - if (adev->mmhub.ras) { > - amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras- > >ras_block); > - > - strcpy(adev->mmhub.ras->ras_block.ras_comm.name, > "mmhub"); > - adev->mmhub.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__MMHUB; > - adev->mmhub.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > - adev->mmhub.ras_if = &adev->mmhub.ras- > >ras_block.ras_comm; > - } > } > > static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) @@ > -1297,8 +1271,6 @@ static void gmc_v9_0_set_gfxhub_funcs(struct > amdgpu_device *adev) static void gmc_v9_0_set_hdp_ras_funcs(struct > amdgpu_device *adev) { > adev->hdp.ras = &hdp_v4_0_ras; > - amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block); > - adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; > } > > static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) diff --git > a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c > 
b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c > index d4bd7d1d2649..3d2b974f6e0f 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c > @@ -129,12 +129,6 @@ static void mca_v3_0_init(struct amdgpu_device > *adev) > mca->mp0.ras = &mca_v3_0_mp0_ras; > mca->mp1.ras = &mca_v3_0_mp1_ras; > mca->mpio.ras = &mca_v3_0_mpio_ras; > - amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block); > - amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block); > - amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block); > - mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm; > - mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm; > - mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm; > } > > const struct amdgpu_mca_funcs mca_v3_0_funcs = { diff --git > a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index 01b385568c14..fe61fcd22f18 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -2809,23 +2809,6 @@ static void sdma_v4_0_set_ras_funcs(struct > amdgpu_device *adev) > default: > break; > } > - > - if (adev->sdma.ras) { > - amdgpu_ras_register_ras_block(adev, &adev->sdma.ras- > >ras_block); > - > - strcpy(adev->sdma.ras->ras_block.ras_comm.name, > "sdma"); > - adev->sdma.ras->ras_block.ras_comm.block = > AMDGPU_RAS_BLOCK__SDMA; > - adev->sdma.ras->ras_block.ras_comm.type = > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; > - adev->sdma.ras_if = &adev->sdma.ras- > >ras_block.ras_comm; > - > - /* If don't define special ras_late_init function, use default > ras_late_init */ > - if (!adev->sdma.ras->ras_block.ras_late_init) > - adev->sdma.ras->ras_block.ras_late_init = > amdgpu_sdma_ras_late_init; > - > - /* If not defined special ras_cb function, use default ras_cb > */ > - if (!adev->sdma.ras->ras_block.ras_cb) > - adev->sdma.ras->ras_block.ras_cb = > amdgpu_sdma_process_ras_data_cb; > - } > } > > const struct amdgpu_ip_block_version 
sdma_v4_0_ip_block = { > -- > 2.25.1