Define amdgpu_ras_sw_init function to initialize all ras blocks. V2: Modify error debugging information. Signed-off-by: yipechai <YiPeng.Chai@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 - drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 143 ++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 28 ---- drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 - drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 17 --- 9 files changed, 148 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6113ddc765a7..0c83eb69dad5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2402,6 +2402,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } + r = amdgpu_ras_sw_init(adev); + if (r) { + DRM_ERROR("amdgpu_ras_sw_init failed (%d).\n", r); + goto init_failed; + } + if (amdgpu_sriov_vf(adev)) amdgpu_virt_init_data_exchange(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ab75e189bc0b..544241f357b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -440,8 +440,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) { if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; - amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); - adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d3875618ebf5..89075ab9e82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2299,8 +2299,6 @@ int amdgpu_ras_init(struct amdgpu_device *adev) case CHIP_ALDEBARAN: if (!adev->gmc.xgmi.connected_to_cpu) { adev->nbio.ras = &nbio_v7_4_ras; - amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); - adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; } break; default: @@ -2533,6 +2531,147 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 1); } +int amdgpu_ras_sw_init(struct amdgpu_device *adev) +{ + int err = 0; + + if (!amdgpu_ras_asic_supported(adev)) + return 0; + + if (adev->nbio.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register nbio ras block!\n"); + return err; + } + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; + } + + if (adev->gmc.xgmi.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register xgmi ras block!\n"); + return err; + } + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; + } + + if (adev->gfx.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register gfx ras block!\n"); + return err; + } + + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); + adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; + + /* If not define special ras_late_init function, use gfx default ras_late_init */ + if (!adev->gfx.ras->ras_block.ras_late_init) + adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->gfx.ras->ras_block.ras_cb) + adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; + } + + if (adev->umc.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register umc ras block!\n"); + return err; + } + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } + + if (adev->mmhub.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register mmhub ras block!\n"); + return err; + } + + strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub"); + adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm; + } + + if (adev->hdp.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register hdp ras block!\n"); + return err; + } + + adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; + } + + if (adev->mca.mp0.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->mca.mp0.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register mca mp0 ras block!\n"); + return err; + } + adev->mca.mp0.ras_if = &adev->mca.mp0.ras->ras_block.ras_comm; + } + + if (adev->mca.mp1.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->mca.mp1.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register mca mp1 ras block!\n"); + return err; + } + adev->mca.mp1.ras_if = &adev->mca.mp1.ras->ras_block.ras_comm; + } + + if (adev->mca.mpio.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->mca.mpio.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register mca mpio ras block!\n"); + return err; + } + adev->mca.mpio.ras_if = &adev->mca.mpio.ras->ras_block.ras_comm; + } + + if (adev->sdma.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register sdma ras block!\n"); + return err; + } + + strcpy(adev->sdma.ras->ras_block.ras_comm.name, "sdma"); + adev->sdma.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if = &adev->sdma.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->sdma.ras->ras_block.ras_late_init) + adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->sdma.ras->ras_block.ras_cb) + adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb; + } + + return 0; +} + int amdgpu_ras_late_init(struct amdgpu_device *adev) { struct amdgpu_ras_block_list *node, *tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7cddaad90d6d..fc5ed6c47443 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -595,6 +595,7 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); +int amdgpu_ras_sw_init(struct amdgpu_device *adev); int amdgpu_ras_late_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8def7f630d4c..e26fc2ae98e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2188,27 +2188,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) break; } - if (adev->gfx.ras) { - err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block); - if (err) { - DRM_ERROR("Failed to register gfx ras block!\n"); - return err; - } - - strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); - adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; - adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; - - /* If not define special ras_late_init function, use gfx default ras_late_init */ - if (!adev->gfx.ras->ras_block.ras_late_init) - adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->gfx.ras->ras_block.ras_cb) - adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; - } - adev->gfx.config.gb_addr_config = gb_addr_config; adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d9353bb99314..5046be86702f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -669,22 +669,6 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } - if (adev->umc.ras) { - amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); - adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->umc.ras->ras_block.ras_late_init) - adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->umc.ras->ras_block.ras_cb) - adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; - } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4c3483fbe613..8f6aefb9be08 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1228,23 +1228,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } - - if (adev->umc.ras) { - amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); - adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->umc.ras->ras_block.ras_late_init) - adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->umc.ras->ras_block.ras_cb) - adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; - } } static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) @@ -1278,15 +1261,6 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) /* mmhub ras is not available */ break; } - - if (adev->mmhub.ras) { - amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block); - - strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub"); - adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB; - adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm; - } } static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) @@ -1297,8 +1271,6 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) { adev->hdp.ras = &hdp_v4_0_ras; - amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block); - adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; } static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index d4bd7d1d2649..3d2b974f6e0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -129,12 +129,6 @@ static void mca_v3_0_init(struct amdgpu_device *adev) mca->mp0.ras = &mca_v3_0_mp0_ras; mca->mp1.ras = &mca_v3_0_mp1_ras; mca->mpio.ras = &mca_v3_0_mpio_ras; - amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block); - amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block); - amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block); - mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm; - mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm; - mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm; } const struct amdgpu_mca_funcs mca_v3_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 01b385568c14..fe61fcd22f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2809,23 +2809,6 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) default: break; } - - if (adev->sdma.ras) { - amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); - - strcpy(adev->sdma.ras->ras_block.ras_comm.name, "sdma"); - adev->sdma.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA; - adev->sdma.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->sdma.ras_if = &adev->sdma.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->sdma.ras->ras_block.ras_late_init) - adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->sdma.ras->ras_block.ras_cb) - adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb; - } } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { -- 2.25.1