Initialize umc ras block only when umc ip block supports ras. Driver queries ras capabilities after early_init, ras block init needs to be moved to sw_init. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 16 +++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 30 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 26 ++++----------------- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 21 ++++------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++++----------------- 7 files changed, 58 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 6830f671cde7..97a12d8d786a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -446,9 +446,21 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, } while (fault->timestamp < tmp); } -int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) +int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev) { - if (!adev->gmc.xgmi.connected_to_cpu) { + int r; + + /* umc ras block */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { + r = amdgpu_umc_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize umc ras block!\n"); + return r; + } + } + + /* xgmi ras block */ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL)) { adev->gmc.xgmi.ras = &xgmi_ras; amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 0305b660cd17..f1773abd5e1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -351,7 +351,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, 
uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); -int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev); +int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 1b8574bc4463..da68ceaa024c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -208,6 +208,36 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); } +int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev) +{ + int err; + struct amdgpu_umc_ras *ras; + + if (!adev->umc.ras) + return 0; + + ras = adev->umc.ras; + + err = amdgpu_ras_register_ras_block(adev, &ras->ras_block); + if (err) { + dev_err(adev->dev, "Failed to register umc ras block!\n"); + return err; + } + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &ras->ras_block.ras_comm; + + if (!ras->ras_block.ras_late_init) + ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + if (!ras->ras_block.ras_cb) + ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + + return 0; +} + int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 36e19336f3b3..d7f1229ff11f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -87,6 +87,7 @@ struct amdgpu_umc { unsigned long active_mask; }; +int 
amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index c59c2332d191..924f6f38fae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -703,25 +703,8 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } - if (adev->umc.ras) { - amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); - adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->umc.ras->ras_block.ras_late_init) - adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->umc.ras->ras_block.ras_cb) - adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; - } } - static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[MMHUB_HWIP][0]) { @@ -758,7 +741,6 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) static int gmc_v10_0_early_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v10_0_set_mmhub_funcs(adev); @@ -774,10 +756,6 @@ static int gmc_v10_0_early_init(void *handle) adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; - r = amdgpu_gmc_ras_early_init(adev); - if (r) - return r; - return 0; } @@ -1028,6 +1006,10 @@ static int gmc_v10_0_sw_init(void *handle) 
amdgpu_vm_manager_init(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index af7b3ba1ca00..1c585cc24857 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -581,23 +581,6 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } - - if (adev->umc.ras) { - amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); - adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->umc.ras->ras_block.ras_late_init) - adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - - /* If not define special ras_cb function, use default ras_cb */ - if (!adev->umc.ras->ras_block.ras_cb) - adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; - } } @@ -846,6 +829,10 @@ static int gmc_v11_0_sw_init(void *handle) amdgpu_vm_manager_init(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b06170c00dfc..e9b6599e790c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1318,23 +1318,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } - - if (adev->umc.ras) { - amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - - strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); - adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if = 
&adev->umc.ras->ras_block.ras_comm; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->umc.ras->ras_block.ras_late_init) - adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; - - /* If not defined special ras_cb function, use default ras_cb */ - if (!adev->umc.ras->ras_block.ras_cb) - adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; - } } static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) @@ -1406,7 +1389,6 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) static int gmc_v9_0_early_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */ @@ -1436,10 +1418,6 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; - r = amdgpu_gmc_ras_early_init(adev); - if (r) - return r; - return 0; } @@ -1798,6 +1776,10 @@ static int gmc_v9_0_sw_init(void *handle) gmc_v9_0_save_registers(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } -- 2.17.1