umc ras is not managed by gpu driver when gpu is connected to cpu through xgmi. split umc callbacks into ras and non-ras ones so gpu driver only initializes umc ras callbacks when it manages umc ras. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Reivewed-by: Dennis Li <Dennis.Li@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 ++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 17 +++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 4 +++- drivers/gpu/drm/amd/amdgpu/umc_v6_1.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 4 +++- drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 4 +++- drivers/gpu/drm/amd/amdgpu/umc_v8_7.h | 2 +- 12 files changed, 51 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 082f9d03a94a..3411c6577a18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -391,8 +391,9 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { int r; - if (adev->umc.funcs && adev->umc.funcs->ras_late_init) { - r = adev->umc.funcs->ras_late_init(adev); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ras_late_init) { + r = adev->umc.ras_funcs->ras_late_init(adev); if (r) return r; } @@ -418,8 +419,12 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { - amdgpu_umc_ras_fini(adev); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->ras_fini) + adev->umc.ras_funcs->ras_fini(adev); + amdgpu_mmhub_ras_fini(adev); + if (adev->gmc.xgmi.ras_funcs && adev->gmc.xgmi.ras_funcs->ras_fini) adev->gmc.xgmi.ras_funcs->ras_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 172738cc99db..459a470744f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -774,13 +774,15 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, switch (info->head.block) { case AMDGPU_RAS_BLOCK__UMC: - if (adev->umc.funcs->query_ras_error_count) - adev->umc.funcs->query_ras_error_count(adev, &err_data); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, &err_data); /* umc query_ras_error_address is also responsible for clearing * error status */ - if (adev->umc.funcs->query_ras_error_address) - adev->umc.funcs->query_ras_error_address(adev, &err_data); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address) + adev->umc.ras_funcs->query_ras_error_address(adev, &err_data); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->query_ras_error_count) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index a2975c8092a9..ea6f99be070b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -60,8 +60,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) } /* ras init of specific umc version */ - if (adev->umc.funcs && adev->umc.funcs->err_cnt_init) - adev->umc.funcs->err_cnt_init(adev); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->err_cnt_init) + adev->umc.ras_funcs->err_cnt_init(adev); return 0; @@ -95,12 +96,12 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->umc.funcs && - adev->umc.funcs->query_ras_error_count) - adev->umc.funcs->query_ras_error_count(adev, ras_error_status); + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_count) + adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); - if (adev->umc.funcs && - adev->umc.funcs->query_ras_error_address && + if (adev->umc.ras_funcs && + adev->umc.ras_funcs->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -116,7 +117,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, /* umc query_ras_error_address is also responsible for clearing * error status */ - adev->umc.funcs->query_ras_error_address(adev, ras_error_status); + adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); } /* only uncorrectable error needs gpu reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 183814493658..bbcccf53080d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -35,13 +35,17 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) -struct amdgpu_umc_funcs { +struct amdgpu_umc_ras_funcs { void (*err_cnt_init)(struct amdgpu_device *adev); int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); + void *ras_error_status); void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); +}; + +struct amdgpu_umc_funcs { void (*init_registers)(struct amdgpu_device *adev); }; @@ -59,6 +63,7 @@ struct amdgpu_umc { struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; + const struct amdgpu_umc_ras_funcs *ras_funcs; }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 33e54eed2eec..2bfd620576f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -655,7 +655,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; - adev->umc.funcs = &umc_v8_7_funcs; + adev->umc.ras_funcs = &umc_v8_7_ras_funcs; break; default: break; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ce2b1a9125ea..c353254ea5dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1155,7 +1155,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.funcs = &umc_v6_1_funcs; + adev->umc.ras_funcs = &umc_v6_1_ras_funcs; break; case CHIP_ARCTURUS: adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; @@ -1163,7 +1163,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.funcs = &umc_v6_1_funcs; + adev->umc.ras_funcs = &umc_v6_1_ras_funcs; break; default: break; @@ -1194,12 +1194,6 @@ static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gmc_v9_0_set_gmc_funcs(adev); - gmc_v9_0_set_irq_funcs(adev); - gmc_v9_0_set_umc_funcs(adev); - gmc_v9_0_set_mmhub_funcs(adev); - gmc_v9_0_set_gfxhub_funcs(adev); - if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; @@ -1210,6 +1204,12 @@ static int gmc_v9_0_early_init(void *handle) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } + gmc_v9_0_set_gmc_funcs(adev); + gmc_v9_0_set_irq_funcs(adev); + gmc_v9_0_set_umc_funcs(adev); + gmc_v9_0_set_mmhub_funcs(adev); + gmc_v9_0_set_gfxhub_funcs(adev); + adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = adev->gmc.shared_aperture_start + (4ULL << 30) - 1; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 96d7769609f4..20b44983ac94 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -22,6 +22,7 @@ */ #include "umc_v6_1.h" #include "amdgpu_ras.h" +#include "amdgpu_umc.h" #include "amdgpu.h" #include "rsmu/rsmu_0_0_2_offset.h" @@ -464,9 +465,10 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_enable_umc_index_mode(adev); } -const struct amdgpu_umc_funcs umc_v6_1_funcs = { +const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = { .err_cnt_init = umc_v6_1_err_cnt_init, .ras_late_init = amdgpu_umc_ras_late_init, + .ras_fini = amdgpu_umc_ras_fini, .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index 0ce1d323cfdd..5dc36c730bb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -45,7 +45,7 @@ /* umc ce count initial value */ #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_funcs umc_v6_1_funcs; +extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs; extern const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 3c3fb01b7c20..3a8f787374c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -22,6 +22,7 @@ */ #include "umc_v6_7.h" #include "amdgpu_ras.h" +#include "amdgpu_umc.h" #include "amdgpu.h" #include "umc/umc_6_7_0_offset.h" @@ -272,8 +273,9 @@ static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, } } -const struct amdgpu_umc_funcs umc_v6_7_funcs = { +const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { .ras_late_init = amdgpu_umc_ras_late_init, + .ras_fini = amdgpu_umc_ras_fini, .query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_address = umc_v6_7_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index e59dbdb6ef9b..4eb85f247e96 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -32,6 +32,6 @@ #define UMC_V6_7_INST_DIST 0x40000 -extern const struct amdgpu_umc_funcs umc_v6_7_funcs; +extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index a064c097690c..89d20adfa001 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -22,6 +22,7 @@ */ #include "umc_v8_7.h" #include "amdgpu_ras.h" +#include "amdgpu_umc.h" #include "amdgpu.h" #include "rsmu/rsmu_0_0_2_offset.h" @@ -323,9 +324,10 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev) } } -const struct amdgpu_umc_funcs umc_v8_7_funcs = { +const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = { .err_cnt_init = umc_v8_7_err_cnt_init, .ras_late_init = amdgpu_umc_ras_late_init, + .ras_fini = amdgpu_umc_ras_fini, .query_ras_error_count = umc_v8_7_query_ras_error_count, .query_ras_error_address = umc_v8_7_query_ras_error_address, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h index d4d0468e3df5..37e6dc7c28e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h @@ -44,7 +44,7 @@ /* umc ce count initial value */ #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_funcs umc_v8_7_funcs; +extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs; extern const uint32_t umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx