From: Lijo Lazar <lijo.lazar@xxxxxxx> There could be configs where some UMC instances are harvested. This information is obtained through discovery data and populated in umc.active_mask. Avoid reassigning this as AID mask, instead use the mask directly while iterating through umc instances. This is to avoid accesses to harvested UMC instances. v2: fix warning (Alex) Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx> Reviewed-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 42 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index eafe20d8fe0b6..0a1ef95b28668 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -387,6 +387,45 @@ int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, return 0; } +static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func, + void *data) +{ + uint32_t umc_node_inst; + uint32_t node_inst; + uint32_t umc_inst; + uint32_t ch_inst; + int ret; + + /* + * This loop is done based on the following - + * umc.active mask = mask of active umc instances across all nodes + * umc.umc_inst_num = maximum number of umc instancess per node + * umc.node_inst_num = maximum number of node instances + * Channel instances are not assumed to be harvested. + */ + dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d", + adev->umc.active_mask, adev->umc.umc_inst_num); + for_each_set_bit(umc_node_inst, &(adev->umc.active_mask), + adev->umc.node_inst_num * adev->umc.umc_inst_num) { + node_inst = umc_node_inst / adev->umc.umc_inst_num; + umc_inst = umc_node_inst % adev->umc.umc_inst_num; + LOOP_UMC_CH_INST(ch_inst) { + dev_dbg(adev->dev, + "node_inst :%d umc_inst: %d ch_inst: %d", + node_inst, umc_inst, ch_inst); + ret = func(adev, node_inst, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, + "Node %d umc %d ch %d func returns %d\n", + node_inst, umc_inst, ch_inst, ret); + return ret; + } + } + } + + return 0; +} + int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data) { @@ -395,6 +434,9 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, uint32_t ch_inst = 0; int ret = 0; + if (adev->aid_mask) + return amdgpu_umc_loop_all_aid(adev, func, data); + if (adev->umc.node_inst_num) { LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { ret = func(adev, node_inst, umc_inst, ch_inst, data); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 291549765c38c..71b8ae7f2194e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1504,7 +1504,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; - adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; -- 2.47.1