On Wed, Jan 8, 2020 at 11:18 AM Hawking Zhang <Hawking.Zhang@xxxxxxx> wrote: > > SDMA edc counter registers were added in gfx edc counters > array. When querying gfx error counter in that array, there > is no way to differentiate the sdma instance number for different > asics, which then results in a NULL pointer access when trying to > read sdma register base address for instances greater > than 2 on Vega20. > In addition, this also results in wrong gfx error counters > since it actually added sdma edc counters. > Therefore, sdma edc counter registers should be separated > from gfx edc counter register array and only get initialized > when driver tries to enable sdma ras. > > Change-Id: I206917f9d7b81670a8fed84dc749085ce5a6f678 > Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +---------- > drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 7 +++++++ > 2 files changed, 8 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 33d1c57aaaf1..c9ade16bbcc3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -4038,14 +4038,6 @@ static const struct soc15_reg_entry sec_ded_counter_registers[] = { > { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2}, > { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6}, > { SOC15_REG_ENTRY(HDP, 0, mmHDP_EDC_CNT), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA0, 0, mmSDMA0_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA1, 0, mmSDMA1_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA2, 0, mmSDMA2_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA3, 0, mmSDMA3_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA4, 0, mmSDMA4_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA5, 0, mmSDMA5_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA6, 0, mmSDMA6_EDC_COUNTER), 0, 1, 1}, > - { SOC15_REG_ENTRY(SDMA7, 0, mmSDMA7_EDC_COUNTER), 0, 1, 
1}, > }; > > static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev) > @@ -4109,7 +4101,6 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) > adev->gfx.config.max_sh_per_se; > int sgpr_work_group_size = 5; > int gpr_reg_size = compute_dim_x / 16 + 6; > - int sec_ded_counter_reg_size = adev->sdma.num_instances + 34; > > /* only support when RAS is enabled */ > if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) > @@ -4249,7 +4240,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) > > /* read back registers to clear the counters */ > mutex_lock(&adev->grbm_idx_mutex); > - for (i = 0; i < sec_ded_counter_reg_size; i++) { > + for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) { > for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) { > for (k = 0; k < sec_ded_counter_registers[i].instance; k++) { > gfx_v9_0_select_se_sh(adev, j, 0x0, k); > diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > index fd20594b6d6e..f4107f9b75f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c > @@ -1802,6 +1802,13 @@ static int sdma_v4_0_late_init(void *handle) > struct ras_ih_if ih_info = { > .cb = sdma_v4_0_process_ras_data_cb, > }; > + int i; > + > + /* read back edc counter registers to clear the counters */ > + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { > + for (i = 0; i < adev->sdma.num_instances; i++) > + RREG32_SDMA(i, mmSDMA0_EDC_COUNTER); > + } > > return adev->sdma.funcs->ras_late_init(adev, &ih_info); > } > -- > 2.17.1 > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx