[AMD Official Use Only] Thanks Lijo. However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow. Can that work for non-x86 platform or even work reliably for x86 platform? BR Evan > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Sent: Monday, June 21, 2021 8:08 PM > To: Quan, Evan <Evan.Quan@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Deucher, Alexander <Alexander.Deucher@xxxxxxx> > Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting > > One minor comment below, apart from that the series looks good. > > Reviewed-by: Lijo Lazar <lijo.lazar@xxxxxxx> > > On 6/21/2021 12:30 PM, Evan Quan wrote: > > Add missing settings for SQC bits. And correct some confusing logics > > around active wgp bitmap calculation. > > > > Change-Id: If4992e175fd61d5609b00328cbe21f487517d039 > > Signed-off-by: Evan Quan <evan.quan@xxxxxxx> > > --- > > V1->V2: > > - restore correct tcp_harvest setting for NV10 and NV12 > > - move asic type guard upper layer for better readability > > --- > > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++------ > ----- > > 1 file changed, 57 insertions(+), 46 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > index 15ae9e33b925..384b95fbad8b 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > > @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct > amdgpu_device *adev) > > 4 + /* RMI */ > > 1); /* SQG */ > > > > - if (adev->asic_type == CHIP_NAVI10 || > > - adev->asic_type == CHIP_NAVI14 || > > - adev->asic_type == CHIP_NAVI12) { > > - mutex_lock(&adev->grbm_idx_mutex); > > - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > > - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > > - gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); > > - wgp_active_bitmap = > gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); > > - /* > > - * Set corresponding TCP bits for the inactive > WGPs in > > - * GCRD_SA_TARGETS_DISABLE > > - */ > > - gcrd_targets_disable_tcp = 0; > > - /* Set TCP & SQC bits in > UTCL1_UTCL0_INVREQ_DISABLE */ > > - utcl_invreq_disable = 0; > > - > > - for (k = 0; k < max_wgp_per_sh; k++) { > > - if (!(wgp_active_bitmap & (1 << k))) { > > - gcrd_targets_disable_tcp |= > 3 << (2 * k); > > - utcl_invreq_disable |= (3 << > (2 * k)) | > > - (3 << (2 * > (max_wgp_per_sh + k))); > > - } > > + mutex_lock(&adev->grbm_idx_mutex); > > + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > > + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > > + gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); > > + wgp_active_bitmap = > gfx_v10_0_get_wgp_active_bitmap_per_sh(adev); > > + /* > > + * Set corresponding TCP bits for the inactive WGPs in > > + * GCRD_SA_TARGETS_DISABLE > > + */ > > + gcrd_targets_disable_tcp = 0; > > + /* Set TCP & SQC bits in > UTCL1_UTCL0_INVREQ_DISABLE */ > > + utcl_invreq_disable = 0; > > + > > + for (k = 0; k < max_wgp_per_sh; k++) { > > + if (!(wgp_active_bitmap & (1 << k))) { > > + gcrd_targets_disable_tcp |= 3 << (2 * > k); > > + gcrd_targets_disable_tcp |= 1 << (k + > (max_wgp_per_sh * 2)); > > + utcl_invreq_disable |= (3 << (2 * k)) | > > + (3 << (2 * (max_wgp_per_sh > + k))); > > } > > - > > - tmp = RREG32_SOC15(GC, 0, > mmUTCL1_UTCL0_INVREQ_DISABLE); > > - /* only override TCP & SQC bits */ > > - tmp &= 0xffffffff << (4 * max_wgp_per_sh); > > - tmp |= (utcl_invreq_disable & > utcl_invreq_disable_mask); > > - WREG32_SOC15(GC, 0, > mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); > > - > > - tmp = RREG32_SOC15(GC, 0, > mmGCRD_SA_TARGETS_DISABLE); > > - /* only override TCP bits */ > > - tmp &= 0xffffffff << (2 * max_wgp_per_sh); > > - tmp |= (gcrd_targets_disable_tcp & > gcrd_targets_disable_mask); > > - WREG32_SOC15(GC, 0, > mmGCRD_SA_TARGETS_DISABLE, tmp); > > } > > - } > > > > - gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); > > - mutex_unlock(&adev->grbm_idx_mutex); > > + tmp = RREG32_SOC15(GC, 0, > mmUTCL1_UTCL0_INVREQ_DISABLE); > > + /* only override TCP & SQC bits */ > > + if (adev->asic_type == CHIP_NAVI14) > > + tmp &= 0xff000000; > > + else > > + tmp &=0xfff00000; > > For the disable field mask calculation (which is the value that is applied finally), > there is no ASIC check. The above code may utilize the same method as in > the original code without ASIC check. > > tmp &= 0xffffffff << (4 * max_wgp_per_sh); > > Same for below case also - 3*max_wgp_per_sh. > > Thanks, > Lijo > > > + tmp |= (utcl_invreq_disable & > utcl_invreq_disable_mask); > > + WREG32_SOC15(GC, 0, > mmUTCL1_UTCL0_INVREQ_DISABLE, tmp); > > + > > + tmp = RREG32_SOC15(GC, 0, > mmGCRD_SA_TARGETS_DISABLE); > > + /* only override TCP & SQC bits */ > > + if (adev->asic_type == CHIP_NAVI14) > > + tmp &= 0xfffc0000; > > + else > > + tmp &= 0xffff8000; > > + tmp |= (gcrd_targets_disable_tcp & > gcrd_targets_disable_mask); > > + WREG32_SOC15(GC, 0, > mmGCRD_SA_TARGETS_DISABLE, tmp); > > + } > > } > > + > > + gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); > > + mutex_unlock(&adev->grbm_idx_mutex); > > } > > > > static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@ > > -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle) > > * init golden registers and rlc resume may override some registers, > > * reconfig them here > > */ > > - gfx_v10_0_tcp_harvest(adev); > > + if (adev->asic_type == CHIP_NAVI10 || > > + adev->asic_type == CHIP_NAVI14 || > > + adev->asic_type == CHIP_NAVI12) > > + gfx_v10_0_tcp_harvest(adev); > > > > r = gfx_v10_0_cp_resume(adev); > > if (r) > > @@ -9328,17 +9334,22 @@ static void > > gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device > * > > > > static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct > amdgpu_device *adev) > > { > > - u32 data, wgp_bitmask; > > - data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG); > > - data |= RREG32_SOC15(GC, 0, > mmGC_USER_SHADER_ARRAY_CONFIG); > > + u32 disabled_mask = > > + ~amdgpu_gfx_create_bitmask(adev- > >gfx.config.max_cu_per_sh >> 1); > > + u32 efuse_setting = 0; > > + u32 vbios_setting = 0; > > + > > + efuse_setting = RREG32_SOC15(GC, 0, > mmCC_GC_SHADER_ARRAY_CONFIG); > > + efuse_setting &= > CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; > > + efuse_setting >>= > CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; > > > > - data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; > > - data >>= > CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; > > + vbios_setting = RREG32_SOC15(GC, 0, > mmGC_USER_SHADER_ARRAY_CONFIG); > > + vbios_setting &= > GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK; > > + vbios_setting >>= > GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT; > > > > - wgp_bitmask = > > - amdgpu_gfx_create_bitmask(adev- > >gfx.config.max_cu_per_sh >> 1); > > + disabled_mask |= efuse_setting | vbios_setting; > > > > - return (~data) & wgp_bitmask; > > + return (~disabled_mask); > > } > > > > static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct > > amdgpu_device *adev) > > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx