On 2019-07-12 9:46 a.m., Haehnle, Nicolai wrote:
> Prefetch mode 0 is not supported and can lead to hangs with certain very
> specific code patterns. Set a sound prefetch mode for all VMIDs rather
> than forcing all shaders to set the prefetch mode at the beginning.
>
> Reduce code duplication a bit while we're at it. Note that the 64-bit
> address mode enum and the retry all enum are both 0, so the only
> functional change is in the INITIAL_INST_PREFETCH field.
>
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle@xxxxxxx>

I forwarded this to the compute compiler team as well. They have no
objections and agree this is the right thing to do.

Acked-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>

> --
> I haven't been able to properly test this yet, but it is the right thing
> to be doing in principle.
> ---
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++----------------
> 1 file changed, 10 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 0d94c812df1b..b8498c359191 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -157,20 +157,27 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
> SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
> SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010),
> SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000),
> };
>
> static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
> {
> /* Pending on emulation bring up */
> };
>
> +#define DEFAULT_SH_MEM_CONFIG \
> + ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
> + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
> + (SH_MEM_RETRY_MODE_ALL << SH_MEM_CONFIG__RETRY_MODE__SHIFT) | \
> + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
> +
> +
> static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev);
> static 
void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev);
> static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev);
> static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev);
> static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev,
> struct amdgpu_cu_info *cu_info);
> static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
> static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
> u32 sh_num, u32 instance);
> static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
> @@ -1476,40 +1483,35 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade
> return pa_sc_tile_steering_override;
> }
>
> #define DEFAULT_SH_MEM_BASES (0x6000)
> #define FIRST_COMPUTE_VMID (8)
> #define LAST_COMPUTE_VMID (16)
>
> static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev)
> {
> int i;
> - uint32_t sh_mem_config;
> uint32_t sh_mem_bases;
>
> /*
> * Configure apertures:
> * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
> * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
> * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
> */
> sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
>
> - sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
> - SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
> - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
> -
> mutex_lock(&adev->srbm_mutex);
> for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
> nv_grbm_select(adev, 0, 0, 0, i);
> /* CP and shaders */
> - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
> + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
> WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
> }
> nv_grbm_select(adev, 0, 0, 0, 0);
> mutex_unlock(&adev->srbm_mutex);
> }
>
> static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
> {
> int i, j, k;
> int max_wgp_per_sh = adev->gfx.config.max_cu_per_sh >> 1;
> @@ -1590,31 +1592,22 @@ static void gfx_v10_0_constants_init(struct 
amdgpu_device *adev)
> gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
> adev->gfx.config.pa_sc_tile_steering_override =
> gfx_v10_0_init_pa_sc_tile_steering_override(adev);
>
> /* XXX SH_MEM regs */
> /* where to put LDS, scratch, GPUVM in FSA64 space */
> mutex_lock(&adev->srbm_mutex);
> for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
> nv_grbm_select(adev, 0, 0, 0, i);
> /* CP and shaders */
> - if (i == 0) {
> - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
> - SH_MEM_ALIGNMENT_MODE_UNALIGNED);
> - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
> - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
> - WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
> - } else {
> - tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
> - SH_MEM_ALIGNMENT_MODE_UNALIGNED);
> - tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_MODE, 0);
> - WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
> + WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
> + if (i != 0) {
> tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
> (adev->gmc.private_aperture_start >> 48));
> tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
> (adev->gmc.shared_aperture_start >> 48));
> WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
> }
> }
> nv_grbm_select(adev, 0, 0, 0, 0);
>
> mutex_unlock(&adev->srbm_mutex);

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx