On 28.06.2016 16:43, Tom St Denis wrote: > Add ability to specify instance in select_se_sh callback. > Defaults to 0 all over the driver. > > Signed-off-by: Tom St Denis <tom.stdenis at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- > drivers/gpu/drm/amd/amdgpu/cik.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 25 +++++++++++++------------ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 ++++++++++--------- > drivers/gpu/drm/amd/amdgpu/vi.c | 4 ++-- > 5 files changed, 29 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 780a830b2bbd..d7efbd0ee983 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1158,7 +1158,7 @@ struct amdgpu_cu_info { > struct amdgpu_gfx_funcs { > /* get the gpu clock counter */ > uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); > - void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num); > + void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); > }; > > struct amdgpu_gfx { > @@ -2295,7 +2295,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev)) > #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e)) > #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) > -#define amdgpu_gfx_select_se_sh(adev, se, sh) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh)) > +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) > > #define amdgpu_dpm_get_temperature(adev) \ > ((adev)->pp_enabled ? 
\ > diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c > index ebc8c256286b..1799612b8299 100644 > --- a/drivers/gpu/drm/amd/amdgpu/cik.c > +++ b/drivers/gpu/drm/amd/amdgpu/cik.c > @@ -1036,12 +1036,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, > > mutex_lock(&adev->grbm_idx_mutex); > if (se_num != 0xffffffff || sh_num != 0xffffffff) > - amdgpu_gfx_select_se_sh(adev, se_num, sh_num); > + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0); > > val = RREG32(reg_offset); > > if (se_num != 0xffffffff || sh_num != 0xffffffff) > - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff); > + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > return val; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index 9edec8a98ee2..44def1665f0c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -1584,10 +1584,11 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) > * broadcast to all SEs or SHs (CIK). > */ > static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, > - u32 se_num, u32 sh_num) > + u32 se_num, u32 sh_num, u32 instance) > { > u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK; > > + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); INSTANCE_INDEX and INSTANCE_BROADCAST_WRITES have the same relationship as SH/SE_INDEX and SH/SE_BROADCAST_WRITES: an index field only takes effect while the matching broadcast bit is clear. Since data starts out with INSTANCE_BROADCAST_WRITES set, the INSTANCE_INDEX written here would presumably be ignored, so the broadcast bit should only be set when no specific instance is requested. It's actually kind of odd that the current code either sets both of SH/SE_BROADCAST_WRITES or none of them. It should logically be possible to broadcast writes to all SHs in one SE, for example. 
Nicolai > if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) > data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | > GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK; > @@ -1660,13 +1661,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > - gfx_v7_0_select_se_sh(adev, i, j); > + gfx_v7_0_select_se_sh(adev, i, j, 0); > data = gfx_v7_0_get_rb_active_bitmap(adev); > active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * > rb_bitmap_width_per_sh); > } > } > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > adev->gfx.config.backend_enable_mask = active_rbs; > @@ -1747,7 +1748,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) > * making sure that the following register writes will be broadcasted > * to all the shaders > */ > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > > /* XXX SH_MEM regs */ > /* where to put LDS, scratch, GPUVM in FSA64 space */ > @@ -3381,7 +3382,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > - gfx_v7_0_select_se_sh(adev, i, j); > + gfx_v7_0_select_se_sh(adev, i, j, 0); > for (k = 0; k < adev->usec_timeout; k++) { > if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) > break; > @@ -3389,7 +3390,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > } > } > } > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | > @@ -3549,7 +3550,7 
@@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) > WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); > > mutex_lock(&adev->grbm_idx_mutex); > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); > WREG32(mmRLC_LB_PARAMS, 0x00600408); > WREG32(mmRLC_LB_CNTL, 0x80000004); > @@ -3589,7 +3590,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) > tmp = gfx_v7_0_halt_rlc(adev); > > mutex_lock(&adev->grbm_idx_mutex); > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); > WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); > tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | > @@ -3640,7 +3641,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) > tmp = gfx_v7_0_halt_rlc(adev); > > mutex_lock(&adev->grbm_idx_mutex); > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); > WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); > data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | > @@ -3691,7 +3692,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) > tmp = gfx_v7_0_halt_rlc(adev); > > mutex_lock(&adev->grbm_idx_mutex); > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); > WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); > data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; > @@ -5055,7 +5056,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) > mask = 1; > ao_bitmap = 0; > counter = 0; > - gfx_v7_0_select_se_sh(adev, i, j); > + gfx_v7_0_select_se_sh(adev, i, j, 0); > bitmap = 
gfx_v7_0_get_cu_active_bitmap(adev); > cu_info->bitmap[i][j] = bitmap; > > @@ -5071,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) > ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); > } > } > - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > cu_info->number = active_cu_number; > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 9aa30bc06e4a..45c3ad52d21c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -3447,10 +3447,11 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) > } > > static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, > - u32 se_num, u32 sh_num) > + u32 se_num, u32 sh_num, u32 instance) > { > u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); > > + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); > if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { > data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); > data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); > @@ -3499,13 +3500,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > - gfx_v8_0_select_se_sh(adev, i, j); > + gfx_v8_0_select_se_sh(adev, i, j, 0); > data = gfx_v8_0_get_rb_active_bitmap(adev); > active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * > rb_bitmap_width_per_sh); > } > } > - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > adev->gfx.config.backend_enable_mask = active_rbs; > @@ -3609,7 +3610,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) > * 
making sure that the following register writes will be broadcasted > * to all the shaders > */ > - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > > WREG32(mmPA_SC_FIFO_SIZE, > (adev->gfx.config.sc_prim_fifo_size_frontend << > @@ -3632,7 +3633,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > mutex_lock(&adev->grbm_idx_mutex); > for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { > for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { > - gfx_v8_0_select_se_sh(adev, i, j); > + gfx_v8_0_select_se_sh(adev, i, j, 0); > for (k = 0; k < adev->usec_timeout; k++) { > if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) > break; > @@ -3640,7 +3641,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) > } > } > } > - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | > @@ -5409,7 +5410,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, > { > uint32_t data; > > - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > > WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); > WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); > @@ -6501,7 +6502,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) > mask = 1; > ao_bitmap = 0; > counter = 0; > - gfx_v8_0_select_se_sh(adev, i, j); > + gfx_v8_0_select_se_sh(adev, i, j, 0); > bitmap = gfx_v8_0_get_cu_active_bitmap(adev); > cu_info->bitmap[i][j] = bitmap; > > @@ -6517,7 +6518,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) > ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); > } > } > - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); > + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > > 
cu_info->number = active_cu_number; > diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c > index c628a09f84d7..c7762a554e6b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vi.c > +++ b/drivers/gpu/drm/amd/amdgpu/vi.c > @@ -534,12 +534,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num, > > mutex_lock(&adev->grbm_idx_mutex); > if (se_num != 0xffffffff || sh_num != 0xffffffff) > - amdgpu_gfx_select_se_sh(adev, se_num, sh_num); > + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0); > > val = RREG32(reg_offset); > > if (se_num != 0xffffffff || sh_num != 0xffffffff) > - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff); > + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0); > mutex_unlock(&adev->grbm_idx_mutex); > return val; > } >