On 6/26/2024 8:31 AM, Jane Jian wrote: > [WHY] > sriov has the higher bit violation when flushing tlb > > [HOW] > normalize the registers to keep lower 16 bits (dword aligned) to avoid higher bit violation > RLCG will mask xcd out and always assume it's accessing its own xcd > > Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 29 +++++++++++++++++++++++++ > 1 file changed, 29 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > index 8d8763ebe027..87a6a610e467 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > @@ -55,6 +55,14 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); > #define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ > #define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ > > +#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ > +#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ > +#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ > +#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ > + > +#define NORMALIZE_XCC_REG_OFFSET(offset) \ > + (offset & 0xFFFF) > + > struct amdgpu_gfx_ras gfx_v9_4_3_ras; > > static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); > @@ -217,9 +225,24 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) > } > } > > +static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg) > +{ > + uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); > + > + /* If it is an XCC reg, normalize the reg to keep > + lower 16 bits in local xcc */ > + > + if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) || > + ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) > + return normalized_reg; > + else > + return reg; > +} > + > static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, > bool 
wc, uint32_t reg, uint32_t val) > { > + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); > amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); > amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | > WRITE_DATA_DST_SEL(0) | > @@ -234,6 +257,8 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, > uint32_t addr1, uint32_t ref, uint32_t mask, > uint32_t inv) > { > + addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0); > + addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1); I guess this should be done only when the addresses are in register space (not memory space). Apart from that, this looks good. Thanks, Lijo > amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); > amdgpu_ring_write(ring, > /* memory (1) or register (0) */ > @@ -2725,6 +2750,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > { > struct amdgpu_device *adev = ring->adev; > > + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); > + > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > (5 << 8) | /* dst: memory */ > @@ -2742,6 +2769,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, > { > uint32_t cmd = 0; > > + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); > + > switch (ring->funcs->type) { > case AMDGPU_RING_TYPE_GFX: > cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;