[WHY] sriov has the higher bit violation when flushing tlb [HOW] normalize the registers to keep lower 16-bit(dword aligned) to aviod higher bit violation RLCG will mask xcd out and always assume it's accessing its own xcd v2 add check in wait mem that only do the normalization on regspace Signed-off-by: Jane Jian <Jane.Jian@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 33 +++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 8d8763ebe027..1149595a02d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -55,6 +55,14 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ #define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ +#define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ +#define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ +#define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ +#define XCC_REG_RANGE_1_HIGH 0x10000 /* XCC gfxdec1 upper Bound */ + +#define NORMALIZE_XCC_REG_OFFSET(offset) \ + (offset & 0xFFFF) + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -217,9 +225,24 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) } } +static uint32_t gfx_v9_4_3_normalize_xcc_reg_offset(uint32_t reg) +{ + uint32_t normalized_reg = NORMALIZE_XCC_REG_OFFSET(reg); + + /* If it is an XCC reg, normalize the reg to keep + lower 16 bits in local xcc */ + + if (((normalized_reg >= XCC_REG_RANGE_0_LOW) && (normalized_reg < XCC_REG_RANGE_0_HIGH)) || + ((normalized_reg >= XCC_REG_RANGE_1_LOW) && (normalized_reg < XCC_REG_RANGE_1_HIGH))) + return normalized_reg; + else + return reg; +} + static void gfx_v9_4_3_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, bool wc, uint32_t reg, uint32_t val) { + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) | WRITE_DATA_DST_SEL(0) | @@ -234,6 +257,12 @@ static void gfx_v9_4_3_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, uint32_t addr1, uint32_t ref, uint32_t mask, uint32_t inv) { + /* Only do the normalization on regspace */ + if (mem_space == 0) { + addr0 = gfx_v9_4_3_normalize_xcc_reg_offset(addr0); + addr1 = gfx_v9_4_3_normalize_xcc_reg_offset(addr1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, /* memory (1) or register (0) */ @@ -2725,6 +2754,8 @@ static void gfx_v9_4_3_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, { struct amdgpu_device *adev = ring->adev; + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); amdgpu_ring_write(ring, 0 | /* src: register*/ (5 << 8) | /* dst: memory */ @@ -2742,6 +2773,8 @@ static void gfx_v9_4_3_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, { uint32_t cmd = 0; + reg = gfx_v9_4_3_normalize_xcc_reg_offset(reg); + switch (ring->funcs->type) { case AMDGPU_RING_TYPE_GFX: cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; -- 2.34.1