From: Xiangliang Yu <Xiangliang.Yu@xxxxxxx> Insert ce meta prior to cntx_cntl and de follow it. Signed-off-by: Xiangliang Yu <Xiangliang.Yu at amd.com> Signed-off-by: Monk Liu <Monk.Liu at amd.com> Reviewed-by: Alex Deucher <alexander.deucher at amd.com> Acked-by: Christian K�¶nig <christian.koenig at amd.com> Signed-off-by: Alex Deucher <alexander.deucher at amd.com> --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 47 ++++++++++++++++++++++ drivers/gpu/drm/amd/include/v9_structs.h | 68 ++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6f266d0..2241075 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3189,10 +3189,54 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) +{ + static struct v9_ce_ib_state ce_payload = {0}; + uint64_t csa_addr; + int cnt; + + cnt = (sizeof(ce_payload) >> 2) + 4 - 2; + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); +} + +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) +{ + static struct v9_de_ib_state de_payload = {0}; + uint64_t csa_addr, gds_addr; + int cnt; + + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; + gds_addr = csa_addr + 4096; + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + + cnt = (sizeof(de_payload) >> 2) + 4 - 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); + amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); + amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); +} + static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_ring_emit_ce_meta(ring); + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { /* set load_global_config & load_global_uconfig */ @@ -3216,6 +3260,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v9_0_ring_emit_de_meta(ring); } static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index e7508a3..9a9e6c7 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -672,4 +672,72 @@ struct v9_mqd { uint32_t reserved_511; }; +/* from vega10 all CSA format is shifted to chain ib compatible mode */ +struct v9_ce_ib_state { + /* section of non chained ib part */ + uint32_t ce_ib_completion_status; + uint32_t ce_constegnine_count; + uint32_t ce_ibOffset_ib1; + uint32_t ce_ibOffset_ib2; + + /* section of chained ib */ + uint32_t ce_chainib_addrlo_ib1; + uint32_t ce_chainib_addrlo_ib2; + uint32_t ce_chainib_addrhi_ib1; + uint32_t ce_chainib_addrhi_ib2; + uint32_t ce_chainib_size_ib1; + uint32_t ce_chainib_size_ib2; +}; /* total 10 DWORD */ + +struct v9_de_ib_state { + /* section of non chained ib part */ + uint32_t ib_completion_status; + uint32_t de_constEngine_count; + uint32_t ib_offset_ib1; + uint32_t ib_offset_ib2; + + /* section of chained ib */ + uint32_t chain_ib_addrlo_ib1; + uint32_t chain_ib_addrlo_ib2; + uint32_t chain_ib_addrhi_ib1; + uint32_t chain_ib_addrhi_ib2; + uint32_t chain_ib_size_ib1; + uint32_t chain_ib_size_ib2; + + /* section of non chained ib part */ + uint32_t preamble_begin_ib1; + uint32_t preamble_begin_ib2; + uint32_t preamble_end_ib1; + uint32_t preamble_end_ib2; + + /* section of chained ib */ + uint32_t chain_ib_pream_addrlo_ib1; + uint32_t chain_ib_pream_addrlo_ib2; + uint32_t chain_ib_pream_addrhi_ib1; + uint32_t chain_ib_pream_addrhi_ib2; + + /* section of non chained ib part */ + uint32_t draw_indirect_baseLo; + uint32_t draw_indirect_baseHi; + uint32_t disp_indirect_baseLo; + uint32_t disp_indirect_baseHi; + uint32_t gds_backup_addrlo; + uint32_t gds_backup_addrhi; + uint32_t index_base_addrlo; + uint32_t index_base_addrhi; + uint32_t sample_cntl; +}; /* Total of 27 DWORD */ + +struct v9_gfx_meta_data { + /* 10 DWORD, address must be 4KB aligned */ + struct v9_ce_ib_state ce_payload; + uint32_t reserved1[54]; + /* 27 DWORD, address must be 64B aligned */ + struct v9_de_ib_state de_payload; + /* PFP IB base address which get pre-empted */ + uint32_t DeIbBaseAddrLo; + uint32_t DeIbBaseAddrHi; + uint32_t reserved2[931]; +}; /* Total of 4K Bytes */ + #endif /* V9_STRUCTS_H_ */ -- 2.5.5