Virtualization world switch requires that each command submitted to the
GFX ring carry an extra entry, which uses WRITE_DATA to fill the CSA.
This way, the CP saves CE/DE snapshots when preemption occurs and
restores them later.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h |  2 +
 drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h    | 39 ++++++++++++++
 drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c    | 88 ++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index acf48de..cc35255 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_flush)
 		amdgpu_ring_emit_hdp_flush(ring);
 
+	amdgpu_gfx_ring_emit_meta_data(ring, vm);
+
 	/* always set cond_exec_polling to CONTINUE */
 	*ring->cond_exe_cpu_addr = 1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index dff1248..d6f57a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev);
 /* Context Save Area functions */
 int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+				    struct amdgpu_vm *vm);
 #endif
diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
index a25e07f..cc3123b 100644
--- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
+++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h
@@ -26,9 +26,48 @@
 
 #include "amdgpu.h"
 
+/* context save area structures */
+struct amdgpu_ce_ib_state {
+	uint32_t	ce_ib_completion_status;
+	uint32_t	ce_const_engine_count;
+	uint32_t	ce_ib_offset_ib1;
+	uint32_t	ce_ib_offset_ib2;
+};
+
+struct amdgpu_de_ib_state {
+	uint32_t	de_ib_completion_status;
+	uint32_t	de_const_engine_count;
+	uint32_t	de_ib_offset_ib1;
+	uint32_t	de_ib_offset_ib2;
+	uint32_t	preamble_begin_ib1;
+	uint32_t	preamble_begin_ib2;
+	uint32_t	preamble_end_ib1;
+	uint32_t	preamble_end_ib2;
+	uint32_t	draw_indirect_base_lo;
+	uint32_t	draw_indirect_base_hi;
+	uint32_t	disp_indirect_base_lo;
+	uint32_t	disp_indirect_base_hi;
+	uint32_t	gds_backup_addr_lo;
+	uint32_t	gds_backup_addr_hi;
+	uint32_t	index_base_addr_lo;
+	uint32_t	index_base_addr_hi;
+	uint32_t	sample_cntl;
+};
+
+struct amdgpu_gfx_meta_data {
+	struct amdgpu_ce_ib_state	ce_payload;
+	uint32_t			reserved1[60];
+	struct amdgpu_de_ib_state	de_payload;
+	uint32_t			de_ib_base_addr_lo;
+	uint32_t			de_ib_base_addr_hi;
+	uint32_t			reserved2[941];
+};
+
 /* xgpu structures */
 struct amd_xgpu_csa {
 	struct amdgpu_bo		*robj;
+	struct amdgpu_ce_ib_state	ce_payload;
+	struct amdgpu_de_ib_state	de_payload;
 	uint64_t			gpu_addr;
 	uint64_t			gds_addr;
 	int32_t				size;
diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
index 246a747..6d4246c 100644
--- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
+++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c
@@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	sa = &xgpu->sa;
 	xgpu_vm_unmap_csa(adev, vm, sa);
 }
+
+static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
+				       void *src, int count_dw)
+{
+	if (ring->count_dw < count_dw)
+		DRM_ERROR("writing more dwords to the ring than expected:%d.\n",
+			  count_dw);
+	else {
+		unsigned int chunk1, chunk2;
+		void *dst = (void *)&ring->ring[ring->wptr];
+
+		chunk1 = ring->ptr_mask + 1 - ring->wptr;
+		chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
+		chunk2 = count_dw - chunk1;
+		chunk1 <<= 2;
+		chunk2 <<= 2;
+		if (chunk1) {
+			memcpy(dst, src, chunk1);
+			dst = (void *)(((uint64_t)dst + chunk1) &
+				       ring->ptr_mask);
+		}
+
+		if (chunk2) {
+			src += chunk1;
+			dst = (void *)ring->ring;
+			memcpy(dst, src, chunk2);
+		}
+
+		ring->wptr += count_dw;
+		ring->wptr &= ring->ptr_mask;
+		ring->count_dw -= count_dw;
+	}
+}
+
+void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring,
+				    struct amdgpu_vm *vm)
+{
+	struct amdgpu_ce_ib_state *ce_payload;
+	struct amdgpu_de_ib_state *de_payload;
+	struct amd_xgpu_csa *sa = NULL;
+	struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data;
+	uint64_t csa_addr, gds_addr;
+	int cnt;
+
+	if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX))
+		return;
+
+	sa = &xgpu->sa;
+
+	ce_payload = &sa->ce_payload;
+	de_payload = &sa->de_payload;
+	memset(ce_payload, 0, sizeof(*ce_payload));
+	memset(de_payload, 0, sizeof(*de_payload));
+
+	cnt = (sizeof(*ce_payload) >> 2) + 4 - 2;
+	csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr;
+	gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr;
+	de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr);
+	de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr);
+
+	/* write CE meta data */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) |
+				WRITE_DATA_DST_SEL(8) |
+				WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+		offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+		offsetof(struct amdgpu_gfx_meta_data, ce_payload)));
+
+	amdgpu_ring_write_multiple(ring, (void *)ce_payload,
+				   sizeof(*ce_payload) >> 2);
+
+	/* write DE meta data */
+	cnt = (sizeof(*de_payload) >> 2) + 4 - 2;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
+	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) |
+				WRITE_DATA_DST_SEL(8) |
+				WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0));
+	amdgpu_ring_write(ring, lower_32_bits(csa_addr +
+		offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+	amdgpu_ring_write(ring, upper_32_bits(csa_addr +
+		offsetof(struct amdgpu_gfx_meta_data, de_payload)));
+
+	amdgpu_ring_write_multiple(ring, (void *)de_payload,
+				   sizeof(*de_payload) >> 2);
+}
-- 
2.7.4
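A note on the CSA layout introduced in amd_mxgpu.h: the CE payload
occupies 16 bytes at offset 0, reserved1[60] pads the DE payload out to
a 256-byte boundary, and reserved2[941] rounds struct
amdgpu_gfx_meta_data to exactly one 4 KiB page. Below is a minimal
standalone sketch that checks this arithmetic at compile time; the
mirror structs are illustrative stand-ins, not the driver headers:

#include <stddef.h>
#include <stdint.h>

/* Stand-in mirrors of the patch's CSA structs; only the dword
 * counts matter for the layout check. */
struct ce_ib_state { uint32_t dw[4]; };		/*  4 dwords = 16 bytes */
struct de_ib_state { uint32_t dw[17]; };	/* 17 dwords = 68 bytes */

struct gfx_meta_data {
	struct ce_ib_state ce_payload;
	uint32_t reserved1[60];
	struct de_ib_state de_payload;
	uint32_t de_ib_base_addr_lo;
	uint32_t de_ib_base_addr_hi;
	uint32_t reserved2[941];
};

/* 16 + 240 = 256; 256 + 68 + 8 + 3764 = 4096 */
_Static_assert(offsetof(struct gfx_meta_data, ce_payload) == 0,
	       "CE payload starts the CSA page");
_Static_assert(offsetof(struct gfx_meta_data, de_payload) == 256,
	       "reserved1 pads the DE payload to a 256-byte boundary");
_Static_assert(sizeof(struct gfx_meta_data) == 4096,
	       "meta data fills exactly one 4 KiB page");

The same dword counts drive the WRITE_DATA sizing in the patch:
cnt = (sizeof(*payload) >> 2) + 4 - 2 appears to be the payload length
in dwords plus the control and two address dwords, minus the PM4
count-minus-one bias, giving 6 for the CE payload and 19 for the DE
payload.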
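The amdgpu_ring_write_multiple() helper above copies a payload into the
ring with at most two memcpy() calls, splitting at the end of the
buffer when the write would wrap. Here is a toy user-space model of
that split, with hypothetical names, an 8-dword ring, and none of the
driver's count_dw bookkeeping:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_DW		8u		/* ring size in dwords */
#define PTR_MASK	(RING_DW - 1)

static uint32_t ring_buf[RING_DW];
static uint32_t wptr = 6;		/* two dwords before the wrap */

static void write_multiple(const uint32_t *src, uint32_t count_dw)
{
	uint32_t chunk1 = RING_DW - wptr;	/* dwords until the end */
	uint32_t chunk2;

	if (chunk1 > count_dw)
		chunk1 = count_dw;
	chunk2 = count_dw - chunk1;

	memcpy(&ring_buf[wptr], src, chunk1 * sizeof(*src));
	if (chunk2)		/* remainder wraps to the buffer head */
		memcpy(ring_buf, src + chunk1, chunk2 * sizeof(*src));

	wptr = (wptr + count_dw) & PTR_MASK;
}

int main(void)
{
	const uint32_t payload[4] = { 0xa, 0xb, 0xc, 0xd };
	uint32_t i;

	write_multiple(payload, 4);
	for (i = 0; i < RING_DW; i++)
		printf("ring[%u] = 0x%x\n", i, ring_buf[i]);
	return 0;
}

With wptr at 6, the 4-dword payload lands as 0xa 0xb at ring[6..7] and
0xc 0xd at ring[0..1], leaving wptr at 2. This is the same
chunk1/chunk2 arithmetic the patch performs, except that the patch
converts the chunks to bytes (chunk <<= 2) before calling memcpy().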