On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu at amd.com> wrote: > Virtualization world switch need each command that is submitted > into GFX with an extra entry, which will using WRITE_DATA to > fullfill CSA. In this way, CP will save CE/DE snapshots when > preemption occurred and restore it later. > > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 + > drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h | 39 ++++++++++++++ > drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c | 88 ++++++++++++++++++++++++++++++++ > 4 files changed, 131 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index acf48de..cc35255 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > if (ring->funcs->emit_hdp_flush) > amdgpu_ring_emit_hdp_flush(ring); > > + amdgpu_gfx_ring_emit_meta_data(ring, vm); > + > /* always set cond_exec_polling to CONTINUE */ > *ring->cond_exe_cpu_addr = 1; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index dff1248..d6f57a2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device *adev); > /* Context Save Area functions */ > int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); > void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm); > +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring, > + struct amdgpu_vm *vm); > #endif > diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > index a25e07f..cc3123b 100644 > --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > @@ -26,9 +26,48 @@ > > #include "amdgpu.h" > > +/* context save area structures */ > +struct amdgpu_ce_ib_state { > + uint32_t ce_ib_completion_status; > + uint32_t ce_const_engine_count; > + uint32_t ce_ib_offset_ib1; > + uint32_t ce_ib_offset_ib2; > +}; > + > +struct amdgpu_de_ib_state { > + uint32_t de_ib_completion_status; > + uint32_t de_const_engine_count; > + uint32_t de_ib_offset_ib1; > + uint32_t de_ib_offset_ib2; > + uint32_t preamble_begin_ib1; > + uint32_t preamble_begin_ib2; > + uint32_t preamble_end_ib1; > + uint32_t preamble_end_ib2; > + uint32_t draw_indirect_base_lo; > + uint32_t draw_indirect_base_hi; > + uint32_t disp_indirect_base_lo; > + uint32_t disp_indirect_base_hi; > + uint32_t gds_backup_addr_lo; > + uint32_t gds_backup_addr_hi; > + uint32_t index_base_addr_lo; > + uint32_t index_base_addr_hi; > + uint32_t sample_cntl; > +}; > + > +struct amdgpu_gfx_meta_data { > + struct amdgpu_ce_ib_state ce_payload; > + uint32_t reserved1[60]; > + struct amdgpu_de_ib_state de_payload; > + uint32_t de_ib_base_addr_lo; > + uint32_t de_ib_base_addr_hi; > + uint32_t reserved2[941]; > +}; > + These are gfx8 specific and should be moved to gfx8 module. > /* xgpu structures */ > struct amd_xgpu_csa { > struct amdgpu_bo *robj; > + struct amdgpu_ce_ib_state ce_payload; > + struct amdgpu_de_ib_state de_payload; > uint64_t gpu_addr; > uint64_t gds_addr; > int32_t size; > diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > index 246a747..6d4246c 100644 > --- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > @@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm) > sa = &xgpu->sa; > xgpu_vm_unmap_csa(adev, vm, sa); > } > + > +static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, > + void *src, int count_dw) > +{ > + if (ring->count_dw < count_dw) > + DRM_ERROR("writing more dwords to the ring than expected:%d.\n", > + count_dw); > + else { > + unsigned int chunk1, chunk2; > + void *dst = (void *)&ring->ring[ring->wptr]; > + > + chunk1 = ring->ptr_mask + 1 - ring->wptr; > + chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1; > + chunk2 = count_dw - chunk1; > + chunk1 <<= 2; > + chunk2 <<= 2; > + if (chunk1) { > + memcpy(dst, src, chunk1); > + dst = (void *)(((uint64_t)dst + chunk1) & > + ring->ptr_mask); > + } > + > + if (chunk2) { > + src += chunk1; > + dst = (void *)ring->ring; > + memcpy(dst, src, chunk2); > + } > + > + ring->wptr += count_dw; > + ring->wptr &= ring->ptr_mask; > + ring->count_dw -= count_dw; > + } > +} > + > +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring, > + struct amdgpu_vm *vm) > +{ > + struct amdgpu_ce_ib_state *ce_payload; > + struct amdgpu_de_ib_state *de_payload; > + struct amd_xgpu_csa *sa = NULL; > + struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data; > + uint64_t csa_addr, gds_addr; > + int cnt; > + > + if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX)) > + return; > + > + sa = &xgpu->sa; No need to make this dependent on xgpu. As I said in the previous patch preemption is useful independent of xgpu. > + > + ce_payload = &sa->ce_payload; > + de_payload = &sa->de_payload; > + memset(ce_payload, 0, sizeof(*ce_payload)); > + memset(de_payload, 0, sizeof(*de_payload)); > + > + cnt = (sizeof(*ce_payload) >> 2) + 4 - 2; > + csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr; > + gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr; > + de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr); > + de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr); > + > + /* write CE meta data */ > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); > + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) | > + WRITE_DATA_DST_SEL(8) | > + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0)); > + amdgpu_ring_write(ring, lower_32_bits(csa_addr + > + offsetof(struct amdgpu_gfx_meta_data, ce_payload))); > + amdgpu_ring_write(ring, upper_32_bits(csa_addr + > + offsetof(struct amdgpu_gfx_meta_data, ce_payload))); > + > + amdgpu_ring_write_multiple(ring, (void *)ce_payload, > + sizeof(*ce_payload) >> 2); > + > + /* write DE meta data */ > + cnt = (sizeof(*de_payload) >> 2) + 4 - 2; > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); > + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) | > + WRITE_DATA_DST_SEL(8) | > + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0)); > + amdgpu_ring_write(ring, lower_32_bits(csa_addr + > + offsetof(struct amdgpu_gfx_meta_data, de_payload))); > + amdgpu_ring_write(ring, upper_32_bits(csa_addr + > + offsetof(struct amdgpu_gfx_meta_data, de_payload))); > + > + amdgpu_ring_write_multiple(ring, (void *)de_payload, > + sizeof(*de_payload) >> 2); > +} This function is gfx8 specific and should be moved to the gfx8 module. Alex > -- > 2.7.4 > > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx