> -----Original Message----- > From: Alex Deucher [mailto:alexdeucher at gmail.com] > Sent: Tuesday, December 20, 2016 7:23 AM > To: Yu, Xiangliang <Xiangliang.Yu at amd.com> > Cc: amd-gfx list <amd-gfx at lists.freedesktop.org>; > dl.SRDC_SW_GPUVirtualization > <dl.SRDC_SW_GPUVirtualization at amd.com>; Liu, Monk > <Monk.Liu at amd.com> > Subject: Re: [PATCH 12/23] drm/amdgpu: Insert meta data during submitting > IB > > On Sat, Dec 17, 2016 at 11:16 AM, Xiangliang Yu <Xiangliang.Yu at amd.com> > wrote: > > Virtualization world switch need each command that is submitted into > > GFX with an extra entry, which will using WRITE_DATA to fullfill CSA. > > In this way, CP will save CE/DE snapshots when preemption occurred and > > restore it later. > > > > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > > Signed-off-by: Xiangliang Yu <Xiangliang.Yu at amd.com> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 + > > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 2 + > > drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h | 39 ++++++++++++++ > > drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c | 88 > ++++++++++++++++++++++++++++++++ > > 4 files changed, 131 insertions(+) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > index acf48de..cc35255 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > @@ -175,6 +175,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, > unsigned num_ibs, > > if (ring->funcs->emit_hdp_flush) > > amdgpu_ring_emit_hdp_flush(ring); > > > > + amdgpu_gfx_ring_emit_meta_data(ring, vm); > > + > > /* always set cond_exec_polling to CONTINUE */ > > *ring->cond_exe_cpu_addr = 1; > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > index dff1248..d6f57a2 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > > @@ -69,4 +69,6 @@ int amd_xgpu_set_ip_blocks(struct amdgpu_device > > *adev); > > /* Context Save Area functions */ > > int amdgpu_vm_map_csa(struct amdgpu_device *adev, struct > amdgpu_vm > > *vm); void amdgpu_vm_unmap_csa(struct amdgpu_device *adev, struct > > amdgpu_vm *vm); > > +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring, > > + struct amdgpu_vm *vm); > > #endif > > diff --git a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > > b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > > index a25e07f..cc3123b 100644 > > --- a/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > > +++ b/drivers/gpu/drm/amd/mxgpu/amd_mxgpu.h > > @@ -26,9 +26,48 @@ > > > > #include "amdgpu.h" > > > > +/* context save area structures */ > > +struct amdgpu_ce_ib_state { > > + uint32_t ce_ib_completion_status; > > + uint32_t ce_const_engine_count; > > + uint32_t ce_ib_offset_ib1; > > + uint32_t ce_ib_offset_ib2; > > +}; > > + > > +struct amdgpu_de_ib_state { > > + uint32_t de_ib_completion_status; > > + uint32_t de_const_engine_count; > > + uint32_t de_ib_offset_ib1; > > + uint32_t de_ib_offset_ib2; > > + uint32_t preamble_begin_ib1; > > + uint32_t preamble_begin_ib2; > > + uint32_t preamble_end_ib1; > > + uint32_t preamble_end_ib2; > > + uint32_t draw_indirect_base_lo; > > + uint32_t draw_indirect_base_hi; > > + uint32_t disp_indirect_base_lo; > > + uint32_t disp_indirect_base_hi; > > + uint32_t gds_backup_addr_lo; > > + uint32_t gds_backup_addr_hi; > > + uint32_t index_base_addr_lo; > > + uint32_t index_base_addr_hi; > > + uint32_t sample_cntl; > > +}; > > + > > +struct amdgpu_gfx_meta_data { > > + struct amdgpu_ce_ib_state ce_payload; > > + uint32_t reserved1[60]; > > + struct amdgpu_de_ib_state de_payload; > > + uint32_t de_ib_base_addr_lo; > > + uint32_t de_ib_base_addr_hi; > > + uint32_t reserved2[941]; > > +}; > > + > > These are gfx8 specific and should be moved to gfx8 module. I think it is only relate to virtualization right now. I'm fine to move it out if support whole preemption solution later. > > > /* xgpu structures */ > > struct amd_xgpu_csa { > > struct amdgpu_bo *robj; > > + struct amdgpu_ce_ib_state ce_payload; > > + struct amdgpu_de_ib_state de_payload; > > uint64_t gpu_addr; > > uint64_t gds_addr; > > int32_t size; > > diff --git a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > > b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > > index 246a747..6d4246c 100644 > > --- a/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > > +++ b/drivers/gpu/drm/amd/mxgpu/mxgpu_csa.c > > @@ -207,3 +207,91 @@ void amdgpu_vm_unmap_csa(struct > amdgpu_device *adev, struct amdgpu_vm *vm) > > sa = &xgpu->sa; > > xgpu_vm_unmap_csa(adev, vm, sa); } > > + > > +static void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, > > + void *src, int count_dw) { > > + if (ring->count_dw < count_dw) > > + DRM_ERROR("writing more dwords to the ring than > expected:%d.\n", > > + count_dw); > > + else { > > + unsigned int chunk1, chunk2; > > + void *dst = (void *)&ring->ring[ring->wptr]; > > + > > + chunk1 = ring->ptr_mask + 1 - ring->wptr; > > + chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1; > > + chunk2 = count_dw - chunk1; > > + chunk1 <<= 2; > > + chunk2 <<= 2; > > + if (chunk1) { > > + memcpy(dst, src, chunk1); > > + dst = (void *)(((uint64_t)dst + chunk1) & > > + ring->ptr_mask); > > + } > > + > > + if (chunk2) { > > + src += chunk1; > > + dst = (void *)ring->ring; > > + memcpy(dst, src, chunk2); > > + } > > + > > + ring->wptr += count_dw; > > + ring->wptr &= ring->ptr_mask; > > + ring->count_dw -= count_dw; > > + } > > +} > > + > > +void amdgpu_gfx_ring_emit_meta_data(struct amdgpu_ring *ring, > > + struct amdgpu_vm *vm) { > > + struct amdgpu_ce_ib_state *ce_payload; > > + struct amdgpu_de_ib_state *de_payload; > > + struct amd_xgpu_csa *sa = NULL; > > + struct amd_xgpu *xgpu = (struct amd_xgpu *)ring->adev->priv_data; > > + uint64_t csa_addr, gds_addr; > > + int cnt; > > + > > + if (!xgpu || (ring->funcs->type != AMDGPU_RING_TYPE_GFX)) > > + return; > > + > > + sa = &xgpu->sa; > > No need to make this dependent on xgpu. As I said in the previous patch > preemption is useful independent of xgpu. > Ditto > > + > > + ce_payload = &sa->ce_payload; > > + de_payload = &sa->de_payload; > > + memset(ce_payload, 0, sizeof(*ce_payload)); > > + memset(de_payload, 0, sizeof(*de_payload)); > > + > > + cnt = (sizeof(*ce_payload) >> 2) + 4 - 2; > > + csa_addr = vm ? vm->csa.csa_addr : sa->gpu_addr; > > + gds_addr = vm ? vm->csa.gds_addr : sa->gds_addr; > > + de_payload->gds_backup_addr_lo = lower_32_bits(gds_addr); > > + de_payload->gds_backup_addr_hi = upper_32_bits(gds_addr); > > + > > + /* write CE meta data */ > > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); > > + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(2) | > > + WRITE_DATA_DST_SEL(8) | > > + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0)); > > + amdgpu_ring_write(ring, lower_32_bits(csa_addr + > > + offsetof(struct amdgpu_gfx_meta_data, ce_payload))); > > + amdgpu_ring_write(ring, upper_32_bits(csa_addr + > > + offsetof(struct amdgpu_gfx_meta_data, > > + ce_payload))); > > + > > + amdgpu_ring_write_multiple(ring, (void *)ce_payload, > > + sizeof(*ce_payload) >> 2); > > + > > + /* write DE meta data */ > > + cnt = (sizeof(*de_payload) >> 2) + 4 - 2; > > + > > + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); > > + amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(1) | > > + WRITE_DATA_DST_SEL(8) | > > + WR_CONFIRM | WRITE_DATA_CACHE_POLICY(0)); > > + amdgpu_ring_write(ring, lower_32_bits(csa_addr + > > + offsetof(struct amdgpu_gfx_meta_data, de_payload))); > > + amdgpu_ring_write(ring, upper_32_bits(csa_addr + > > + offsetof(struct amdgpu_gfx_meta_data, > > + de_payload))); > > + > > + amdgpu_ring_write_multiple(ring, (void *)de_payload, > > + sizeof(*de_payload) >> 2); } > > This function is gfx8 specific and should be moved to the gfx8 module. > Ditto > Alex > > > -- > > 2.7.4 > > > > _______________________________________________ > > amd-gfx mailing list > > amd-gfx at lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/amd-gfx