On 29.08.2016 at 04:55, Monk Liu wrote:
> The CE and DE can be at most 128 dw apart, so to sync the CE and DE we
> no longer need the double SWITCH_BUFFERs, which are ugly and harm
> performance; we only need to insert 128 dw of NOPs after the VM flush
> to prevent a CE VM fault.
>
> Change-Id: Ibec954ce4c817ad7d3bce89c2bcb95b6c6bb5411
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>

Looks good to me, but only the GFX engines have a CE, so syncing on the
compute engines is pretty much pointless.

So I suggest that you move this into the "usepfp" if branch as well.

With that fixed the patch is Reviewed-by: Christian König
<christian.koenig at amd.com>.

Regards,
Christian.

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 18 ++++++------------
>  1 file changed, 6 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 26fced0..ce1e616 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6005,14 +6005,6 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>  	amdgpu_ring_write(ring, seq);
>  	amdgpu_ring_write(ring, 0xffffffff);
>  	amdgpu_ring_write(ring, 4); /* poll interval */
> -
> -	if (usepfp) {
> -		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -	}
>  }
>
>  static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
> @@ -6020,6 +6012,9 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  {
>  	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
>
> +	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
> +	amdgpu_ring_insert_nop(ring, 128);
> +
>  	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
>  	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
>  				 WRITE_DATA_DST_SEL(0)) |
> @@ -6059,11 +6054,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
> -		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -		amdgpu_ring_write(ring, 0);
>  	}
> +
> +	/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
> +	amdgpu_ring_insert_nop(ring, 128);
>  }
>
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
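
For illustration only, a minimal sketch of what the suggested change could
look like (the elided flush sequence and the exact parameter list of
gfx_v8_0_ring_emit_vm_flush() are assumed from the quoted diff, not taken
from an actual follow-up patch): both 128 dw NOP paddings are emitted only
when usepfp is set, i.e. only on the GFX ring, since the compute rings have
no CE that can run ahead of the DE.

	static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
						unsigned vm_id, uint64_t pd_addr)
	{
		int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

		/* Hypothetical sketch, not the merged patch: only GFX rings
		 * have a CE, so pad 128 dw here so the CE (which may run up to
		 * 128 dw ahead of the DE) has finished its CEIB before the DE
		 * starts the VM flush below. */
		if (usepfp)
			amdgpu_ring_insert_nop(ring, 128);

		/* ... WRITE_DATA / WAIT_REG_MEM flush sequence unchanged from
		 * the quoted diff ... */

		if (usepfp) {
			/* sync PFP to ME, otherwise we might get invalid PFP reads */
			amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
			amdgpu_ring_write(ring, 0x0);

			/* keep the CE from touching the VM until the flush
			 * above has completed */
			amdgpu_ring_insert_nop(ring, 128);
		}
	}

Whether the trailing padding goes inside the existing if (usepfp) block or
behind a second check is cosmetic; the point of the suggestion is simply that
compute rings never pay for the 256 dw of NOPs they do not need.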