On 2017年05月04日 18:04, Monk Liu wrote: > VI/AI affected: > > CP/HW team requires KMD insert FRAME_CONTROL(end) after > the last IB and before the fence of this DMAframe. > > this is to make sure the cache are flushed, and it's a must > change no matter MCBP/SR-IOV or bare-metal case because new > CP hw won't do the cache flush for each IB anymore, it just > leaves it to KMD now. > > with this patch, certain MCBP hang issue when rendering > vulkan/chained-ib are resolved. > > Change-Id: I34ee7528aa32e704b2850bc6d50774b24c29b840 > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 3 +++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 11 +++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 11 +++++++++++ > include/uapi/drm/amdgpu_drm.h | 2 ++ > 5 files changed, 28 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 4480e01..df49709 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -201,6 +201,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ > continue; > > + if (i == num_ibs - 1) > + ib->flags |= AMDGPU_IB_FLAG_LAST; We can wrap the ring func like amdgpu_ring_emit_tmz(), and add it before emit fence. Regards, David Zhou > + > amdgpu_ring_emit_ib(ring, ib, job ? 
job->vm_id : 0, > need_ctx_switch); > need_ctx_switch = false; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 2acaac6..03e88c6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -143,6 +143,7 @@ struct amdgpu_ring_funcs { > void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); > void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); > void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); > + void (*emit_tmz)(struct amdgpu_ring *ring, bool start); > }; > > struct amdgpu_ring { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index eed5745..f4aeba6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -6422,6 +6422,10 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, > (ib->gpu_addr & 0xFFFFFFFC)); > amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); > amdgpu_ring_write(ring, control); > + > + /* insert FRAME_CONTROL (end) after the last IB */ > + if (ib->flags & AMDGPU_IB_FLAG_LAST && ring->funcs->emit_tmz) > + ring->funcs->emit_tmz(ring, false); > } > > static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, > @@ -6651,6 +6655,12 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne > ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; > } > > +static void gfx_v8_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) > +{ > + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > + amdgpu_ring_write(ring, FRAME_CMD(start ? 
0 : 1)); /* frame_end */ > +} > + > > static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) > { > @@ -6932,6 +6942,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { > .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, > .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, > + .emit_tmz = gfx_v8_0_ring_emit_tmz, > }; > > static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index f192297..745e2f7 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -2899,6 +2899,10 @@ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ > lower_32_bits(ib->gpu_addr)); > amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); > amdgpu_ring_write(ring, control); > + > + /* insert FRAME_CONTROL (end) after the last IB */ > + if (ib->flags & AMDGPU_IB_FLAG_LAST && ring->funcs->emit_tmz) > + ring->funcs->emit_tmz(ring, false); > } > > #define INDIRECT_BUFFER_VALID (1 << 23) > @@ -3156,6 +3160,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne > ring->ring[offset] = (ring->ring_size>>2) - offset + cur; > } > > +static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) > +{ > + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > + amdgpu_ring_write(ring, FRAME_CMD(start ? 
0 : 1)); /* frame_end */ > +} > + > static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) > { > struct amdgpu_device *adev = ring->adev; > @@ -3490,6 +3500,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, > .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, > + .emit_tmz = gfx_v9_0_ring_emit_tmz, > }; > > static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h > index 20ea895..e979285 100644 > --- a/include/uapi/drm/amdgpu_drm.h > +++ b/include/uapi/drm/amdgpu_drm.h > @@ -478,6 +478,8 @@ union drm_amdgpu_cs { > /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ > #define AMDGPU_IB_FLAG_PREEMPT (1<<2) > > +#define AMDGPU_IB_FLAG_LAST (1<<3) > + > struct drm_amdgpu_cs_chunk_ib { > __u32 _pad; > /** AMDGPU_IB_FLAG_* */