Since commit "Move to a per-IB secure flag (TMZ)", we've been seeing hangs in GFX. Ray H. pointed out by sending a patch that we need to send FRAME CONTROL stop/start back-to-back, every time we flip the TMZ flag as per each IB we submit. That is, when we transition from TMZ to non-TMZ we have to send a stop with TMZ followed by a start with non-TMZ, and similarly for transitioning from non-TMZ into TMZ. This patch implements this, thus fixing the GFX hang. Signed-off-by: Luben Tuikov <luben.tuikov@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 87 +++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 ++-- 4 files changed, 79 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4b2342d11520..16d6df3304d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -216,40 +216,75 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_cntxcntl(ring, status); } - secure = false; + /* Find the first non-preamble IB. + */ for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && - skip_preamble && - !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && - !amdgpu_mcbp && - !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ - continue; - - /* If this IB is TMZ, add frame TMZ start packet, - * else, turn off TMZ. - */ - if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) { - if (!secure) { - secure = true; - amdgpu_ring_emit_tmz(ring, true); - } - } else if (secure) { + if (!(ib->flags & AMDGPU_IB_FLAG_PREAMBLE) || + !skip_preamble || + (status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) || + amdgpu_mcbp || + amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ + break; + } + if (i >= num_ibs) + goto Done; + /* Setup initial TMZiness and send it off. + */ + secure = false; + if (job && ring->funcs->emit_frame_cntl) { + if (ib->flags & AMDGPU_IB_FLAGS_SECURE) + secure = true; + else secure = false; - amdgpu_ring_emit_tmz(ring, false); - } - - amdgpu_ring_emit_ib(ring, job, ib, status); - status &= ~AMDGPU_HAVE_CTX_SWITCH; + amdgpu_ring_emit_frame_cntl(ring, true, secure); } + amdgpu_ring_emit_ib(ring, job, ib, status); + status &= ~AMDGPU_HAVE_CTX_SWITCH; + i += 1; + /* Send the rest of the IBs. + */ + if (job && ring->funcs->emit_frame_cntl) { + for ( ; i < num_ibs; ++i) { + ib = &ibs[i]; + + /* drop preamble IBs if we don't have a context switch */ + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && + skip_preamble && + !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && + !amdgpu_mcbp && + !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ + continue; + + if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { + amdgpu_ring_emit_frame_cntl(ring, false, secure); + secure = !secure; + amdgpu_ring_emit_frame_cntl(ring, true, secure); + } - if (secure) { - secure = false; - amdgpu_ring_emit_tmz(ring, false); + amdgpu_ring_emit_ib(ring, job, ib, status); + status &= ~AMDGPU_HAVE_CTX_SWITCH; + } + amdgpu_ring_emit_frame_cntl(ring, false, secure); + } else { + for ( ; i < num_ibs; ++i) { + ib = &ibs[i]; + + /* drop preamble IBs if we don't have a context switch */ + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && + skip_preamble && + !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) && + !amdgpu_mcbp && + !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ + continue; + + amdgpu_ring_emit_ib(ring, job, ib, status); + status &= ~AMDGPU_HAVE_CTX_SWITCH; + } } - +Done: #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 24caff085d00..4d019d6b3eb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs { void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start, + bool secure); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, enum drm_sched_priority priority); @@ -247,7 +248,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 44f00ecea322..3e83ddb64c3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -4724,12 +4724,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) sizeof(de_payload) >> 2); } -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, + bool secure) { - if (amdgpu_is_tmz(ring->adev)) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); - } + uint32_t v = secure ? FRAME_TMZ : 0; + + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); } static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) @@ -5183,7 +5184,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, - .emit_tmz = gfx_v10_0_ring_emit_tmz, + .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1c7a16b91686..fbde71224127 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); } -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, + bool secure) { - if (amdgpu_is_tmz(ring->adev)) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); - } + uint32_t v = secure ? FRAME_TMZ : 0; + + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); } static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, - .emit_tmz = gfx_v9_0_ring_emit_tmz, + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, -- 2.25.1.362.g51ebf55b93 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx