On 2020-02-25 8:57 a.m., Huang Rui wrote: > Since 6643ba1 frame control packets are only issued in presence of secure IB(s). > This causes hangs on some hardware (eg: Raven1). This patch restores the > unconditionnal frame control packets issuing, that's to keep the per-IB logic > regarding the secure flag. > > Fixes: 6643ba1 drm/amdgpu: Move to a per-IB secure flag (TMZ) > > Reported-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx> > Signed-off-by: Huang Rui <ray.huang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 41 +++++++++++++++++++------------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 ++-- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 ++++++------ > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 +++++----- > 4 files changed, 43 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 4b2342d..9713a7d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -131,7 +131,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > uint64_t fence_ctx; > uint32_t status = 0, alloc_size; > unsigned fence_flags = 0; > - bool secure; > + int secure = -1; Don't initialize them far away from where they're being used. That's a very bad habit. This function is pretty long. Initialize "secure" next to where it is being used--right above the for-loop so that it is clear how it's used. > > unsigned i; > int r = 0; > @@ -216,7 +216,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > amdgpu_ring_emit_cntxcntl(ring, status); > } > > - secure = false; Set "secure" here, close to where it is used. This makes the code modular, so that it is easy to see what is happening _locally_. Another advantage is that should ever this code be pulled out into its own function, we don't have to chase variables around. > for (i = 0; i < num_ibs; ++i) { > ib = &ibs[i]; > > @@ -228,27 +227,37 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ > continue; > > - /* If this IB is TMZ, add frame TMZ start packet, > - * else, turn off TMZ. > - */ > - if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) { > - if (!secure) { > - secure = true; > - amdgpu_ring_emit_tmz(ring, true); > + if (job && ring->funcs->emit_frame_cntl) { > + if (secure == -1) { > + if (ib->flags & AMDGPU_IB_FLAGS_SECURE) { > + secure = 1; > + amdgpu_ring_emit_frame_cntl(ring, true, true); > + } else { > + secure = 0; > + amdgpu_ring_emit_frame_cntl(ring, true, false); > + } So the above is executed only once. At the very first iteration, since "secure" is set to -1. You can pull that out of the loop and have a more straightforward body of the loop. Regards, Luben > + } else { > + if (secure == 1 && > + !(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { > + secure = 0; > + amdgpu_ring_emit_frame_cntl(ring, false, true); > + amdgpu_ring_emit_frame_cntl(ring, true, false); > + } else if (secure == 0 && > + ib->flags & AMDGPU_IB_FLAGS_SECURE) { > + secure = 1; > + amdgpu_ring_emit_frame_cntl(ring, false, false); > + amdgpu_ring_emit_frame_cntl(ring, true, true); > + } > } > - } else if (secure) { > - secure = false; > - amdgpu_ring_emit_tmz(ring, false); > } > > amdgpu_ring_emit_ib(ring, job, ib, status); > status &= ~AMDGPU_HAVE_CTX_SWITCH; > } > > - if (secure) { > - secure = false; > - amdgpu_ring_emit_tmz(ring, false); > - } > + if (job && ring->funcs->emit_frame_cntl) > + amdgpu_ring_emit_frame_cntl(ring, false, > + (secure == 1) ? true : false); > > #ifdef CONFIG_X86_64 > if (!(adev->flags & AMD_IS_APU)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index 24caff0..4d019d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs { > void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, > uint32_t reg0, uint32_t reg1, > uint32_t ref, uint32_t mask); > - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); > + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start, > + bool secure); > /* priority functions */ > void (*set_priority) (struct amdgpu_ring *ring, > enum drm_sched_priority priority); > @@ -247,7 +248,7 @@ struct amdgpu_ring { > #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) > #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) > #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) > -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) > +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 7b61583..748ac35 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); > static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); > static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); > static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); > > static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) > { > @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) > sizeof(de_payload) >> 2); > } > > -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) > +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, > + bool secure) > { > - if (amdgpu_is_tmz(ring->adev)) { > - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); > - } > + uint32_t v = secure ? FRAME_TMZ : 0; > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); > } > > static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) > @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { > .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, > .preempt_ib = gfx_v10_0_ring_preempt_ib, > - .emit_tmz = gfx_v10_0_ring_emit_tmz, > + .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, > .emit_wreg = gfx_v10_0_ring_emit_wreg, > .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, > .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 1c7a16b..fbde712 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) > amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); > } > > -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) > +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, > + bool secure) > { > - if (amdgpu_is_tmz(ring->adev)) { > - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); > - } > + uint32_t v = secure ? FRAME_TMZ : 0; > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); > + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); > } > > static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) > @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { > .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, > .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, > .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, > - .emit_tmz = gfx_v9_0_ring_emit_tmz, > + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, > .emit_wreg = gfx_v9_0_ring_emit_wreg, > .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, > .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx