On 2020-02-27 5:10 p.m., Luben Tuikov wrote:
> On 2020-02-27 6:47 a.m., Christian König wrote:
>> Am 27.02.20 um 12:38 schrieb Huang Rui:
>>> Since 6643ba1, frame control packets are only issued in the presence of secure IB(s).
>>> This causes hangs on some hardware (e.g. Raven1). This patch restores the
>>> unconditional issuing of frame control packets, while keeping the per-IB logic
>>> regarding the secure flag.
>>>
>>> Fixes: 6643ba1 ("drm/amdgpu: Move to a per-IB secure flag (TMZ)")
>>>
>>> v1 -> v2:
>>> As suggested by Luben, and accepting part of the implementation from this patch:
>>> - Put "secure" close to the loop and use the optimization
>>> - Change "secure" to bool again, and move the "secure == -1" case out of the loop.
>>>
>>> Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@xxxxxxx>
>>> Signed-off-by: Huang Rui <ray.huang@xxxxxxx>
>>> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 29 +++++++++++++++--------------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++--
>>>  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 15 ++++++++-------
>>>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 13 +++++++------
>>>  4 files changed, 33 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> index 4b2342d..0f4909a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>>> @@ -216,7 +216,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>>  		amdgpu_ring_emit_cntxcntl(ring, status);
>>>  	}
>>>
>>> +	/* Setup initial TMZiness and send it off.
>>> +	 */
>>>  	secure = false;
>>> +	if (job && ring->funcs->emit_frame_cntl) {
>>
>> Does anybody remember why we check the job here in the first place?
>
> Ring tests do not use a job.
>
> Having said this, I would've written a separate
> submit for tests and a different one for jobs.
>
>>
>> Independent of that, I think that the check for
>> ring->funcs->emit_frame_cntl should be moved into the
>> amdgpu_ring_emit_frame_cntl() function so that we don't need to repeat
>> it over and over again.
>
> What if that function is called from a loop? Since it would
> be a static conditional, it could be optimized away early on.
> It's a classic optimization pattern.
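The pattern referred to here, as a minimal illustrative sketch (made-up names and types, not the actual amdgpu code): the loop-invariant test is performed once, before the loop, instead of being re-checked inside a helper on every iteration.

#include <stddef.h>

/* Hypothetical stand-ins for the real structures; sketch only. */
struct ring_funcs {
	void (*emit_frame_cntl)(void *ring, int start, int secure);
};

struct ring {
	struct ring_funcs *funcs;
};

static void schedule_ibs(struct ring *ring, const int *ib_secure, int num_ibs)
{
	/* Hoist the loop-invariant NULL test out of the loop. */
	const int have_frame_cntl = (ring->funcs->emit_frame_cntl != NULL);
	int i;

	for (i = 0; i < num_ibs; ++i) {
		if (have_frame_cntl)	/* constant for the whole loop */
			ring->funcs->emit_frame_cntl(ring, 1, ib_secure[i]);
		/* ... emit the IB itself here ... */
	}
}

When the test does not change across iterations it can be done once up front -- by hand as above, or often by the compiler -- which is presumably the "classic optimization pattern" meant here.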
>> If amdgpu_ring_emit_frame_cntl() is still a macro then that is probably
>> also a good opportunity to change that.
>>
>>> +		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE ? true : false;

Tim says no need for the ternary here.

>>> +		amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>> +	}
>>> +
>>>  	for (i = 0; i < num_ibs; ++i) {
>>>  		ib = &ibs[i];
>>>
>>> @@ -228,27 +235,21 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>>  		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
>>>  			continue;
>>>
>>> -		/* If this IB is TMZ, add frame TMZ start packet,
>>> -		 * else, turn off TMZ.
>>> -		 */
>>> -		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
>>> -			if (!secure) {
>>> -				secure = true;
>>> -				amdgpu_ring_emit_tmz(ring, true);
>>> +		if (job && ring->funcs->emit_frame_cntl) {
>>> +			if (!!secure ^ !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
>>
>> Maybe write this as (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)).
>> That is probably easier to understand.
>
> Thanks! This is indeed better!
>
> I'll resubmit with your suggestions and another good one from Tim.

Sorry, I thought this was a comment on my patch as the expression was taken right out of it.

>>
>>> +				amdgpu_ring_emit_frame_cntl(ring, false, secure);
>>> +				secure = !secure;
>>> +				amdgpu_ring_emit_frame_cntl(ring, true, secure);
>>>  			}
>>> -		} else if (secure) {
>>> -			secure = false;
>>> -			amdgpu_ring_emit_tmz(ring, false);
>>>  		}
>>>
>>>  		amdgpu_ring_emit_ib(ring, job, ib, status);
>>>  		status &= ~AMDGPU_HAVE_CTX_SWITCH;
>>>  	}
>>>
>>> -	if (secure) {
>>> -		secure = false;
>>> -		amdgpu_ring_emit_tmz(ring, false);
>>> -	}
>>> +	if (job && ring->funcs->emit_frame_cntl)
>>> +		amdgpu_ring_emit_frame_cntl(ring, false,
>>> +					    secure ? true : false);

"secure" is already a boolean.

Regards,
Luben

>>>
>>>  #ifdef CONFIG_X86_64
>>>  	if (!(adev->flags & AMD_IS_APU))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 24caff0..4d019d6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -166,7 +166,8 @@ struct amdgpu_ring_funcs {
>>>  	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
>>>  					uint32_t reg0, uint32_t reg1,
>>>  					uint32_t ref, uint32_t mask);
>>> -	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>>> +	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
>>> +				bool secure);
>>>  	/* priority functions */
>>>  	void (*set_priority) (struct amdgpu_ring *ring,
>>>  			      enum drm_sched_priority priority);
>>> @@ -247,7 +248,7 @@ struct amdgpu_ring {
>>>  #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>>  #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>>  #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
>>> -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>> +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
>>>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> index 7b61583..748ac35 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>> @@ -256,7 +256,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
>>>  static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
>>>  static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
>>>  static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
>>>
>>>  static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
>>>  {
>>> @@ -4729,12 +4729,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
>>>  				   sizeof(de_payload) >> 2);
>>>  }
>>>
>>> -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> +					   bool secure)
>>>  {
>>> -	if (amdgpu_is_tmz(ring->adev)) {
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> -	}
>>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>>  }
>>>
>>>  static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>>> @@ -5188,7 +5189,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>>  	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
>>>  	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
>>>  	.preempt_ib = gfx_v10_0_ring_preempt_ib,
>>> -	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>>> +	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
>>>  	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>>  	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>>  	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 1c7a16b..fbde712 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -5230,12 +5230,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
>>>  	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
>>>  }
>>>
>>> -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
>>> +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
>>> +					  bool secure)
>>>  {
>>> -	if (amdgpu_is_tmz(ring->adev)) {
>>> -		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> -		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
>>> -	}
>>> +	uint32_t v = secure ? FRAME_TMZ : 0;
>>> +
>>> +	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
>>> +	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
>>>  }
>>>
>>>  static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
>>> @@ -6477,7 +6478,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>>  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
>>>  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
>>>  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
>>> -	.emit_tmz = gfx_v9_0_ring_emit_tmz,
>>> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
>>>  	.emit_wreg = gfx_v9_0_ring_emit_wreg,
>>>  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>>  	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
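For reference, Christian's earlier suggestion -- folding the NULL check into amdgpu_ring_emit_frame_cntl() itself and turning the macro into an inline -- could look roughly like the sketch below. This is only an illustration of the suggestion, not part of the patch above.

/* Sketch only: a static inline that carries the check, so call sites
 * no longer need their own "if (ring->funcs->emit_frame_cntl)".
 */
static inline void amdgpu_ring_emit_frame_cntl(struct amdgpu_ring *ring,
					       bool start, bool secure)
{
	if (ring->funcs->emit_frame_cntl)
		ring->funcs->emit_frame_cntl(ring, start, secure);
}

With such a helper, the "job && ring->funcs->emit_frame_cntl" checks in amdgpu_ib_schedule() would reduce to checking job alone, at the cost of hiding the no-op case inside the helper.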