On Sat, Sep 09, 2023 at 08:58:45PM -0700, Alan Previn wrote: > If we are at the end of suspend or very early in resume > its possible an async fence signal could lead us to the > execution of the context destruction worker (after the > prior worker flush). > > Even if checking that the CT is enabled before calling > destroyed_worker_func, guc_lrc_desc_unpin may still fail > because in corner cases, as we traverse the GuC's > context-destroy list, the CT could get disabled in the mid > of it right before calling the GuC's CT send function. > > We've witnessed this race condition once every ~6000-8000 > suspend-resume cycles while ensuring workloads that render > something onscreen is continuously started just before > we suspend (and the workload is small enough to complete > and trigger the queued engine/context free-up either very > late in suspend or very early in resume). > > In such a case, we need to unroll the unpin process because > guc-lrc-unpin takes a gt wakeref which only gets released in > the G2H IRQ reply that never comes through in this corner > case. That will cascade into a kernel hang later at the tail > end of suspend in this function: > > intel_wakeref_wait_for_idle(>->wakeref) > (called by) - intel_gt_pm_wait_for_idle > (called by) - wait_for_suspend > > Doing this unroll and keeping the context in the GuC's > destroy-list will allow the context to get picked up on > the next destroy worker invocation or purged as part of a > major GuC sanitization or reset flow. > > Signed-off-by: Alan Previn <alan.previn.teres.alexis@xxxxxxxxx> > Tested-by: Mousumi Jana <mousumi.jana@xxxxxxxxx> > --- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 76 ++++++++++++++++--- > 1 file changed, 65 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 0ed44637bca0..db7df1217350 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -235,6 +235,13 @@ set_context_destroyed(struct intel_context *ce) > ce->guc_state.sched_state |= SCHED_STATE_DESTROYED; > } > > +static inline void > +clr_context_destroyed(struct intel_context *ce) > +{ > + lockdep_assert_held(&ce->guc_state.lock); > + ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED; > +} > + > static inline bool context_pending_disable(struct intel_context *ce) > { > return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE; > @@ -612,6 +619,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, > u32 g2h_len_dw, > bool loop) > { > + int ret; > + > /* > * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), > * so we don't handle the case where we don't get a reply because we > @@ -622,7 +631,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc, > if (g2h_len_dw) > atomic_inc(&guc->outstanding_submission_g2h); > > - return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > + ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); > + if (ret) > + atomic_dec(&guc->outstanding_submission_g2h); > + > + return ret; > } > > int intel_guc_wait_for_pending_msg(struct intel_guc *guc, > @@ -3173,12 +3186,13 @@ static void guc_context_close(struct intel_context *ce) > spin_unlock_irqrestore(&ce->guc_state.lock, flags); > } > > -static inline void guc_lrc_desc_unpin(struct intel_context *ce) > +static inline int guc_lrc_desc_unpin(struct intel_context *ce) > { > struct intel_guc *guc = ce_to_guc(ce); > struct intel_gt *gt = guc_to_gt(guc); > unsigned long flags; > bool disabled; > + int ret; > > GEM_BUG_ON(!intel_gt_pm_is_awake(gt)); > GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id)); > @@ -3188,19 +3202,33 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) > /* Seal race with Reset */ > spin_lock_irqsave(&ce->guc_state.lock, flags); > disabled = submission_disabled(guc); > - if (likely(!disabled)) { > - __intel_gt_pm_get(gt); > - set_context_destroyed(ce); > - clr_context_registered(ce); > - } > - spin_unlock_irqrestore(&ce->guc_state.lock, flags); > if (unlikely(disabled)) { > + spin_unlock_irqrestore(&ce->guc_state.lock, flags); > release_guc_id(guc, ce); > __guc_context_destroy(ce); > - return; > + return 0; > } > > - deregister_context(ce, ce->guc_id.id); > + /* GuC is active, lets destroy this context, > + * but at this point we can still be racing with > + * suspend, so we undo everything if the H2G fails > + */ > + > + /* Change context state to destroyed and get gt-pm */ > + __intel_gt_pm_get(gt); > + set_context_destroyed(ce); > + clr_context_registered(ce); > + > + ret = deregister_context(ce, ce->guc_id.id); > + if (ret) { > + /* Undo the state change and put gt-pm if that failed */ > + set_context_registered(ce); > + clr_context_destroyed(ce); > + intel_gt_pm_put(gt); This is a might_sleep inside a spin_lock! Are you 100% confident no WARN was seeing during the tests indicated in the commit msg? > + } > + spin_unlock_irqrestore(&ce->guc_state.lock, flags); > + > + return 0; If you are always returning 0, there's no pointing in s/void/int... > } > > static void __guc_context_destroy(struct intel_context *ce) > @@ -3268,7 +3296,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc) > if (!ce) > break; > > - guc_lrc_desc_unpin(ce); > + if (guc_lrc_desc_unpin(ce)) { > + /* > + * This means GuC's CT link severed mid-way which could happen > + * in suspend-resume corner cases. In this case, put the > + * context back into the destroyed_contexts list which will > + * get picked up on the next context deregistration event or > + * purged in a GuC sanitization event (reset/unload/wedged/...). > + */ > + spin_lock_irqsave(&guc->submission_state.lock, flags); > + list_add_tail(&ce->destroyed_link, > + &guc->submission_state.destroyed_contexts); > + spin_unlock_irqrestore(&guc->submission_state.lock, flags); > + /* Bail now since the list might never be emptied if h2gs fail */ > + break; > + } > + > } > } > > @@ -3279,6 +3322,17 @@ static void destroyed_worker_func(struct work_struct *w) > struct intel_gt *gt = guc_to_gt(guc); > int tmp; > > + /* > + * In rare cases we can get here via async context-free fence-signals that > + * come very late in suspend flow or very early in resume flows. In these > + * cases, GuC won't be ready but just skipping it here is fine as these > + * pending-destroy-contexts get destroyed totally at GuC reset time at the > + * end of suspend.. OR.. this worker can be picked up later on the next > + * context destruction trigger after resume-completes who is triggering the work queue again? I mean, I'm wondering if it is necessary to re-schedule it from inside... but also wonder if this is safe anyway... > + */ > + if (!intel_guc_is_ready(guc)) > + return; > + > with_intel_gt_pm(gt, tmp) > deregister_destroyed_contexts(guc); > } > -- > 2.39.0 >