As we have pinned the timeline (using tl->active_count), we can safely drop the tl->mutex as we wait for what we believe to be the final request on that timeline. This is useful for ensuring that we do not block the engine heartbeat by hogging the kernel_context's timeline on a dead GPU. References: https://gitlab.freedesktop.org/drm/intel/issues/1364 Fixes: 058179e72e09 ("drm/i915/gt: Replace hangcheck by heartbeats") Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 8a5054f21bf8..436412d07689 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -147,24 +147,31 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) fence = i915_active_fence_get(&tl->last_request); if (fence) { + mutex_unlock(&tl->mutex); + timeout = dma_fence_wait_timeout(fence, interruptible, timeout); dma_fence_put(fence); + + if (!mutex_trylock(&tl->mutex)) { + active_count++; + goto out_active; + } } } if (!retire_requests(tl) || flush_submission(gt)) active_count++; + mutex_unlock(&tl->mutex); - spin_lock(&timelines->lock); +out_active: spin_lock(&timelines->lock); /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); - mutex_unlock(&tl->mutex); /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { -- 2.25.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx