This is a partial port of the following patch from John Harrison's GPU scheduler patch series: (patch sent to Intel-GFX with the subject line " [RFC 19/39] drm/i915: Added scheduler support to __wait_request() calls" on Fri 17 July 2015) Author: John Harrison <John.C.Harrison@xxxxxxxxx> Date: Thu Apr 10 10:48:55 2014 +0100 Subject: drm/i915: Added scheduler support to __wait_request() calls Removed all scheduler references and backported it to this baseline. The reason we need this is that Chris Wilson has pointed out that threads that don't hold the struct_mutex should not be thrown out of __i915_wait_request during TDR hang recovery. Therefore we need a way to determine which threads are holding the mutex and which are not. Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Mika Uoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Signed-off-by: Tomas Elf <tomas.elf@xxxxxxxxx> Signed-off-by: John Harrison <john.c.harrison@xxxxxxxxx> Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> --- Note: These names for WAIT_INTERRUPTIBLE and WAIT_LOCKED are not consistent with the ones used in the scheduler series; I agreed on consistent naming with John Harrison but forgot to update them this time. 
drivers/gpu/drm/i915/i915_drv.h | 7 +++- drivers/gpu/drm/i915/i915_gem.c | 67 ++++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_display.c | 4 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++- 4 files changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 682bf207..2aafb2f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3115,9 +3115,14 @@ void __i915_add_request(struct drm_i915_gem_request *req, __i915_add_request(req, NULL, true) #define i915_add_request_no_flush(req) \ __i915_add_request(req, NULL, false) + +/* flags used by users of __i915_wait_request */ +#define WAIT_INTERRUPTIBLE (1<<0) +#define WAIT_LOCKED (1<<1) + int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, - bool interruptible, + u32 flags, s64 *timeout, struct intel_rps_client *rps); int __must_check i915_wait_request(struct drm_i915_gem_request *req); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5ca8bd5..b8adf4a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1244,7 +1244,9 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) * __i915_wait_request - wait until execution of request has finished * @req: duh! 
* @reset_counter: reset sequence associated with the given request - * @interruptible: do an interruptible wait (normally yes) + * @flags: flags to define the nature of wait + * WAIT_INTERRUPTIBLE - do an interruptible wait (normally yes) + * WAIT_LOCKED - caller is holding struct_mutex * @timeout: in - how long to wait (NULL forever); out - how much time remaining * * Note: It is of utmost importance that the passed in seqno and reset_counter @@ -1259,7 +1261,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state) */ int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, - bool interruptible, + u32 flags, s64 *timeout, struct intel_rps_client *rps) { @@ -1268,6 +1270,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, struct drm_i915_private *dev_priv = dev->dev_private; const bool irq_test_in_progress = ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine); + bool interruptible = flags & WAIT_INTERRUPTIBLE; int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; DEFINE_WAIT(wait); unsigned long timeout_expire; @@ -1316,22 +1319,43 @@ int __i915_wait_request(struct drm_i915_gem_request *req, for (;;) { struct timer_list timer; int reset_in_progress; + bool locked = flags & WAIT_LOCKED; prepare_to_wait(&engine->irq_queue, &wait, state); + /* + * If the driver is terminally wedged then we are stuck in + * irrecoverable situation, just return -EIO as + there is no + * point in having the caller retry + */ + if (unlikely(i915_terminally_wedged(&dev_priv->gpu_error))) { + ret = -EIO; + break; + } + /* We need to check whether any gpu reset happened in between * the caller grabbing the seqno and now ... 
*/ + if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) { + ret = 0; + break; + } + reset_in_progress = i915_gem_check_wedge(&dev_priv->gpu_error, NULL, interruptible); - if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter) || - reset_in_progress) { - /* ... but upgrade the -EAGAIN to an -EIO if the gpu - * is truely gone. */ - if (reset_in_progress) - ret = reset_in_progress; - else + if (reset_in_progress) { + /* + * if either full gpu reset or engine reset is in + * progress and the caller is holding the mutex then + * ask them to try again, this should make them + * release struct_mutex otherwise if reset recovery is + * also waiting for it then we cannot continue with + * recovery unless it is released + */ + if (locked) ret = -EAGAIN; + else + ret = reset_in_progress; break; } @@ -1485,24 +1509,27 @@ i915_wait_request(struct drm_i915_gem_request *req) { struct drm_device *dev; struct drm_i915_private *dev_priv; - bool interruptible; + u32 flags; int ret; BUG_ON(req == NULL); dev = req->engine->dev; dev_priv = dev->dev_private; - interruptible = dev_priv->mm.interruptible; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); - ret = i915_gem_check_wedge(&dev_priv->gpu_error, NULL, interruptible); + flags = dev_priv->mm.interruptible ? 
WAIT_INTERRUPTIBLE : 0; + flags |= WAIT_LOCKED; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, NULL, + (flags & WAIT_INTERRUPTIBLE)); if (ret) return ret; ret = __i915_wait_request(req, atomic_read(&dev_priv->gpu_error.reset_counter), - interruptible, NULL, NULL); + flags, NULL, NULL); if (ret) return ret; @@ -1614,7 +1641,8 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, mutex_unlock(&dev->struct_mutex); for (i = 0; ret == 0 && i < n; i++) - ret = __i915_wait_request(requests[i], reset_counter, true, + ret = __i915_wait_request(requests[i], reset_counter, + WAIT_INTERRUPTIBLE, NULL, rps); mutex_lock(&dev->struct_mutex); @@ -3168,7 +3196,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (i = 0; i < n; i++) { if (ret == 0) - ret = __i915_wait_request(req[i], reset_counter, true, + ret = __i915_wait_request(req[i], reset_counter, WAIT_INTERRUPTIBLE, args->timeout_ns > 0 ? &args->timeout_ns : NULL, to_rps_client(file)); i915_gem_request_unreference__unlocked(req[i]); @@ -3199,9 +3227,14 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (!i915_semaphore_is_enabled(obj->base.dev)) { struct drm_i915_private *i915 = to_i915(obj->base.dev); + u32 flags; + + flags = i915->mm.interruptible ? 
WAIT_INTERRUPTIBLE : 0; + flags |= WAIT_LOCKED; + ret = __i915_wait_request(from_req, atomic_read(&i915->gpu_error.reset_counter), - i915->mm.interruptible, + flags, NULL, &i915->rps.semaphores); if (ret) @@ -4189,7 +4222,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (target == NULL) return 0; - ret = __i915_wait_request(target, reset_counter, true, NULL, NULL); + ret = __i915_wait_request(target, reset_counter, WAIT_INTERRUPTIBLE, NULL, NULL); if (ret == 0) queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 551541b303..e188ea8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11360,7 +11360,7 @@ static void intel_mmio_flip_work_func(struct work_struct *work) if (mmio_flip->req) { WARN_ON(__i915_wait_request(mmio_flip->req, mmio_flip->crtc->reset_counter, - false, NULL, + 0, NULL, &mmio_flip->i915->rps.mmioflips)); i915_gem_request_unreference__unlocked(mmio_flip->req); } @@ -13428,7 +13428,7 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, continue; ret = __i915_wait_request(intel_plane_state->wait_req, - reset_counter, true, + reset_counter, WAIT_INTERRUPTIBLE, NULL, NULL); /* Swallow -EIO errors to allow updates during hw lockup. 
*/ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3b087f0..df44e22 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2375,6 +2375,7 @@ static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf) int intel_engine_idle(struct intel_engine_cs *engine) { struct drm_i915_gem_request *req; + u32 flags; /* Wait upon the last request to be completed */ if (list_empty(&engine->request_list)) @@ -2384,10 +2385,13 @@ int intel_engine_idle(struct intel_engine_cs *engine) struct drm_i915_gem_request, list); + flags = to_i915(engine->dev)->mm.interruptible ? WAIT_INTERRUPTIBLE : 0; + flags |= WAIT_LOCKED; + /* Make sure we do not trigger any retires */ return __i915_wait_request(req, atomic_read(&to_i915(engine->dev)->gpu_error.reset_counter), - to_i915(engine->dev)->mm.interruptible, + flags, NULL, NULL); } -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx