The struct_mutex can have some tricky interactions with other mutexes (mainly due to using nasty constructs like stop_machine() from within its confines). This makes it "illegal" (lockdep should generate WARNs) to take the struct_mutex from certain paths like suspend, where the locking order may be inverted. We can extend the RCU request management to track activity on an engine and thereby wait upon all GPU activity without taking the struct_mutex. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem.c | 56 +++++++++++++-------------------- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 8 +++-- drivers/gpu/drm/i915/i915_gem_request.h | 11 +++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 3 +- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 30 +++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 26 ++++++++------- 10 files changed, 73 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 76e5a241c7be..c1e91589e7bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2179,13 +2179,18 @@ static void i915_gem_reset_engine_status(struct intel_engine_cs *engine) static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) { + struct drm_i915_gem_request *request; struct intel_ring *ring; + request = i915_gem_active_peek(&engine->last_request, + &engine->i915->dev->struct_mutex); + /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. 
*/ - intel_engine_init_seqno(engine, engine->last_submitted_seqno); + if (request) + intel_engine_init_seqno(engine, request->fence.seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2207,15 +2212,9 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. */ - if (!list_empty(&engine->request_list)) { - struct drm_i915_gem_request *request; - - request = list_last_entry(&engine->request_list, - struct drm_i915_gem_request, - link); - + if (request) i915_gem_request_retire_upto(request); - } + GEM_BUG_ON(intel_engine_is_active(engine)); /* Having flushed all requests from all queues, we know that all * ringbuffers must now be empty. However, since we do not reclaim @@ -2614,8 +2613,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; int ret; - lockdep_assert_held(&dev_priv->dev->struct_mutex); - for_each_engine(engine, dev_priv) { if (engine->last_context == NULL) continue; @@ -3787,47 +3784,36 @@ struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, return NULL; } -static void -i915_gem_stop_engines(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *engine; - - for_each_engine(engine, dev_priv) - dev_priv->gt.stop_engine(engine); -} - int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; + int ret; - mutex_lock(&dev->struct_mutex); ret = i915_gem_wait_for_idle(dev_priv); if (ret) - goto err; - - i915_gem_retire_requests(dev_priv); - - i915_gem_stop_engines(dev); - i915_gem_context_lost(dev_priv); - mutex_unlock(&dev->struct_mutex); + return ret; cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); flush_delayed_work(&dev_priv->gt.idle_work); + 
mutex_lock(&dev_priv->dev->struct_mutex); + /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. */ - WARN_ON(dev_priv->gt.awake); + if (dev_priv->gt.awake) { + if (INTEL_INFO(dev_priv)->gen >= 6) + gen6_rps_idle(dev_priv); + intel_runtime_pm_put(dev_priv); + dev_priv->gt.awake = false; + } - return 0; + i915_gem_context_lost(dev_priv); + mutex_unlock(&dev_priv->dev->struct_mutex); -err: - mutex_unlock(&dev->struct_mutex); - return ret; + return 0; } void i915_gem_init_swizzling(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 680365f4c4cd..3ead9359dfa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -69,7 +69,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; for_each_engine(engine, dev_priv) { - if (!list_empty(&engine->request_list)) + if (intel_engine_is_active(engine)) return false; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a0cdd3f10566..016edc6f2d0b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -445,6 +445,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, trace_i915_gem_request_add(request); + trace_i915_gem_request_add(request); request->head = request_start; /* Whilst this request exists, batch_obj will be on the @@ -462,7 +463,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, */ request->emitted_jiffies = jiffies; request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->fence.seqno); + engine->last_submitted_seqno = request->fence.seqno; + i915_gem_active_set(&engine->last_request, request); list_add_tail(&request->link, &engine->request_list); /* Record the position of the start of the request so that @@ -690,7 +692,7 @@ complete: } if 
(!IS_ERR_OR_NULL(rps) && - req->fence.seqno == req->engine->last_submitted_seqno) { + req == __i915_gem_active_peek(&req->engine->last_request)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -757,7 +759,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv) { i915_gem_retire_requests_ring(engine); - if (list_empty(&engine->request_list)) + if (!intel_engine_is_active(engine)) dev_priv->gt.active_engines &= ~intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 006f212b7fd6..8d1225999fae 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -27,6 +27,17 @@ #include <linux/fence.h> +struct intel_wait { + struct rb_node node; + struct task_struct *task; + u32 seqno; +}; + +struct intel_signal_node { + struct rb_node node; + struct intel_wait wait; +}; + /** * Request queue structure. 
* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index cfae2fe1e14f..c2cf5bd57db5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1003,7 +1003,7 @@ static void i915_record_ring_state(struct drm_i915_private *dev_priv, ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ering->acthd = intel_engine_get_active_head(engine); ering->seqno = intel_engine_get_seqno(engine); - ering->last_seqno = engine->last_submitted_seqno; + ering->last_seqno = __active_get_seqno(&engine->last_request); ering->start = I915_READ_START(engine); ering->head = I915_READ_HEAD(engine); ering->tail = I915_READ_TAIL(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1ffc997b19af..3987b7984fd8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2805,8 +2805,7 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) static bool ring_idle(struct intel_engine_cs *engine, u32 seqno) { - return i915_seqno_passed(seqno, - READ_ONCE(engine->last_submitted_seqno)); + return !intel_engine_is_active(engine); } static bool diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6cdc421fdc37..4bf63af2a282 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1787,7 +1787,6 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id) engine->fw_domains = fw_domains; - INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->buffers); INIT_LIST_HEAD(&engine->execlist_queue); spin_lock_init(&engine->execlist_lock); @@ -1799,6 +1798,7 @@ logical_ring_setup(struct drm_device *dev, enum intel_engine_id id) logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine, info->irq_shift); + intel_engine_init_requests(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(engine, &engine->batch_pool); diff --git 
a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c141d3e15eed..45bf830a9b10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6335,7 +6335,7 @@ bool i915_gpu_busy(void) dev_priv = i915_mch_dev; for_each_engine(engine, dev_priv) - ret |= !list_empty(&engine->request_list); + ret |= intel_engine_is_active(engine); out_unlock: spin_unlock_irq(&mchdev_lock); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f86039455c5a..f172ac6a06dc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2058,13 +2058,13 @@ static int intel_init_engine(struct drm_device *dev, engine->i915 = dev_priv; engine->fence_context = fence_context_alloc(1); - INIT_LIST_HEAD(&engine->request_list); INIT_LIST_HEAD(&engine->execlist_queue); INIT_LIST_HEAD(&engine->buffers); i915_gem_batch_pool_init(engine, &engine->batch_pool); memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno)); + intel_engine_init_requests(engine); intel_engine_init_breadcrumbs(engine); /* We may need to do things with the shrinker which @@ -2152,22 +2152,24 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) engine->i915 = NULL; } -int intel_engine_idle(struct intel_engine_cs *engine) +static void +intel_engine_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) { - struct drm_i915_gem_request *req; - - /* Wait upon the last request to be completed */ - if (list_empty(&engine->request_list)) - return 0; +} - req = list_entry(engine->request_list.prev, - struct drm_i915_gem_request, - link); +void intel_engine_init_requests(struct intel_engine_cs *engine) +{ + init_request_active(&engine->last_request, intel_engine_retire); + INIT_LIST_HEAD(&engine->request_list); +} - /* Make sure we do not trigger any retires */ - return __i915_wait_request(req, - req->i915->mm.interruptible, - NULL, NULL); +int 
intel_engine_idle(struct intel_engine_cs *engine) +{ + /* Wait upon the last request to be completed */ + return i915_gem_active_wait_unlocked(&engine->last_request, + engine->i915->mm.interruptible, + NULL, NULL); } int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0976e155edc0..d19fb8c24919 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,6 +3,7 @@ #include <linux/hashtable.h> #include "i915_gem_batch_pool.h" +#include "i915_gem_request.h" #define I915_CMD_HASH_ORDER 9 @@ -310,6 +311,14 @@ struct intel_engine_cs { * inspecting request list. */ u32 last_submitted_seqno; + + /* An RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_gem_active last_request; + unsigned user_interrupts; struct i915_gem_context *last_context; @@ -455,6 +464,7 @@ void intel_ring_update_space(struct intel_ring *ring); int __must_check intel_engine_idle(struct intel_engine_cs *engine); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); +void intel_engine_init_requests(struct intel_engine_cs *engine); int intel_init_pipe_control(struct intel_engine_cs *engine, int size); void intel_fini_pipe_control(struct intel_engine_cs *engine); @@ -493,17 +503,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ -struct intel_wait { - struct rb_node node; - struct task_struct *task; - u32 seqno; -}; - -struct intel_signal_node { - struct rb_node node; - struct intel_wait wait; -}; - void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) { @@ -540,4 
+539,9 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned intel_kick_waiters(struct drm_i915_private *i915); unsigned intel_kick_signalers(struct drm_i915_private *i915); +static inline bool intel_engine_is_active(struct intel_engine_cs *engine) +{ + return __i915_gem_active_is_busy(&engine->last_request); +} + #endif /* _INTEL_RINGBUFFER_H_ */ -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx