If we introduce a new mutex for the exclusive use of GEM's runtime power management, we can remove its requirement of holding the struct_mutex. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 9 +-- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem.h | 3 - drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_pm.c | 70 ++++++++++++------- drivers/gpu/drm/i915/i915_request.c | 24 ++----- .../gpu/drm/i915/selftests/i915_gem_context.c | 4 +- .../gpu/drm/i915/selftests/i915_gem_object.c | 13 ++-- .../gpu/drm/i915/selftests/mock_gem_device.c | 3 +- 10 files changed, 68 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 47bf07a59b5e..98ff1a14ccf3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2034,7 +2034,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %d\n", rps->enabled); seq_printf(m, "GPU busy? %s [%d requests]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); + yesno(dev_priv->gt.awake), + atomic_read(&dev_priv->gt.active_requests)); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -2055,7 +2056,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && - dev_priv->gt.active_requests) { + atomic_read(&dev_priv->gt.active_requests)) { u32 rpup, rpupei; u32 rpdown, rpdownei; @@ -3087,7 +3088,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake)); seq_printf(m, "Global active requests: %d\n", - dev_priv->gt.active_requests); + atomic_read(&dev_priv->gt.active_requests)); seq_printf(m, "CS timestamp frequency: %u kHz\n", RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); @@ -3933,7 +3934,7 @@ i915_drop_caches_set(void *data, u64 val) if (val & DROP_IDLE) { do { - if (READ_ONCE(i915->gt.active_requests)) + if (atomic_read(&i915->gt.active_requests)) flush_delayed_work(&i915->gt.retire_work); drain_delayed_work(&i915->gt.idle_work); } while (READ_ONCE(i915->gt.awake)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 11803d485275..7c7afe99986c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2008,7 +2008,8 @@ struct drm_i915_private { intel_engine_mask_t active_engines; struct list_head active_rings; struct list_head closed_vma; - u32 active_requests; + atomic_t active_requests; + struct mutex active_mutex; /** * Is the GPU currently considered idle, or busy executing diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 47c672432594..79919e0cf03d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2914,7 +2914,7 @@ wait_for_timelines(struct drm_i915_private *i915, struct i915_gt_timelines *gt = &i915->gt.timelines; struct i915_timeline *tl; - if (!READ_ONCE(i915->gt.active_requests)) + if (!atomic_read(&i915->gt.active_requests)) return timeout; mutex_lock(>->mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 5c073fe73664..bd13198a9058 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -77,9 +77,6 @@ struct drm_i915_private; #define I915_GEM_IDLE_TIMEOUT (HZ / 5) -void i915_gem_park(struct drm_i915_private *i915); -void i915_gem_unpark(struct drm_i915_private *i915); - static inline void __tasklet_disable_sync_once(struct tasklet_struct *t) { if (!atomic_fetch_inc(&t->count)) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 060f5903544a..20e835a7f116 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -38,7 +38,7 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl { static bool ggtt_is_idle(struct drm_i915_private *i915) { - return !i915->gt.active_requests; + return !atomic_read(&i915->gt.active_requests); } static int ggtt_flush(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_gem_pm.c b/drivers/gpu/drm/i915/i915_gem_pm.c index faa4eb42ec0a..6ecd9f8ac87d 100644 --- a/drivers/gpu/drm/i915/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/i915_gem_pm.c @@ -14,8 +14,8 @@ static void __i915_gem_park(struct drm_i915_private *i915) GEM_TRACE("\n"); - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); + lockdep_assert_held(&i915->gt.active_mutex); + GEM_BUG_ON(atomic_read(&i915->gt.active_requests)); GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); if (!i915->gt.awake) @@ -94,12 +94,13 @@ static void idle_work_handler(struct work_struct *work) if (!READ_ONCE(i915->gt.awake)) return; - if (READ_ONCE(i915->gt.active_requests)) + if (atomic_read(&i915->gt.active_requests)) return; rearm_hangcheck = cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work); + /* XXX need to support lockless kernel_context before removing! */ if (!mutex_trylock(&i915->drm.struct_mutex)) { /* Currently busy, come back later */ mod_delayed_work(i915->wq, @@ -114,18 +115,19 @@ static void idle_work_handler(struct work_struct *work) * while we are idle (such as the GPU being power cycled), no users * will be harmed. */ + mutex_lock(&i915->gt.active_mutex); if (!work_pending(&i915->gt.idle_work.work) && - !i915->gt.active_requests) { - ++i915->gt.active_requests; /* don't requeue idle */ + !atomic_read(&i915->gt.active_requests)) { + atomic_inc(&i915->gt.active_requests); /* don't requeue idle */ switch_to_kernel_context_sync(i915, i915->gt.active_engines); - if (!--i915->gt.active_requests) { + if (atomic_dec_and_test(&i915->gt.active_requests)) { __i915_gem_park(i915); rearm_hangcheck = false; } } - + mutex_unlock(&i915->gt.active_mutex); mutex_unlock(&i915->drm.struct_mutex); out_rearm: @@ -137,26 +139,16 @@ static void idle_work_handler(struct work_struct *work) void i915_gem_park(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(i915->gt.active_requests); - - if (!i915->gt.awake) - return; - /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ - mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); + GEM_BUG_ON(!atomic_read(&i915->gt.active_requests)); + if (atomic_dec_and_test(&i915->gt.active_requests)) + mod_delayed_work(i915->wq, + &i915->gt.idle_work, + msecs_to_jiffies(100)); } -void i915_gem_unpark(struct drm_i915_private *i915) +static void __i915_gem_unpark(struct drm_i915_private *i915) { - GEM_TRACE("\n"); - - lockdep_assert_held(&i915->drm.struct_mutex); - GEM_BUG_ON(!i915->gt.active_requests); - assert_rpm_wakelock_held(i915); - if (i915->gt.awake) return; @@ -191,11 +183,29 @@ void i915_gem_unpark(struct drm_i915_private *i915) round_jiffies_up_relative(HZ)); } +void i915_gem_unpark(struct drm_i915_private *i915) +{ + if (atomic_add_unless(&i915->gt.active_requests, 1, 0)) + return; + + mutex_lock(&i915->gt.active_mutex); + if (!atomic_read(&i915->gt.active_requests)) { + GEM_TRACE("\n"); + __i915_gem_unpark(i915); + smp_mb__before_atomic(); + } + atomic_inc(&i915->gt.active_requests); + mutex_unlock(&i915->gt.active_mutex); +} + bool i915_gem_load_power_context(struct drm_i915_private *i915) { + mutex_lock(&i915->gt.active_mutex); + atomic_inc(&i915->gt.active_requests); + /* Force loading the kernel context on all engines */ if (!switch_to_kernel_context_sync(i915, ALL_ENGINES)) - return false; + goto err_active; /* * Immediately park the GPU so that we enable powersaving and @@ -203,9 +213,20 @@ bool i915_gem_load_power_context(struct drm_i915_private *i915) * unpark and start using the engine->pinned_default_state, otherwise * it is in limbo and an early reset may fail. */ + + if (!atomic_dec_and_test(&i915->gt.active_requests)) + goto err_unlock; + __i915_gem_park(i915); + mutex_unlock(&i915->gt.active_mutex); return true; + +err_active: + atomic_dec(&i915->gt.active_requests); +err_unlock: + mutex_unlock(&i915->gt.active_mutex); + return false; } void i915_gem_suspend(struct drm_i915_private *i915) @@ -337,5 +358,6 @@ void i915_gem_resume(struct drm_i915_private *i915) void i915_gem_init__pm(struct drm_i915_private *i915) { + mutex_init(&i915->gt.active_mutex); INIT_DELAYED_WORK(&i915->gt.idle_work, idle_work_handler); } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index e9c2094ab8ea..8d396f3c747d 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -31,6 +31,7 @@ #include "i915_drv.h" #include "i915_active.h" +#include "i915_gem_pm.h" /* XXX layering violation! */ #include "i915_globals.h" #include "i915_reset.h" @@ -130,19 +131,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void reserve_gt(struct drm_i915_private *i915) -{ - if (!i915->gt.active_requests++) - i915_gem_unpark(i915); -} - -static void unreserve_gt(struct drm_i915_private *i915) -{ - GEM_BUG_ON(!i915->gt.active_requests); - if (!--i915->gt.active_requests) - i915_gem_park(i915); -} - static void advance_ring(struct i915_request *request) { struct intel_ring *ring = request->ring; @@ -304,7 +292,7 @@ static void i915_request_retire(struct i915_request *request) __retire_engine_upto(request->engine, request); - unreserve_gt(request->i915); + i915_gem_park(request->i915); i915_sched_node_fini(&request->sched); i915_request_put(request); @@ -607,8 +595,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) u32 seqno; int ret; - lockdep_assert_held(&i915->drm.struct_mutex); - /* * Preempt contexts are reserved for exclusive use to inject a * preemption context switch. They are never to be used for any trivial @@ -633,7 +619,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (IS_ERR(ce)) return ERR_CAST(ce); - reserve_gt(i915); + i915_gem_unpark(i915); mutex_lock(&ce->ring->timeline->mutex); /* Move our oldest request to the slab-cache (if not in use!) */ @@ -766,7 +752,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) kmem_cache_free(global.slab_requests, rq); err_unreserve: mutex_unlock(&ce->ring->timeline->mutex); - unreserve_gt(i915); + i915_gem_park(i915); intel_context_unpin(ce); return ERR_PTR(ret); } @@ -1356,7 +1342,7 @@ void i915_retire_requests(struct drm_i915_private *i915) lockdep_assert_held(&i915->drm.struct_mutex); - if (!i915->gt.active_requests) + if (!atomic_read(&i915->gt.active_requests)) return; list_for_each_entry_safe(ring, tmp, diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 45f73b8b4e6d..6ce366091e0b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1646,9 +1646,9 @@ static int __igt_switch_to_kernel_context(struct drm_i915_private *i915, return err; } - if (i915->gt.active_requests) { + if (atomic_read(&i915->gt.active_requests)) { pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n", - i915->gt.active_requests, + atomic_read(&i915->gt.active_requests), pass, from_idle ? "idle" : "busy", __engine_name(i915, engines), is_power_of_2(engines) ? "" : "s"); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 971148fbe6f5..c2b08fdf23cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -506,12 +506,7 @@ static void disable_retire_worker(struct drm_i915_private *i915) i915_gem_shrinker_unregister(i915); mutex_lock(&i915->drm.struct_mutex); - if (!i915->gt.active_requests++) { - intel_wakeref_t wakeref; - - with_intel_runtime_pm(i915, wakeref) - i915_gem_unpark(i915); - } + i915_gem_unpark(i915); mutex_unlock(&i915->drm.struct_mutex); cancel_delayed_work_sync(&i915->gt.retire_work); @@ -616,10 +611,10 @@ static int igt_mmap_offset_exhaustion(void *arg) drm_mm_remove_node(&resv); out_park: mutex_lock(&i915->drm.struct_mutex); - if (--i915->gt.active_requests) - queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); - else + if (atomic_dec_and_test(&i915->gt.active_requests)) queue_delayed_work(i915->wq, &i915->gt.idle_work, 0); + else + queue_delayed_work(i915->wq, &i915->gt.retire_work, 0); mutex_unlock(&i915->drm.struct_mutex); i915_gem_shrinker_register(i915); return err; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 60bbf8b4df40..7afc5ee8dda5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,7 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) mock_engine_flush(engine); i915_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(atomic_read(&i915->gt.active_requests)); } static void mock_device_release(struct drm_device *dev) @@ -203,6 +203,7 @@ struct drm_i915_private *mock_gem_device(void) i915_timelines_init(i915); + mutex_init(&i915->gt.active_mutex); INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.closed_vma); -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx