On 18/11/2019 23:02, Chris Wilson wrote:
Previously, we assumed we could use mutex_trylock() within an atomic
context, falling back to a working if contended. However, such trickery
is illegal inside interrupt context, and so we need to always use a
worker under such circumstances. As we normally are in process context,
we can typically use a plain mutex, and only defer to a work when we
know we are caller from an interrupt path.
Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref")
References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts")
References: https://bugs.freedesktop.org/show_bug.cgi?id=111626
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 +-
drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 +++++
drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 -
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 5 +++
drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +-
drivers/gpu/drm/i915/gt/intel_reset.c | 2 +-
drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 7 ++--
drivers/gpu/drm/i915/i915_active.c | 5 ++-
drivers/gpu/drm/i915/i915_pmu.c | 6 +--
drivers/gpu/drm/i915/intel_wakeref.c | 8 ++--
drivers/gpu/drm/i915/intel_wakeref.h | 44 ++++++++++++++++----
11 files changed, 68 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 722d3eec226e..4878d16176d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -180,7 +180,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->gt);
+ /* While we call i915_vma_parked, we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..467475fce9c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_unlock_wait(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index e61f752a3cd5..7a9044ac4b75 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
- .flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b3e17399be9b..990efc27a4e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(>->wakeref);
}
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(>->wakeref);
+}
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(>->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index f7c8fec436a9..fec46afb9264 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1173,7 +1173,7 @@ __execlists_schedule_out(struct i915_request *rq,
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- intel_gt_pm_put(engine->gt);
+ intel_gt_pm_put_async(engine->gt);
Wait a minute.. wasn't the wakeref hierarchy supposed to be engine -> gt
so this place should be on the engine level?
/*
* If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b7007cd78c6f..0388f9375366 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1125,7 +1125,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..851a6c4f65c6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
- intel_engine_pm_put(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
p->critical_section_end();
- /* engine wakeref is sync (instant) */
+ intel_engine_pm_unlock_wait(engine);
From the context would it be clearer to name it
intel_engine_pm_wait_put? sync_put? flush_put? To more clearly represent
it is a pair of put_async.
+
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 5448f37c8102..dca15ace88f6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
* populated by i915_request_add_active_barriers() to point to the
* request that will eventually release them.
*/
- spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING);
llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
struct active_node *node = barrier_from_ll(pos);
struct intel_engine_cs *engine = barrier_to_engine(node);
struct rb_node **p, *parent;
+ spin_lock_irqsave_nested(&ref->tree_lock, flags,
+ SINGLE_DEPTH_NESTING);
parent = NULL;
p = &ref->tree.rb_node;
while (*p) {
@@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
}
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &ref->tree);
+ spin_unlock_irqrestore(&ref->tree_lock, flags);
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
intel_engine_pm_put(engine);
}
- spin_unlock_irqrestore(&ref->tree_lock, flags);
Pros/cons of leaving the locks where they were and using put_async,
versus this layout?
}
void i915_request_add_active_barriers(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 9b02be0ad4e6..95e824a78d4d 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
spin_lock_irqsave(&pmu->lock, flags);
@@ -360,7 +360,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
skip:
if (unlikely(mmio_lock))
spin_unlock_irqrestore(mmio_lock, flags);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
}
}
@@ -398,7 +398,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (stat)
val = intel_get_cagf(rps, stat);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 868cc78048d0..9b29176cc1ca 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
{
- if (!atomic_dec_and_test(&wf->count))
+ INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+ if (unlikely(!atomic_dec_and_test(&wf->count)))
goto unlock;
/* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
mutex_unlock(&wf->mutex);
}
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
{
INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
/* Assume we are not in process context and so cannot sleep. */
- if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
- !mutex_trylock(&wf->mutex)) {
+ if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
schedule_work(&wf->work);
return;
}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..688b9b536a69 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/bits.h>
+#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/stackdepot.h>
@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
struct intel_wakeref_ops {
int (*get)(struct intel_wakeref *wf);
int (*put)(struct intel_wakeref *wf);
-
- unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
};
struct intel_wakeref {
@@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
} while (0)
int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
/**
* intel_wakeref_get: Acquire the wakeref
@@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
}
/**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * intel_wakeref_put_flags: Release the wakeref
* @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
*
* Release our hold on the wakeref. When there are no more users,
* the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
* code otherwise.
*/
static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
{
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
- __intel_wakeref_put_last(wf);
+ __intel_wakeref_put_last(wf, flags);
+}
+
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+ might_sleep();
+ __intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+ __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
}
/**
@@ -151,6 +162,20 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
mutex_unlock(&wf->mutex);
}
+/**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits for the active callback (under the @wf->mtuex) is complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+ mutex_lock(&wf->mutex);
+ mutex_unlock(&wf->mutex);
+ flush_work(&wf->work);
+}
+
/**
* intel_wakeref_is_active: Query whether the wakeref is currently held
* @wf: the wakeref
@@ -170,6 +195,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
static inline void
__intel_wakeref_defer_park(struct intel_wakeref *wf)
{
+ lockdep_assert_held(&wf->mutex);
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
atomic_set_release(&wf->count, 1);
}
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx