In the debate as to whether the second read of active->request is
ordered after the dependent reads of the first read of active->request,
just give in and throw a smp_rmb() in there so that ordering of loads is
assured.

v2: Explain the manual smp_rmb()

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem.c         | 25 ++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_request.h |  3 +++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a..654f0b015f97 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3735,7 +3735,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
-static __always_inline unsigned __busy_read_flag(unsigned int id)
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
 {
 	/* Note that we could alias engines in the execbuf API, but
 	 * that would be very unwise as it prevents userspace from
@@ -3753,7 +3753,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
 	return id;
 }
 
-static __always_inline unsigned
+static __always_inline unsigned int
 __busy_set_if_active(const struct i915_gem_active *active,
 		     unsigned int (*flag)(unsigned int id))
 {
@@ -3770,19 +3770,34 @@ __busy_set_if_active(const struct i915_gem_active *active,
 
 		id = request->engine->exec_id;
 
-		/* Check that the pointer wasn't reassigned and overwritten. */
+		/* Check that the pointer wasn't reassigned and overwritten.
+		 *
+		 * In __i915_gem_active_get_rcu(), we enforce ordering between
+		 * the first rcu pointer dereference (imposing a
+		 * read-dependency only on access through the pointer) and
+		 * the second lockless access through the memory barrier
+		 * following a successful atomic_inc_not_zero(). Here there
+		 * is no such barrier, and so we must manually insert an
+		 * explicit read barrier to ensure that the following
+		 * access occurs after all the loads through the first
+		 * pointer.
+		 *
+		 * The corresponding write barrier is part of
+		 * rcu_assign_pointer().
+		 */
+		smp_rmb();
 		if (request == rcu_access_pointer(active->request))
 			return flag(id);
 	} while (1);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_reader(const struct i915_gem_active *active)
 {
 	return __busy_set_if_active(active, __busy_read_flag);
 }
 
-static inline unsigned
+static __always_inline unsigned int
 busy_check_writer(const struct i915_gem_active *active)
 {
 	return __busy_set_if_active(active, __busy_write_id);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 3496e28785e7..b2456dede3ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -497,6 +497,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
 	 * incremented) then the following read for rcu_access_pointer()
 	 * must occur after the atomic operation and so confirm
 	 * that this request is the one currently being tracked.
+	 *
+	 * The corresponding write barrier is part of
+	 * rcu_assign_pointer().
 	 */
 	if (!request || request == rcu_access_pointer(active->request))
 		return rcu_pointer_handoff(request);
-- 
2.8.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx