On Sun, Aug 07, 2016 at 03:45:09PM +0100, Chris Wilson wrote:
> In the debate as to whether the second read of active->request is
> ordered after the dependent reads of the first read of active->request,
> just give in and throw a smp_rmb() in there so that ordering of loads is
> assured.
>
> v2: Explain the manual smp_rmb()
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx>

r-b confirmed.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c         | 25 ++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_request.h |  3 +++
>  2 files changed, 23 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f4f8eaa90f2a..654f0b015f97 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3735,7 +3735,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
>  	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
>  }
>
> -static __always_inline unsigned __busy_read_flag(unsigned int id)
> +static __always_inline unsigned int __busy_read_flag(unsigned int id)
>  {
>  	/* Note that we could alias engines in the execbuf API, but
>  	 * that would be very unwise as it prevents userspace from
> @@ -3753,7 +3753,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
>  	return id;
>  }
>
> -static __always_inline unsigned
> +static __always_inline unsigned int
>  __busy_set_if_active(const struct i915_gem_active *active,
>  		     unsigned int (*flag)(unsigned int id))
>  {
> @@ -3770,19 +3770,34 @@ __busy_set_if_active(const struct i915_gem_active *active,
>
>  		id = request->engine->exec_id;
>
> -		/* Check that the pointer wasn't reassigned and overwritten. */
> +		/* Check that the pointer wasn't reassigned and overwritten.
> +		 *
> +		 * In __i915_gem_active_get_rcu(), we enforce ordering between
> +		 * the first rcu pointer dereference (imposing a
> +		 * read-dependency only on access through the pointer) and
> +		 * the second lockless access through the memory barrier
> +		 * following a successful atomic_inc_not_zero(). Here there
> +		 * is no such barrier, and so we must manually insert an
> +		 * explicit read barrier to ensure that the following
> +		 * access occurs after all the loads through the first
> +		 * pointer.
> +		 *
> +		 * The corresponding write barrier is part of
> +		 * rcu_assign_pointer().
> +		 */
> +		smp_rmb();
>  		if (request == rcu_access_pointer(active->request))
>  			return flag(id);
>  	} while (1);
>  }
>
> -static inline unsigned
> +static __always_inline unsigned int
>  busy_check_reader(const struct i915_gem_active *active)
>  {
>  	return __busy_set_if_active(active, __busy_read_flag);
>  }
>
> -static inline unsigned
> +static __always_inline unsigned int
>  busy_check_writer(const struct i915_gem_active *active)
>  {
>  	return __busy_set_if_active(active, __busy_write_id);
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 3496e28785e7..b2456dede3ad 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -497,6 +497,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
>  	 * incremented) then the following read for rcu_access_pointer()
>  	 * must occur after the atomic operation and so confirm
>  	 * that this request is the one currently being tracked.
> +	 *
> +	 * The corresponding write barrier is part of
> +	 * rcu_assign_pointer().
>  	 */
>  	if (!request || request == rcu_access_pointer(active->request))
>  		return rcu_pointer_handoff(request);
> --
> 2.8.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
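For anyone following the barrier argument from the sidelines, the pattern
being fixed reduces to the standalone sketch below. It is only an
illustration: the demo_* names are hypothetical and the structs are
simplified stand-ins for the i915 types, but rcu_read_lock(),
rcu_read_unlock(), rcu_dereference(), rcu_access_pointer() and smp_rmb()
are the real kernel primitives.

#include <linux/rcupdate.h>	/* rcu_dereference(), rcu_access_pointer() */
#include <asm/barrier.h>	/* smp_rmb() */

/* Hypothetical stand-ins for drm_i915_gem_request / i915_gem_active. */
struct demo_request {
	unsigned int exec_id;
};

struct demo_tracker {
	struct demo_request __rcu *request;
};

/* Lockless: report the exec_id of the tracked request, or 0 if idle. */
static unsigned int demo_busy_id(struct demo_tracker *tracker)
{
	struct demo_request *request;
	unsigned int id;

	rcu_read_lock();
	do {
		/* The address dependency here orders the later
		 * request->exec_id load, but nothing orders the
		 * second, independent load of tracker->request in
		 * the loop condition below.
		 */
		request = rcu_dereference(tracker->request);
		if (!request) {
			id = 0;
			break;
		}

		id = request->exec_id;

		/* Force the recheck to be ordered after the exec_id
		 * load; the matching write barrier is supplied by
		 * rcu_assign_pointer() on the update side.
		 */
		smp_rmb();
	} while (request != rcu_access_pointer(tracker->request));
	rcu_read_unlock();

	return id;
}

Without the smp_rmb(), nothing stops the recheck load of tracker->request
from being satisfied before the read of request->exec_id, in which case
the loop would "confirm" a stale id against a pointer read at a different
time. The writer needs no extra barrier because rcu_assign_pointer()
already publishes the new request with release semantics.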