On Fri, Aug 05, 2016 at 10:14:18AM +0100, Chris Wilson wrote:
> By applying the same logic as for wait-ioctl, we can query whether a
> request has completed without holding struct_mutex. The biggest impact
> system-wide is removing the flush_active and the contention that causes.
>
> Testcase: igt/gem_busy
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Akash Goel <akash.goel@xxxxxxxxx>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 131 +++++++++++++++++++++++++++++++---------
>  1 file changed, 101 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ceb00970b2da..b99d64bfb7eb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3736,49 +3736,120 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
>  	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
>  }
>
> +static __always_inline unsigned __busy_read_flag(unsigned int id)
> +{
> +	/* Note that we could alias engines in the execbuf API, but
> +	 * that would be very unwise as it prevents userspace from
> +	 * fine control over engine selection. Ahem.
> +	 *
> +	 * This should be something like EXEC_MAX_ENGINE instead of
> +	 * I915_NUM_ENGINES.
> +	 */
> +	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
> +	return 0x10000 << id;
> +}
> +
> +static __always_inline unsigned int __busy_write_id(unsigned int id)
> +{
> +	return id;
> +}
> +
> +static __always_inline unsigned
> +__busy_set_if_active(const struct i915_gem_active *active,
> +		     unsigned int (*flag)(unsigned int id))
> +{
> +	/* For more discussion about the barriers and locking concerns,
> +	 * see __i915_gem_active_get_rcu().
> +	 */
> +	do {
> +		struct drm_i915_gem_request *request;
> +		unsigned int id;
> +
> +		request = rcu_dereference(active->request);
> +		if (!request || i915_gem_request_completed(request))
> +			return 0;
> +
> +		id = request->engine->exec_id;
> +
> +		/* Check that the pointer wasn't reassigned and overwritten. */

cf. our discussion in active_get_rcu - there's no fence_get_rcu in
sight anywhere here, hence this needs an smp_rmb().

Also a nitpick: the two rcu_dereference(active->request) calls feel a
bit silly. If we move the first one in front of the loop and update the
local request pointer (using a tmp), it would look tidier, and we could
even move the loop termination condition into the while () check (and
move the return flag(id) to the end of the function).
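Untested sketch of both suggestions combined - treat the exact
smp_rmb() placement as my assumption about which loads need ordering,
not as a verified fix:

static __always_inline unsigned
__busy_set_if_active(const struct i915_gem_active *active,
		     unsigned int (*flag)(unsigned int id))
{
	struct drm_i915_gem_request *request, *tmp;
	unsigned int id;

	/* Hoist the first lookup in front of the loop... */
	request = rcu_dereference(active->request);
	do {
		if (!request || i915_gem_request_completed(request))
			return 0;

		id = request->engine->exec_id;

		/* Assumed barrier: order the engine->exec_id read before
		 * the re-read of active->request below, since there is no
		 * fence_get_rcu() here to do that ordering for us.
		 */
		smp_rmb();

		/* ...and retry with the fresh pointer until it is stable
		 * across the id lookup.
		 */
		tmp = request;
		request = rcu_dereference(active->request);
	} while (request != tmp);

	return flag(id);
}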
-Daniel

> +		if (request == rcu_access_pointer(active->request))
> +			return flag(id);
> +	} while (1);
> +}
> +
> +static inline unsigned
> +busy_check_reader(const struct i915_gem_active *active)
> +{
> +	return __busy_set_if_active(active, __busy_read_flag);
> +}
> +
> +static inline unsigned
> +busy_check_writer(const struct i915_gem_active *active)
> +{
> +	return __busy_set_if_active(active, __busy_write_id);
> +}
> +
>  int
>  i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  		    struct drm_file *file)
>  {
>  	struct drm_i915_gem_busy *args = data;
>  	struct drm_i915_gem_object *obj;
> -	int ret;
> -
> -	ret = i915_mutex_lock_interruptible(dev);
> -	if (ret)
> -		return ret;
> +	unsigned long active;
>
>  	obj = i915_gem_object_lookup(file, args->handle);
> -	if (!obj) {
> -		ret = -ENOENT;
> -		goto unlock;
> -	}
> +	if (!obj)
> +		return -ENOENT;
>
> -	/* Count all active objects as busy, even if they are currently not used
> -	 * by the gpu. Users of this interface expect objects to eventually
> -	 * become non-busy without any further actions.
> -	 */
>  	args->busy = 0;
> -	if (i915_gem_object_is_active(obj)) {
> -		struct drm_i915_gem_request *req;
> -		int i;
> +	active = __I915_BO_ACTIVE(obj);
> +	if (active) {
> +		int idx;
>
> -		for (i = 0; i < I915_NUM_ENGINES; i++) {
> -			req = i915_gem_active_peek(&obj->last_read[i],
> -						   &obj->base.dev->struct_mutex);
> -			if (req)
> -				args->busy |= 1 << (16 + req->engine->exec_id);
> -		}
> -		req = i915_gem_active_peek(&obj->last_write,
> -					   &obj->base.dev->struct_mutex);
> -		if (req)
> -			args->busy |= req->engine->exec_id;
> +		/* Yes, the lookups are intentionally racy.
> +		 *
> +		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
> +		 * to regard the value as stale and as our ABI guarantees
> +		 * forward progress, we confirm the status of each active
> +		 * request with the hardware.
> +		 *
> +		 * Even though we guard the pointer lookup by RCU, that only
> +		 * guarantees that the pointer and its contents remain
> +		 * dereferencable and does *not* mean that the request we
> +		 * have is the same as the one being tracked by the object.
> +		 *
> +		 * Consider that we lookup the request just as it is being
> +		 * retired and freed. We take a local copy of the pointer,
> +		 * but before we add its engine into the busy set, the other
> +		 * thread reallocates it and assigns it to a task on another
> +		 * engine with a fresh and incomplete seqno.
> +		 *
> +		 * So after we lookup the engine's id, we double check that
> +		 * the active request is the same and only then do we add it
> +		 * into the busy set.
> +		 */
> +		rcu_read_lock();
>
> +		for_each_active(active, idx)
> +			args->busy |= busy_check_reader(&obj->last_read[idx]);
>
> +		/* For ABI sanity, we only care that the write engine is in
> +		 * the set of read engines. This is ensured by the ordering
> +		 * of setting last_read/last_write in i915_vma_move_to_active,
> +		 * and then in reverse in retire.
> +		 *
> +		 * We don't care that the set of active read/write engines
> +		 * may change during construction of the result, as it is
> +		 * equally liable to change before userspace can inspect
> +		 * the result.
> +		 */
> +		args->busy |= busy_check_writer(&obj->last_write);
>
> +		rcu_read_unlock();
>  	}
>
> -	i915_gem_object_put(obj);
> -unlock:
> -	mutex_unlock(&dev->struct_mutex);
> -	return ret;
> +	i915_gem_object_put_unlocked(obj);
> +	return 0;
>  }
>
>  int
> --
> 2.8.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx