On Thu, Jul 10, 2014 at 10:21:43AM +0100, Chris Wilson wrote:
> Whilst I strongly advise against doing so for the implicit coherency
> issues between the multiple buffer objects accessing the same backing
> store, it nevertheless is a valid use case, akin to mmapping the same
> file multiple times.
>
> The reason why we forbade it earlier was that our use of the interval
> tree for fast invalidation upon vma changes excluded overlapping
> objects. So in the case where the user wishes to create such pairs of
> overlapping objects, we degrade the range invalidation to walking the
> linear list of objects associated with the mm.
>
> v2: Compile for mmu-notifiers after tweaking
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: "Li, Victor Y" <victor.y.li@xxxxxxxxx>
> Cc: "Kelley, Sean V" <sean.v.kelley@xxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
> Cc: "Gong, Zhipeng" <zhipeng.gong@xxxxxxxxx>
> Cc: Akash Goel <akash.goel@xxxxxxxxx>
> Cc: "Volkin, Bradley D" <bradley.d.volkin@xxxxxxxxx>

The commit message is a bit thin on why we need this. Sounds a bit like
userspace lost its marbles to me ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 139 ++++++++++++++++++++++++--------
>  1 file changed, 104 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 21ea928..7c38f50 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -40,19 +40,87 @@ struct i915_mmu_notifier {
>          struct hlist_node node;
>          struct mmu_notifier mn;
>          struct rb_root objects;
> +        struct list_head linear;
>          struct drm_device *dev;
>          struct mm_struct *mm;
>          struct work_struct work;
>          unsigned long count;
>          unsigned long serial;
> +        bool is_linear;
>  };
>
>  struct i915_mmu_object {
>          struct i915_mmu_notifier *mmu;
>          struct interval_tree_node it;
> +        struct list_head link;
>          struct drm_i915_gem_object *obj;
> +        bool is_linear;
>  };
>
> +static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> +{
> +        struct drm_device *dev = obj->base.dev;
> +        unsigned long end;
> +
> +        mutex_lock(&dev->struct_mutex);
> +        /* Cancel any active worker and force us to re-evaluate gup */
> +        obj->userptr.work = NULL;
> +
> +        if (obj->pages != NULL) {
> +                struct drm_i915_private *dev_priv = to_i915(dev);
> +                struct i915_vma *vma, *tmp;
> +                bool was_interruptible;
> +
> +                was_interruptible = dev_priv->mm.interruptible;
> +                dev_priv->mm.interruptible = false;
> +
> +                list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> +                        int ret = i915_vma_unbind(vma);
> +                        WARN_ON(ret && ret != -EIO);
> +                }
> +                WARN_ON(i915_gem_object_put_pages(obj));
> +
> +                dev_priv->mm.interruptible = was_interruptible;
> +        }
> +
> +        end = obj->userptr.ptr + obj->base.size;
> +
> +        drm_gem_object_unreference(&obj->base);
> +        mutex_unlock(&dev->struct_mutex);
> +
> +        return end;
> +}
> +
> +static void invalidate_range__linear(struct i915_mmu_notifier *mn,
> +                                     struct mm_struct *mm,
> +                                     unsigned long start,
> +                                     unsigned long end)
> +{
> +        struct i915_mmu_object *mmu;
> +        unsigned long serial;
> +
> +restart:
> +        serial = mn->serial;
> +        list_for_each_entry(mmu, &mn->linear, link) {
> +                struct drm_i915_gem_object *obj;
> +
> +                if (mmu->it.last < start || mmu->it.start > end)
> +                        continue;
> +
> +                obj = mmu->obj;
> +                drm_gem_object_reference(&obj->base);
> +                spin_unlock(&mn->lock);
> +
> +                cancel_userptr(obj);
> +
> +                spin_lock(&mn->lock);
> +                if (serial != mn->serial)
> +                        goto restart;
> +        }
> +
> +        spin_unlock(&mn->lock);
> +}
> +
>  static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>                                                         struct mm_struct *mm,
>                                                         unsigned long start,
> @@ -60,16 +128,19 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>  {
>          struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
>          struct interval_tree_node *it = NULL;
> +        unsigned long next = start;
>          unsigned long serial = 0;
>
>          end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> -        while (start < end) {
> +        while (next < end) {
>                  struct drm_i915_gem_object *obj;
>
>                  obj = NULL;
>                  spin_lock(&mn->lock);
> +                if (mn->is_linear)
> +                        return invalidate_range__linear(mn, mm, start, end);
>                  if (serial == mn->serial)
> -                        it = interval_tree_iter_next(it, start, end);
> +                        it = interval_tree_iter_next(it, next, end);
>                  else
>                          it = interval_tree_iter_first(&mn->objects, start, end);
>                  if (it != NULL) {
> @@ -81,31 +152,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>                  if (obj == NULL)
>                          return;
>
> -                mutex_lock(&mn->dev->struct_mutex);
> -                /* Cancel any active worker and force us to re-evaluate gup */
> -                obj->userptr.work = NULL;
> -
> -                if (obj->pages != NULL) {
> -                        struct drm_i915_private *dev_priv = to_i915(mn->dev);
> -                        struct i915_vma *vma, *tmp;
> -                        bool was_interruptible;
> -
> -                        was_interruptible = dev_priv->mm.interruptible;
> -                        dev_priv->mm.interruptible = false;
> -
> -                        list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> -                                int ret = i915_vma_unbind(vma);
> -                                WARN_ON(ret && ret != -EIO);
> -                        }
> -                        WARN_ON(i915_gem_object_put_pages(obj));
> -
> -                        dev_priv->mm.interruptible = was_interruptible;
> -                }
> -
> -                start = obj->userptr.ptr + obj->base.size;
> -
> -                drm_gem_object_unreference(&obj->base);
> -                mutex_unlock(&mn->dev->struct_mutex);
> +                next = cancel_userptr(obj);
>          }
>  }
>
> @@ -151,6 +198,8 @@ i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
>          mmu->objects = RB_ROOT;
>          mmu->count = 0;
>          mmu->serial = 0;
> +        INIT_LIST_HEAD(&mmu->linear);
> +        mmu->is_linear = false;
>
>          /* Protected by mmap_sem (write-lock) */
>          ret = __mmu_notifier_register(&mmu->mn, mm);
> @@ -197,6 +246,17 @@ static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
>                  mmu->serial = 1;
>  }
>
> +static bool i915_mmu_notifier_is_linear(struct i915_mmu_notifier *mmu)
> +{
> +        struct i915_mmu_object *mn;
> +
> +        list_for_each_entry(mn, &mmu->linear, link)
> +                if (mn->is_linear)
> +                        return true;
> +
> +        return false;
> +}
> +
>  static void
>  i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
>                        struct i915_mmu_object *mn)
> @@ -205,6 +265,9 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
>
>          spin_lock(&mmu->lock);
>          interval_tree_remove(&mn->it, &mmu->objects);
> +        list_del(&mn->link);
> +        if (mn->is_linear)
> +                mmu->is_linear = i915_mmu_notifier_is_linear(mmu);
>          __i915_mmu_notifier_update_serial(mmu);
>          spin_unlock(&mmu->lock);
>
> @@ -230,7 +293,6 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
>           */
>          i915_gem_retire_requests(mmu->dev);
>
> -        /* Disallow overlapping userptr objects */
>          spin_lock(&mmu->lock);
>          it = interval_tree_iter_first(&mmu->objects,
>                                        mn->it.start, mn->it.last);
> @@ -243,14 +305,22 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
>                   * to flush their object references upon which the object will
>                   * be removed from the interval-tree, or the the range is
>                   * still in use by another client and the overlap is invalid.
> +                 *
> +                 * If we do have an overlap, we cannot use the interval tree
> +                 * for fast range invalidation.
>                   */
>
>                  obj = container_of(it, struct i915_mmu_object, it)->obj;
> -                ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
> -        } else {
> +                if (!obj->userptr.workers)
> +                        mmu->is_linear = mn->is_linear = true;
> +                else
> +                        ret = -EAGAIN;
> +        } else
>                  interval_tree_insert(&mn->it, &mmu->objects);
> +
> +        if (ret == 0) {
> +                list_add(&mn->link, &mmu->linear);
>                  __i915_mmu_notifier_update_serial(mmu);
> -                ret = 0;
>          }
>          spin_unlock(&mmu->lock);
>          mutex_unlock(&mmu->dev->struct_mutex);
> @@ -611,12 +681,11 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
>   * We impose several restrictions upon the memory being mapped
>   * into the GPU.
>   * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
> - * 2. It cannot overlap any other userptr object in the same address space.
> - * 3. It must be normal system memory, not a pointer into another map of IO
> + * 2. It must be normal system memory, not a pointer into another map of IO
>   *    space (e.g. it must not be a GTT mmapping of another object).
> - * 4. We only allow a bo as large as we could in theory map into the GTT,
> + * 3. We only allow a bo as large as we could in theory map into the GTT,
>   *    that is we limit the size to the total size of the GTT.
> - * 5. The bo is marked as being snoopable. The backing pages are left
> + * 4. The bo is marked as being snoopable. The backing pages are left
>   *    accessible directly by the CPU, but reads and writes by the GPU may
>   *    incur the cost of a snoop (unless you have an LLC architecture).
>   *
> --
> 1.9.1

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
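The fallback described in the commit message is the heart of the change: once two userptr objects overlap, a range invalidation can no longer be answered from the interval tree alone and instead visits every object tracked for the mm. Below is a minimal, self-contained userspace sketch of that linear walk; the type and function names (tracked_range, invalidate_range_linear) are illustrative stand-ins rather than the i915 ones, and the real driver's locking, refcounting and restart-on-serial-change are omitted.

/*
 * Illustrative sketch only: hypothetical stand-in structures, not the
 * i915 implementation.
 */
#include <stdbool.h>
#include <stdio.h>

struct tracked_range {
        unsigned long start;            /* first byte of the user mapping */
        unsigned long last;             /* last byte, inclusive, as in the interval tree */
        bool cancelled;
        struct tracked_range *next;
};

/*
 * O(n) fallback: walk every tracked object and cancel those whose range
 * intersects the invalidated [start, end) interval. This stays correct
 * even when the tracked ranges overlap one another, which is exactly the
 * case that rules out the interval-tree fast path in the patch above.
 */
static void invalidate_range_linear(struct tracked_range *head,
                                    unsigned long start, unsigned long end)
{
        struct tracked_range *r;

        end--;  /* exclusive end -> inclusive last, mirroring the notifier */
        for (r = head; r != NULL; r = r->next) {
                if (r->last < start || r->start > end)
                        continue;       /* no overlap with the invalidated range */
                r->cancelled = true;    /* stand-in for cancel_userptr() */
        }
}

int main(void)
{
        /* Two objects deliberately sharing part of the same address range. */
        struct tracked_range b = { .start = 0x2000, .last = 0x5fff };
        struct tracked_range a = { .start = 0x1000, .last = 0x3fff, .next = &b };

        invalidate_range_linear(&a, 0x3000, 0x4000);
        printf("a cancelled: %d, b cancelled: %d\n", a.cancelled, b.cancelled); /* 1, 1 */
        return 0;
}

Note that the patch only pays this O(n) cost once an overlapping pair has actually been created: the is_linear flags in the diff keep the interval tree as the fast path for the common, non-overlapping case.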