Re: [PATCH 1/3] drm/i915: Move user fault tracking to a separate list

On Tue, Oct 11, 2016 at 03:37:57PM +0100, Chris Wilson wrote:
> We want to decouple RPM and struct_mutex, but currently RPM has to walk
> the list of bound objects and remove userspace mmapping before we
> suspend (otherwise userspace may continue to access the GTT whilst it is
> powered down). This currently requires the struct_mutex to walk the
> bound_list, but if we move that to a separate list and lock we can take
> the first step towards removing the struct_mutex.
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c   |  4 ++--
>  drivers/gpu/drm/i915/i915_drv.h       | 20 +++++++++++-------
>  drivers/gpu/drm/i915/i915_gem.c       | 39 +++++++++++++++++++++++++++--------
>  drivers/gpu/drm/i915/i915_gem_evict.c |  2 +-
>  4 files changed, 46 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 358663e833d6..d4ecc5283c2a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -186,11 +186,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  	}
>  	if (obj->stolen)
>  		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
> -	if (obj->pin_display || obj->fault_mappable) {
> +	if (obj->pin_display || !list_empty(&obj->userfault_link)) {
>  		char s[3], *t = s;
>  		if (obj->pin_display)
>  			*t++ = 'p';
> -		if (obj->fault_mappable)
> +		if (!list_empty(&obj->userfault_link))
>  			*t++ = 'f';
>  		*t = '\0';
>  		seq_printf(m, " (%s mappable)", s);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf397b643cc0..72b3126c6c74 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1359,6 +1359,14 @@ struct i915_gem_mm {
>  	 */
>  	struct list_head unbound_list;
>  
> +	/** Protects access to the userfault_list */
> +	spinlock_t userfault_lock;
> +
> +	/** List of all objects in gtt_space, currently mmaped by userspace.
> +	 * All objects within this list must also be on bound_list.
> +	 */
> +	struct list_head userfault_list;
> +
>  	/** Usable portion of the GTT for GEM */
>  	unsigned long stolen_base; /* limited to low memory (32-bit) */
>  
> @@ -2215,6 +2223,11 @@ struct drm_i915_gem_object {
>  	struct drm_mm_node *stolen;
>  	struct list_head global_list;
>  
> +	/**
> +	 * Whether the object is currently in the GGTT mmap.
> +	 */
> +	struct list_head userfault_link;
> +
>  	/** Used in execbuf to temporarily hold a ref */
>  	struct list_head obj_exec_link;
>  
> @@ -2242,13 +2255,6 @@ struct drm_i915_gem_object {
>  	 */
>  	unsigned int madv:2;
>  
> -	/**
> -	 * Whether the current gtt mapping needs to be mappable (and isn't just
> -	 * mappable by accident). Track pin and fault separate for a more
> -	 * accurate mappable working set.
> -	 */
> -	unsigned int fault_mappable:1;
> -
>  	/*
>  	 * Is the object to be mapped as read-only to the GPU
>  	 * Only honoured if hardware has relevant pte bit
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fdd496e6c081..91910ffe0964 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1850,7 +1850,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
>  	if (ret)
>  		goto err_unpin;
>  
> -	obj->fault_mappable = true;
> +	spin_lock(&dev_priv->mm.userfault_lock);
> +	list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
> +	spin_unlock(&dev_priv->mm.userfault_lock);

I think we need to add it to the list _before_ we start punching in new
ptes. At least if we want to decouple the locking and rpm refcounting a
lot more (which I think we should, I hate magic locks that ensure ordering
on their own simply by being a BKL). But right now it's ok, so just a
bikeshed.
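
Untested sketch of what I have in mind, reusing the names from this patch
(insert_ptes() here is just a stand-in for whatever the fault handler does
to punch in the new ptes, not a real function):

	/* Publish the object on the userfault_list first, so that a
	 * concurrent runtime suspend walking that list is guaranteed to
	 * see any object whose mapping may be about to become live.
	 */
	spin_lock(&dev_priv->mm.userfault_lock);
	if (list_empty(&obj->userfault_link))
		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
	spin_unlock(&dev_priv->mm.userfault_lock);

	/* ... and only then insert the ptes. Leaving a stale entry on the
	 * list if this fails should be harmless, it just means a spurious
	 * unmap later.
	 */
	ret = insert_ptes(area, vmf, vma);
	if (ret)
		goto err_unpin;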

> +
>  err_unpin:
>  	__i915_vma_unpin(vma);
>  err_unlock:
> @@ -1918,36 +1921,52 @@ err:
>  void
>  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  {
> +	struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +	bool zap = false;
> +
>  	/* Serialisation between user GTT access and our code depends upon
>  	 * revoking the CPU's PTE whilst the mutex is held. The next user
>  	 * pagefault then has to wait until we release the mutex.
> +	 *
> +	 * Note that RPM complicates somewhat by adding an additional
> +	 * requirement that operations to the GGTT be made holding the RPM
> +	 * wakeref.
>  	 */
>  	lockdep_assert_held(&obj->base.dev->struct_mutex);
>  
> -	if (!obj->fault_mappable)
> +	spin_lock(&i915->mm.userfault_lock);
> +	if (!list_empty(&obj->userfault_link)) {
> +		list_del_init(&obj->userfault_link);
> +		zap = true;
> +	}
> +	spin_unlock(&i915->mm.userfault_lock);
> +	if (!zap)
>  		return;
>  
>  	drm_vma_node_unmap(&obj->base.vma_node,
>  			   obj->base.dev->anon_inode->i_mapping);
>  
>  	/* Ensure that the CPU's PTE are revoked and there are not outstanding
> -	 * memory transactions from userspace before we return. The TLB
> -	 * flushing implied above by changing the PTE above *should* be
> +	 * memory transactions from userspace before we return. The TLB 
> +	 * flushing implied above by changing the PTE above *should* be 
>  	 * sufficient, an extra barrier here just provides us with a bit
>  	 * of paranoid documentation about our requirement to serialise
> -	 * memory writes before touching registers / GSM.
> +	 * memory writes before touching registers / GSM. 
>  	 */
>  	wmb();
> -
> -	obj->fault_mappable = false;
>  }
>  
>  void
>  i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
>  {
> -	struct drm_i915_gem_object *obj;
> +	struct drm_i915_gem_object *obj, *on;

	/* protected by dev_priv->mm.userfault_lock */

Since I had to think about it for a while ;-)
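
Untested, just to illustrate where I'd put it (and my understanding of why,
please correct me if that's wrong):

void
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *obj, *on;
	/* Still protected by dev_priv->mm.userfault_lock:
	 * i915_gem_release_mmap() unlinks objects from this list under
	 * that spinlock even after we have moved the list head onto our
	 * stack, and we call it ourselves below, hence the _safe iterator.
	 */
	struct list_head userfault_list;

	spin_lock(&dev_priv->mm.userfault_lock);
	list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
	spin_unlock(&dev_priv->mm.userfault_lock);

	list_for_each_entry_safe(obj, on, &userfault_list, userfault_link)
		i915_gem_release_mmap(obj);
}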

> +	struct list_head userfault_list;
> +
> +	spin_lock(&dev_priv->mm.userfault_lock);
> +	list_replace_init(&dev_priv->mm.userfault_list, &userfault_list);
> +	spin_unlock(&dev_priv->mm.userfault_lock);
>  
> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> +	list_for_each_entry_safe(obj, on, &userfault_list, userfault_link)
>  		i915_gem_release_mmap(obj);
>  }
>  
> @@ -4066,6 +4085,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  	int i;
>  
>  	INIT_LIST_HEAD(&obj->global_list);
> +	INIT_LIST_HEAD(&obj->userfault_link);
>  	for (i = 0; i < I915_NUM_ENGINES; i++)
>  		init_request_active(&obj->last_read[i],
>  				    i915_gem_object_retire__read);
> @@ -4441,6 +4461,7 @@ int i915_gem_init(struct drm_device *dev)
>  	int ret;
>  
>  	mutex_lock(&dev->struct_mutex);
> +	spin_lock_init(&dev_priv->mm.userfault_lock);
>  
>  	if (!i915.enable_execlists) {
>  		dev_priv->gt.resume = intel_legacy_submission_resume;
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index 5b6f81c1dbca..ca175c3063ed 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -55,7 +55,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind)
>  	if (WARN_ON(!list_empty(&vma->exec_list)))
>  		return false;
>  
> -	if (flags & PIN_NONFAULT && vma->obj->fault_mappable)
> +	if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link))

Racy access of list_empty. Where's our friend READ_ONCE again? Since it's
just an optimization there, there's no issue with the race itself, except
for diverging control flow in case gcc decides to recompute this.
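
i.e. something like this (untested), open-coding list_empty() so the
compiler can only peek at the unlocked list once:

	/* READ_ONCE() of the lockless peek, purely so gcc cannot reload
	 * the pointer and hand us diverging control flow.
	 */
	if (flags & PIN_NONFAULT &&
	    READ_ONCE(vma->obj->userfault_link.next) !=
	    &vma->obj->userfault_link)
		return false;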

I think that's the only one I spotted, with that addressed:

Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx>

>  		return false;
>  
>  	list_add(&vma->exec_list, unwind);

> -- 
> 2.9.3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



