On Mon, 10 Jun 2019 at 08:21, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
>
> If we let pages be allocated asynchronously, we also then want to push
> the binding process into an asynchronous task. Make it so, utilising the
> recent improvements to fence error tracking and struct_mutex reduction.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

[snip]

> +static int queue_async_bind(struct i915_vma *vma,
> +                            enum i915_cache_level cache_level,
> +                            u32 flags)
> +{
> +        bool ready = true;
> +
> +        /* We are not allowed to shrink inside vm->mutex! */
> +        vma->async.dma = kmalloc(sizeof(*vma->async.dma),
> +                                 GFP_NOWAIT | __GFP_NOWARN);
> +        if (!vma->async.dma)
> +                return -ENOMEM;
> +
> +        dma_fence_init(vma->async.dma,
> +                       &async_bind_ops,
> +                       &async_lock,
> +                       vma->vm->i915->mm.unordered_timeline,
> +                       0);
> +
> +        /* XXX find and avoid allocations under reservation_object locks */
> +        if (!i915_vma_trylock(vma)) {
> +                kfree(fetch_and_zero(&vma->async.dma));
> +                return -EAGAIN;
> +        }
> +
> +        if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
> +                struct dma_fence *f = reservation_object_get_excl(vma->resv);
> +
> +                if (!dma_fence_add_callback(f,
> +                                            &vma->async.cb,
> +                                            __queue_async_bind))
> +                        ready = false;
> +        }
> +        reservation_object_add_excl_fence(vma->resv, vma->async.dma);
> +        i915_vma_unlock(vma);
> +
> +        i915_vm_get(vma->vm);
> +        i915_vma_get(vma);
> +        __i915_vma_pin(vma); /* avoid being shrunk */
> +
> +        vma->async.cache_level = cache_level;
> +        vma->async.flags = flags;

Do we need to move this stuff (and the vm/vma get + pin above it) to before
the add_callback, in case the callback fires as soon as it is installed?
Rough sketch of what I mean at the bottom of this mail.

> +
> +        if (ready)
> +                __queue_async_bind(vma->async.dma, &vma->async.cb);
> +
> +        return 0;
> +}
> +
>  /**
>   * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
>   * @vma: VMA to map
> @@ -293,17 +405,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>          u32 vma_flags;
>          int ret;
>
> +        GEM_BUG_ON(!flags);
>          GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>          GEM_BUG_ON(vma->size > vma->node.size);
> -
> -        if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
> -                                              vma->node.size,
> -                                              vma->vm->total)))
> -                return -ENODEV;
> -
> -        if (GEM_DEBUG_WARN_ON(!flags))
> -                return -EINVAL;
> -
> +        GEM_BUG_ON(range_overflows(vma->node.start,
> +                                   vma->node.size,
> +                                   vma->vm->total));
>          bind_flags = 0;
>          if (flags & PIN_GLOBAL)
>                  bind_flags |= I915_VMA_GLOBAL_BIND;
> @@ -318,14 +425,18 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,

Are we aiming for vma_bind/vma_bind_async/vma_pin/vma_pin_async etc?

>          if (bind_flags == 0)
>                  return 0;
>
> -        GEM_BUG_ON(!vma->pages);
> +        if ((bind_flags & ~vma_flags) & I915_VMA_LOCAL_BIND)
> +                bind_flags |= I915_VMA_ALLOC_BIND;
>
>          trace_i915_vma_bind(vma, bind_flags);
> -        ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> +        if (bind_flags & I915_VMA_ALLOC_BIND)
> +                ret = queue_async_bind(vma, cache_level, bind_flags);
> +        else
> +                ret = __vma_bind(vma, cache_level, bind_flags);
>          if (ret)
>                  return ret;

Looks like clear_pages() is still called unconditionally in i915_vma_remove()
if we error out here, even though vma->pages is now only set as part of the
worker?
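
To make that concrete, I was expecting the unwind to grow a guard along these
lines (hand-written sketch against my reading of the patch, not tested, and
the exact spot in i915_vma_remove() may well differ):

        /*
         * Sketch only: with the bind queued asynchronously, vma->pages is
         * only populated once the worker has run set_pages(), so only tear
         * it down if it was actually acquired before we errored out.
         */
        if (vma->pages)
                vma->ops->clear_pages(vma);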
>
> -        vma->flags |= bind_flags;
> +        vma->flags |= bind_flags & ~I915_VMA_ALLOC_BIND;
>          return 0;
>  }
>
> @@ -569,7 +680,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>          }
>
>          if (vma->obj) {
> -                ret = i915_gem_object_pin_pages(vma->obj);
> +                ret = i915_gem_object_pin_pages_async(vma->obj);
>                  if (ret)
>                          return ret;
>
> @@ -578,25 +689,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                  cache_level = 0;
>          }
>
> -        GEM_BUG_ON(vma->pages);
> -
> -        ret = vma->ops->set_pages(vma);
> -        if (ret)
> -                goto err_unpin;
> -

I guess one casualty here is the !offset_fixed path for huge-gtt-pages, since
we need to inspect the vma->page_sizes below. Though probably not the end of
the world if that's the case.

>          if (flags & PIN_OFFSET_FIXED) {
>                  u64 offset = flags & PIN_OFFSET_MASK;
>                  if (!IS_ALIGNED(offset, alignment) ||
>                      range_overflows(offset, size, end)) {
>                          ret = -EINVAL;
> -                        goto err_clear;
> +                        goto err_unpin;
>                  }
>
>                  ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
>                                             size, offset, cache_level,
>                                             flags);
>                  if (ret)
> -                        goto err_clear;
> +                        goto err_unpin;
>          } else {
>                  /*
>                   * We only support huge gtt pages through the 48b PPGTT,
> @@ -635,7 +740,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>                                            size, alignment, cache_level,
>                                            start, end, flags);
>                  if (ret)
> -                        goto err_clear;
> +                        goto err_unpin;
>
>                  GEM_BUG_ON(vma->node.start < start);
>                  GEM_BUG_ON(vma->node.start + vma->node.size > end);
> @@ -654,8 +759,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>
>          return 0;
>
> -err_clear:
> -        vma->ops->clear_pages(vma);
>  err_unpin:
>          if (vma->obj)
>                  i915_gem_object_unpin_pages(vma->obj);
> @@ -790,7 +893,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>
>          spin_lock(&obj->vma.lock);
>          list_del(&vma->obj_link);
> -        rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> +        rb_erase(&vma->obj_node, &obj->vma.tree);
>          spin_unlock(&obj->vma.lock);
>  }
>
> @@ -930,6 +1033,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>
>          lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
>
> +        if (vma->async.dma &&
> +            dma_fence_wait_timeout(vma->async.dma, true,
> +                                   MAX_SCHEDULE_TIMEOUT) < 0)
> +                return -EINTR;
> +
>          ret = i915_active_wait(&vma->active);
>          if (ret)
>                  return ret;
> @@ -975,6 +1083,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>          }
>          vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
>
> +        dma_fence_put(fetch_and_zero(&vma->async.dma));
> +
>          i915_vma_remove(vma);
>
>          return 0;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 71088ff4ad59..67e43f5d01f6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -103,6 +103,7 @@ struct i915_vma {
>  #define I915_VMA_GLOBAL_BIND    BIT(9)
>  #define I915_VMA_LOCAL_BIND     BIT(10)
>  #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
> +#define I915_VMA_ALLOC_BIND     I915_VMA_PIN_OVERFLOW /* not stored */
>
>  #define I915_VMA_GGTT           BIT(11)
>  #define I915_VMA_CAN_FENCE      BIT(12)
> @@ -143,6 +144,14 @@ struct i915_vma {
>          unsigned int *exec_flags;
>          struct hlist_node exec_node;
>          u32 exec_handle;
> +
> +        struct i915_vma_async_bind {
> +                struct dma_fence *dma;
> +                struct dma_fence_cb cb;
> +                struct work_struct work;
> +                unsigned int cache_level;
> +                unsigned int flags;
> +        } async;
>  };
>
>  struct i915_vma *
> @@ -305,6 +314,11 @@ static inline void i915_vma_lock(struct i915_vma *vma)
>          reservation_object_lock(vma->resv, NULL);
>  }
>
> +static inline bool i915_vma_trylock(struct i915_vma *vma)
> +{
> +        return reservation_object_trylock(vma->resv);
> +}
> +
>  static inline void i915_vma_unlock(struct i915_vma *vma)
>  {
>          reservation_object_unlock(vma->resv);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index a166d9405a94..615ac485c731 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -204,8 +204,10 @@ static int igt_vma_create(void *arg)
>                  mock_context_close(ctx);
>          }
>
> -        list_for_each_entry_safe(obj, on, &objects, st_link)
> +        list_for_each_entry_safe(obj, on, &objects, st_link) {
> +                i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT);
>                  i915_gem_object_put(obj);
> +        }
>          return err;
>  }
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
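
Re the add_callback ordering question above: this is roughly what I had in
mind, an untested rearrangement of the queue_async_bind() hunk quoted above
(picking up after i915_vma_trylock() has succeeded), so that everything
__queue_async_bind() consumes is already published before the callback can
run:

        /*
         * Sketch only, not tested: take the references and fill in the
         * async state before installing the callback, as the fence may
         * signal and invoke __queue_async_bind() the moment the callback
         * is added.
         */
        i915_vm_get(vma->vm);
        i915_vma_get(vma);
        __i915_vma_pin(vma); /* avoid being shrunk */

        vma->async.cache_level = cache_level;
        vma->async.flags = flags;

        if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
                struct dma_fence *f = reservation_object_get_excl(vma->resv);

                if (!dma_fence_add_callback(f,
                                            &vma->async.cb,
                                            __queue_async_bind))
                        ready = false;
        }
        reservation_object_add_excl_fence(vma->resv, vma->async.dma);
        i915_vma_unlock(vma);

        if (ready)
                __queue_async_bind(vma->async.dma, &vma->async.cb);

        return 0;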