Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 6, 2015 at 4:23 PM, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> and fixed objects within the same batch
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: "Daniel, Thomas" <thomas.daniel@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>  drivers/gpu/drm/i915/i915_drv.h            | 10 +++--
>  drivers/gpu/drm/i915/i915_gem.c            | 68 +++++++++++++++++++++---------
>  drivers/gpu/drm/i915/i915_gem_evict.c      | 61 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +++-
>  drivers/gpu/drm/i915/i915_trace.h          | 23 ++++++++++
>  include/uapi/drm/i915_drm.h                |  4 +-
>  7 files changed, 151 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ab37d1121be8..cd79ef114b8e 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>         case I915_PARAM_HAS_RESOURCE_STREAMER:
>                 value = HAS_RESOURCE_STREAMER(dev);
>                 break;
> +       case I915_PARAM_HAS_EXEC_SOFTPIN:
> +               value = 1;
> +               break;
>         default:
>                 DRM_DEBUG("Unknown parameter %d\n", param->param);
>                 return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a0ce011a5dc0..7d351d991022 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  #define PIN_NONBLOCK   (1<<1)
>  #define PIN_GLOBAL     (1<<2)
>  #define PIN_OFFSET_BIAS        (1<<3)
> -#define PIN_USER       (1<<4)
> -#define PIN_UPDATE     (1<<5)
> -#define PIN_ZONE_4G    (1<<6)
> -#define PIN_HIGH       (1<<7)
> +#define PIN_OFFSET_FIXED (1<<4)
> +#define PIN_USER       (1<<5)
> +#define PIN_UPDATE     (1<<6)
> +#define PIN_ZONE_4G    (1<<7)
> +#define PIN_HIGH       (1<<8)
>  #define PIN_OFFSET_MASK (~4095)
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3127,6 +3128,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>                                           unsigned long start,
>                                           unsigned long end,
>                                           unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
>  int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
>  /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8fe3df0cdcb8..82efd6a6dee0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>         struct drm_device *dev = obj->base.dev;
>         struct drm_i915_private *dev_priv = dev->dev_private;
>         u64 start, end;
> -       u32 search_flag, alloc_flag;
>         struct i915_vma *vma;
>         int ret;
>
> @@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>         if (IS_ERR(vma))
>                 goto err_unpin;
>
> -       if (flags & PIN_HIGH) {
> -               search_flag = DRM_MM_SEARCH_BELOW;
> -               alloc_flag = DRM_MM_CREATE_TOP;
> +       if (flags & PIN_OFFSET_FIXED) {
> +               uint64_t offset = flags & PIN_OFFSET_MASK;
> +               if (offset & (alignment - 1) || offset + size > end) {
> +                       vma = ERR_PTR(-EINVAL);
> +                       goto err_free_vma;
> +               }
> +               vma->node.start = offset;
> +               vma->node.size = size;
> +               vma->node.color = obj->cache_level;
> +               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               if (ret) {
> +                       ret = i915_gem_evict_for_vma(vma, flags);
> +                       if (ret == 0)
> +                               ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +               }
> +               if (ret) {
> +                       vma = ERR_PTR(ret);
> +                       goto err_free_vma;
> +               }
>         } else {
> -               search_flag = DRM_MM_SEARCH_DEFAULT;
> -               alloc_flag = DRM_MM_CREATE_DEFAULT;
> -       }
> +               u32 search_flag, alloc_flag;
> +
> +               if (flags & PIN_HIGH) {
> +                       search_flag = DRM_MM_SEARCH_BELOW;
> +                       alloc_flag = DRM_MM_CREATE_TOP;
> +               } else {
> +                       search_flag = DRM_MM_SEARCH_DEFAULT;
> +                       alloc_flag = DRM_MM_CREATE_DEFAULT;
> +               }
>
>  search_free:
> -       ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -                                                 size, alignment,
> -                                                 obj->cache_level,
> -                                                 start, end,
> -                                                 search_flag,
> -                                                 alloc_flag);
> -       if (ret) {
> -               ret = i915_gem_evict_something(dev, vm, size, alignment,
> -                                              obj->cache_level,
> -                                              start, end,
> -                                              flags);
> -               if (ret == 0)
> -                       goto search_free;
> +               ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +                                                         size, alignment,
> +                                                         obj->cache_level,
> +                                                         start, end,
> +                                                         search_flag,
> +                                                         alloc_flag);
> +               if (ret) {
> +                       ret = i915_gem_evict_something(dev, vm, size, alignment,
> +                                                      obj->cache_level,
> +                                                      start, end,
> +                                                      flags);
> +                       if (ret == 0)
> +                               goto search_free;
>
> -               goto err_free_vma;
> +                       goto err_free_vma;
> +               }
>         }
>         if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>                 ret = -EINVAL;
> @@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma,
>             vma->node.start < (flags & PIN_OFFSET_MASK))
>                 return true;
>
> +       if (flags & PIN_OFFSET_FIXED &&
> +           vma->node.start != (flags & PIN_OFFSET_MASK))
> +               return true;
> +
>         return false;
>  }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133ceff5..60450a95a491 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,67 @@ found:
>         return ret;
>  }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
> +{
> +       struct list_head eviction_list;
> +       struct interval_tree_node *it;
> +       u64 end = target->node.start + target->node.size;
> +       struct drm_mm_node *node;
> +       struct i915_vma *vma, *next;
> +       int ret;
> +
> +       trace_i915_gem_evict_vma(target, flags);
> +
> +       it = interval_tree_iter_first(&target->vm->mm.interval_tree,
> +                                     target->node.start, end -1);
> +       if (it == NULL)
> +               return 0;
> +
> +       INIT_LIST_HEAD(&eviction_list);
> +       node = container_of(it, typeof(*node), it);
> +       list_for_each_entry_from(node,
> +                                &target->vm->mm.head_node.node_list,
> +                                node_list) {
> +               if (node->start >= end)
> +                       break;
> +
> +               vma = container_of(node, typeof(*vma), node);
> +               if (flags & PIN_NONBLOCK &&
> +                   (vma->pin_count || vma->obj->active)) {
> +                       ret = -ENOSPC;
> +                       break;
> +               }
> +
> +               if (vma->exec_entry &&
> +                   vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
> +                       /* Overlapping pinned objects in the same batch */
> +                       ret = -EINVAL;
> +                       break;
> +               }
> +
> +               if (vma->pin_count) {
> +                       /* We may need to evict an buffer in the same batch */
> +                       ret = vma->exec_entry ? -ENOSPC : -EBUSY;
> +                       break;
> +               }
> +
> +               list_add(&vma->exec_list, &eviction_list);
> +               drm_gem_object_reference(&vma->obj->base);
> +       }
> +
> +       ret = 0;
> +       list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
> +               struct drm_i915_gem_object *obj = vma->obj;
> +               list_del_init(&vma->exec_list);
> +               if (ret == 0)
> +                       ret = i915_vma_unbind(vma);
> +               drm_gem_object_unreference(&obj->base);
> +       }
> +
> +       return ret;
> +}
> +
>  /**
>   * i915_gem_evict_vm - Evict all idle vmas from a vm
>   * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 19dd6b05ee1d..c35c9dc526e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>                         flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
>                 if ((flags & PIN_MAPPABLE) == 0)
>                         flags |= PIN_HIGH;
> +               if (entry->flags & EXEC_OBJECT_PINNED)
> +                       flags |= entry->offset | PIN_OFFSET_FIXED;
>         }
>
>         ret = i915_gem_object_pin(obj, vma->vm,
> @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>         if (vma->node.size < entry->pad_to_size)
>                 return true;
>
> +       if (entry->flags & EXEC_OBJECT_PINNED &&
> +           vma->node.start != entry->offset)
> +               return true;
> +
>         if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>             vma->node.start < BATCH_OFFSET_BIAS)
>                 return true;


I think would be better to add the following change here.

- if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+ if (!(entry->flags &
+    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
     (vma->node.start + vma->node.size - 1) >> 32)
  return true;

This way, User will not have to necessarily pass the 48B_ADDRESS flag
also along with the OBJECT_PINNED flag, if the offset is > 4 GB.
The OBJECT_PINNED flag will take precedence over 48B_ADDRESS flag.

Best regards
Akash


> @@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb)
>          * Note that actual hangs have only been observed on gen7, but for
>          * paranoia do it everywhere.
>          */
> -       vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +       if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +               vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>         return vma->obj;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index b1dcee718640..ef2387c01fa9 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
>             TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
>  );
>
> +TRACE_EVENT(i915_gem_evict_vma,
> +           TP_PROTO(struct i915_vma *vma, unsigned flags),
> +           TP_ARGS(vma, flags),
> +
> +           TP_STRUCT__entry(
> +                            __field(u32, dev)
> +                            __field(struct i915_address_space *, vm)
> +                            __field(u64, start)
> +                            __field(u64, size)
> +                            __field(unsigned, flags)
> +                           ),
> +
> +           TP_fast_assign(
> +                          __entry->dev = vma->vm->dev->primary->index;
> +                          __entry->vm = vma->vm;
> +                          __entry->start = vma->node.start;
> +                          __entry->size = vma->node.size;
> +                          __entry->flags = flags;
> +                         ),
> +
> +           TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
> +);
> +
>  TRACE_EVENT(i915_gem_ring_sync_to,
>             TP_PROTO(struct drm_i915_gem_request *to_req,
>                      struct intel_engine_cs *from,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5e2fc61e7d88..766aa4fb8264 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_EU_TOTAL             34
>  #define I915_PARAM_HAS_GPU_RESET        35
>  #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN     37
>
>  typedef struct drm_i915_getparam {
>         s32 param;
> @@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_WRITE      (1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>  #define EXEC_OBJECT_PAD_TO_SIZE        (1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> +#define EXEC_OBJECT_PINNED     (1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>         __u64 flags;
>
>         __u64 pad_to_size;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux