On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@xxxxxxxxx> wrote: > From: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > (Not published externally) > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > to allow eviction of soft-pinned objects when another soft-pinned object used > by a subsequent execbuffer overlaps reported by Michal Winiarski. > (Not published externally) > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > pinned first after an address conflict happens to avoid repeated conflicts in > rare cases (Suggested by Chris Wilson). Expanded comment on > drm_i915_gem_exec_object2.offset to cover this new API. Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature? Kristian > Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Akash Goel <akash.goel@xxxxxxxxx> > Cc: Vinay Belgaumkar <vinay.belgaumkar@xxxxxxxxx> > Cc: Michal Winiarsky <michal.winiarsky@xxxxxxxxx> > Cc: Zou Nanhai <nanhai.zou@xxxxxxxxx> > Signed-off-by: Thomas Daniel <thomas.daniel@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 +++ > drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++-------- > drivers/gpu/drm/i915/i915_gem_evict.c | 38 +++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-- > include/uapi/drm/i915_drm.h | 9 +++-- > 5 files changed, 99 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d90a782..e96c101 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > #define PIN_FULL_RANGE (1<<6) > +#define PIN_OFFSET_FIXED (1<<7) > #define PIN_OFFSET_MASK (~4095) > + > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, > unsigned long start, > unsigned long end, > unsigned flags); > +int __must_check > +i915_gem_evict_for_vma(struct i915_vma *target); > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); > int i915_gem_evict_everything(struct drm_device *dev); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index f170da6..a7e5ff2 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > + if (flags & PIN_OFFSET_FIXED) { > + uint64_t offset = flags & PIN_OFFSET_MASK; > + if (offset & (alignment - 1)) { > + ret = -EINVAL; > + goto err_free_vma; > + } > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + } > + if (ret) > + goto err_free_vma; > + } else { > search_free: > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(dev, vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(dev, vm, size, alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > > - goto err_free_vma; > + goto err_free_vma; > + } > } > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { > ret = -EINVAL; > @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) > vma->node.start < (flags & PIN_OFFSET_MASK)) > return true; > > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > return false; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index d09e35e..6098e2f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -199,6 +199,44 @@ found: > return ret; > } > > +int > +i915_gem_evict_for_vma(struct i915_vma *target) > +{ > + struct drm_mm_node *node, *next; > + > + list_for_each_entry_safe(node, next, > + &target->vm->mm.head_node.node_list, > + node_list) { > + struct i915_vma *vma; > + int ret; > + > + if (node->start + node->size <= target->node.start) > + continue; > + if (node->start >= target->node.start + target->node.size) > + break; > + > + vma = container_of(node, typeof(*vma), node); > + > + if (vma->pin_count) { > + /* We may need to evict a buffer in the same batch */ > + if (!vma->exec_entry) > + return -EBUSY; > + > + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > + /* Overlapping fixed objects in the same batch */ > + return -EINVAL; > + > + return -ENOSPC; > + } > + > + ret = i915_vma_unbind(vma); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > /** > * i915_gem_evict_vm - Evict all idle vmas from a vm > * @vm: Address space to cleanse > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 3d94744..4e6955e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if (entry->flags & EXEC_OBJECT_PINNED) > + flags |= entry->offset | PIN_OFFSET_FIXED; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (entry->flags & EXEC_OBJECT_PINNED && > + vma->node.start != entry->offset) > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; > @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > struct i915_vma *vma; > struct i915_address_space *vm; > struct list_head ordered_vmas; > + struct list_head pinned_vmas; > bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; > int retry; > > @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; > > INIT_LIST_HEAD(&ordered_vmas); > + INIT_LIST_HEAD(&pinned_vmas); > while (!list_empty(vmas)) { > struct drm_i915_gem_exec_object2 *entry; > bool need_fence, need_mappable; > @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->tiling_mode != I915_TILING_NONE; > need_mappable = need_fence || need_reloc_mappable(vma); > > - if (need_mappable) { > + if (entry->flags & EXEC_OBJECT_PINNED) > + list_move_tail(&vma->exec_list, &pinned_vmas); > + else if (need_mappable) { > entry->flags |= __EXEC_OBJECT_NEEDS_MAP; > list_move(&vma->exec_list, &ordered_vmas); > } else > @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->base.pending_write_domain = 0; > } > list_splice(&ordered_vmas, vmas); > + list_splice(&pinned_vmas, vmas); > > /* Attempt to pin all of the buffers into the GTT. > * This is done in 3 phases: > @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb) > * Note that actual hangs have only been observed on gen7, but for > * paranoia do it everywhere. > */ > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > > return vma->obj; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 55ba527..18f8f99 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 { > __u64 alignment; > > /** > - * Returned value of the updated offset of the object, for future > - * presumed_offset writes. > + * When the EXEC_OBJECT_PINNED flag is specified this is populated by > + * the user with the GTT offset at which this object will be pinned. > + * Otherwise the kernel populates it with the value of the updated > + * offset of the object, for future presumed_offset writes. > */ > __u64 offset; > > @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1) > +#define EXEC_OBJECT_PINNED (1<<4) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > __u64 flags; > > __u64 rsvd1; > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx