On Tue, Oct 6, 2015 at 4:23 PM, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary > and fixed objects within the same batch > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: "Daniel, Thomas" <thomas.daniel@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_dma.c | 3 ++ > drivers/gpu/drm/i915/i915_drv.h | 10 +++-- > drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++--------- > drivers/gpu/drm/i915/i915_gem_evict.c | 61 +++++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++- > drivers/gpu/drm/i915/i915_trace.h | 23 ++++++++++ > include/uapi/drm/i915_drm.h | 4 +- > 7 files changed, 151 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index ab37d1121be8..cd79ef114b8e 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > case I915_PARAM_HAS_RESOURCE_STREAMER: > value = HAS_RESOURCE_STREAMER(dev); > break; > + case I915_PARAM_HAS_EXEC_SOFTPIN: > + value = 1; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a0ce011a5dc0..7d351d991022 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_NONBLOCK (1<<1) > #define PIN_GLOBAL (1<<2) > #define PIN_OFFSET_BIAS (1<<3) > -#define PIN_USER (1<<4) > -#define PIN_UPDATE (1<<5) > -#define PIN_ZONE_4G (1<<6) > -#define PIN_HIGH (1<<7) > +#define PIN_OFFSET_FIXED (1<<4) > +#define PIN_USER (1<<5) > +#define PIN_UPDATE (1<<6) > +#define PIN_ZONE_4G (1<<7) > +#define PIN_HIGH (1<<8) > #define PIN_OFFSET_MASK (~4095) > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > @@ -3127,6 +3128,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, > unsigned long start, > unsigned long end, > unsigned flags); > +int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags); > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); > > /* belongs in i915_gem_gtt.h */ > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 8fe3df0cdcb8..82efd6a6dee0 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u64 start, end; > - u32 search_flag, alloc_flag; > struct i915_vma *vma; > int ret; > > @@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > - if (flags & PIN_HIGH) { > - search_flag = DRM_MM_SEARCH_BELOW; > - alloc_flag = DRM_MM_CREATE_TOP; > + if (flags & PIN_OFFSET_FIXED) { > + uint64_t offset = flags & PIN_OFFSET_MASK; > + if (offset & (alignment - 1) || offset + size > end) { > + vma = ERR_PTR(-EINVAL); > + goto err_free_vma; > + } > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma, flags); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + } > + if (ret) { > + vma = ERR_PTR(ret); > + goto err_free_vma; > + } > } else { > - search_flag = DRM_MM_SEARCH_DEFAULT; > - alloc_flag = DRM_MM_CREATE_DEFAULT; > - } > + u32 search_flag, alloc_flag; > + > + if (flags & PIN_HIGH) { > + search_flag = DRM_MM_SEARCH_BELOW; > + alloc_flag = DRM_MM_CREATE_TOP; > + } else { > + search_flag = DRM_MM_SEARCH_DEFAULT; > + alloc_flag = DRM_MM_CREATE_DEFAULT; > + } > > search_free: > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(dev, vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(dev, vm, size, alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > > - goto err_free_vma; > + goto err_free_vma; > + } > } > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { > ret = -EINVAL; > @@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma, > vma->node.start < (flags & PIN_OFFSET_MASK)) > return true; > > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > return false; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index d71a133ceff5..60450a95a491 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -199,6 +199,67 @@ found: > return ret; > } > > +int > +i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags) > +{ > + struct list_head eviction_list; > + struct interval_tree_node *it; > + u64 end = target->node.start + target->node.size; > + struct drm_mm_node *node; > + struct i915_vma *vma, *next; > + int ret; > + > + trace_i915_gem_evict_vma(target, flags); > + > + it = interval_tree_iter_first(&target->vm->mm.interval_tree, > + target->node.start, end -1); > + if (it == NULL) > + return 0; > + > + INIT_LIST_HEAD(&eviction_list); > + node = container_of(it, typeof(*node), it); > + list_for_each_entry_from(node, > + &target->vm->mm.head_node.node_list, > + node_list) { > + if (node->start >= end) > + break; > + > + vma = container_of(node, typeof(*vma), node); > + if (flags & PIN_NONBLOCK && > + (vma->pin_count || vma->obj->active)) { > + ret = -ENOSPC; > + break; > + } > + > + if (vma->exec_entry && > + vma->exec_entry->flags & EXEC_OBJECT_PINNED) { > + /* Overlapping pinned objects in the same batch */ > + ret = -EINVAL; > + break; > + } > + > + if (vma->pin_count) { > + /* We may need to evict an buffer in the same batch */ > + ret = vma->exec_entry ? -ENOSPC : -EBUSY; > + break; > + } > + > + list_add(&vma->exec_list, &eviction_list); > + drm_gem_object_reference(&vma->obj->base); > + } > + > + ret = 0; > + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { > + struct drm_i915_gem_object *obj = vma->obj; > + list_del_init(&vma->exec_list); > + if (ret == 0) > + ret = i915_vma_unbind(vma); > + drm_gem_object_unreference(&obj->base); > + } > + > + return ret; > +} > + > /** > * i915_gem_evict_vm - Evict all idle vmas from a vm > * @vm: Address space to cleanse > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 19dd6b05ee1d..c35c9dc526e7 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > if ((flags & PIN_MAPPABLE) == 0) > flags |= PIN_HIGH; > + if (entry->flags & EXEC_OBJECT_PINNED) > + flags |= entry->offset | PIN_OFFSET_FIXED; > } > > ret = i915_gem_object_pin(obj, vma->vm, > @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma) > if (vma->node.size < entry->pad_to_size) > return true; > > + if (entry->flags & EXEC_OBJECT_PINNED && > + vma->node.start != entry->offset) > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; I think would be better to add the following change here. - if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && + if (!(entry->flags & + (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) && (vma->node.start + vma->node.size - 1) >> 32) return true; This way, User will not have to necessarily pass the 48B_ADDRESS flag also along with the OBJECT_PINNED flag, if the offset is > 4 GB. The OBJECT_PINNED flag will take precedence over 48B_ADDRESS flag. Best regards Akash > @@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb) > * Note that actual hangs have only been observed on gen7, but for > * paranoia do it everywhere. > */ > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > > return vma->obj; > } > diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h > index b1dcee718640..ef2387c01fa9 100644 > --- a/drivers/gpu/drm/i915/i915_trace.h > +++ b/drivers/gpu/drm/i915/i915_trace.h > @@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm, > TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm) > ); > > +TRACE_EVENT(i915_gem_evict_vma, > + TP_PROTO(struct i915_vma *vma, unsigned flags), > + TP_ARGS(vma, flags), > + > + TP_STRUCT__entry( > + __field(u32, dev) > + __field(struct i915_address_space *, vm) > + __field(u64, start) > + __field(u64, size) > + __field(unsigned, flags) > + ), > + > + TP_fast_assign( > + __entry->dev = vma->vm->dev->primary->index; > + __entry->vm = vma->vm; > + __entry->start = vma->node.start; > + __entry->size = vma->node.size; > + __entry->flags = flags; > + ), > + > + TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags) > +); > + > TRACE_EVENT(i915_gem_ring_sync_to, > TP_PROTO(struct drm_i915_gem_request *to_req, > struct intel_engine_cs *from, > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 5e2fc61e7d88..766aa4fb8264 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_EU_TOTAL 34 > #define I915_PARAM_HAS_GPU_RESET 35 > #define I915_PARAM_HAS_RESOURCE_STREAMER 36 > +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 > > typedef struct drm_i915_getparam { > s32 param; > @@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) > #define EXEC_OBJECT_PAD_TO_SIZE (1<<4) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) > +#define EXEC_OBJECT_PINNED (1<<5) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > __u64 flags; > > __u64 pad_to_size; > -- > 2.6.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx