During execbuffer we look up the i915_vma in order to reserve them in
the VM. However, we then do a double lookup of the vma in order to then
pin them, all because we lack the necessary interfaces to operate on
i915_vma.

v2: Tidy parameter lists to remove one level of redirection in the hot
path.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h            |  28 +++--
 drivers/gpu/drm/i915/i915_gem.c            | 159 ++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 127 +++++++++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   3 -
 4 files changed, 144 insertions(+), 173 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7df6cfabe7fa..f6e508e5aa5b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2680,6 +2680,11 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma,
+	     uint64_t size,
+	     uint64_t alignment,
+	     uint64_t flags);
 /* Flags used by pin/bind&friends. */
 #define PIN_MAPPABLE	(1<<0)
 #define PIN_NONBLOCK	(1<<1)
@@ -2691,12 +2696,19 @@ void i915_gem_free_object(struct drm_gem_object *obj);
 #define PIN_HIGH	(1<<7)
 #define PIN_OFFSET_FIXED	(1<<8)
 #define PIN_OFFSET_MASK (~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    uint64_t size,
-		    uint32_t alignment,
-		    uint64_t flags);
+
+static inline void __i915_vma_unpin(struct i915_vma *vma)
+{
+	vma->pin_count--;
+}
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+	GEM_BUG_ON(vma->pin_count == 0);
+	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+	__i915_vma_unpin(vma);
+}
+
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -2933,8 +2945,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
-				   flags | PIN_GLOBAL);
+	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+					0, alignment, flags);
 }
 
 static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d4f358f4067..c6d7a78ab605 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2746,26 +2746,21 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * Finds free space in the GTT aperture and binds the object or a view of it
  * there.
  */
-static struct i915_vma *
-i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
-			       struct i915_address_space *vm,
-			       const struct i915_ggtt_view *ggtt_view,
-			       uint64_t size,
-			       uint64_t alignment,
-			       uint64_t flags)
+static int
+i915_vma_insert(struct i915_vma *vma,
+		uint64_t size,
+		uint64_t alignment,
+		uint64_t flags)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_vma *vma;
 	u64 start, end;
 	u64 min_alignment;
 	int ret;
 
-	vma = ggtt_view ?
-		i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-		i915_gem_obj_lookup_or_create_vma(obj, vm);
-	if (IS_ERR(vma))
-		return vma;
+	GEM_BUG_ON(vma->bound);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
 	if (flags & PIN_MAPPABLE)
@@ -2779,7 +2774,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	if (alignment & (min_alignment - 1)) {
 		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
 			  alignment, min_alignment);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -2799,12 +2794,12 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
@@ -2866,13 +2861,13 @@ search_free:
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
-	return vma;
+	return 0;
 
 err_remove_node:
 	drm_mm_remove_node(&vma->node);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 bool
@@ -3435,6 +3430,9 @@ i915_vma_misplaced(struct i915_vma *vma,
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	if (!drm_mm_node_allocated(&vma->node))
+		return false;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -3478,94 +3476,45 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	obj->map_and_fenceable = mappable && fenceable;
 }
 
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
-		       struct i915_address_space *vm,
-		       const struct i915_ggtt_view *ggtt_view,
-		       uint64_t size,
-		       uint32_t alignment,
-		       uint64_t flags)
+int
+i915_vma_pin(struct i915_vma *vma,
+	     uint64_t size,
+	     uint64_t alignment,
+	     uint64_t flags)
 {
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_vma *vma;
-	unsigned bound;
+	unsigned bound = vma->bound;
 	int ret;
 
-	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
-		return -ENODEV;
-
-	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
-		return -EINVAL;
-
-	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
-		return -EINVAL;
+	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
-		return -EINVAL;
-
-	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
-			  i915_gem_obj_to_vma(obj, vm);
-
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	if (vma) {
-		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-			return -EBUSY;
-
-		if (i915_vma_misplaced(vma, size, alignment, flags)) {
-			WARN(vma->pin_count,
-			     "bo is already pinned in %s with incorrect alignment:"
-			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
-			     " obj->map_and_fenceable=%d\n",
-			     ggtt_view ? "ggtt" : "ppgtt",
-			     upper_32_bits(vma->node.start),
-			     lower_32_bits(vma->node.start),
-			     alignment,
-			     !!(flags & PIN_MAPPABLE),
-			     obj->map_and_fenceable);
-			ret = i915_vma_unbind(vma);
-			if (ret)
-				return ret;
+	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		return -EBUSY;
 
-			vma = NULL;
-		}
-	}
+	/* Pin early to prevent the shrinker/eviction logic from destroying
+	 * our vma as we insert and bind.
+	 */
+	vma->pin_count++;
 
-	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
-						     size, alignment, flags);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+	if (!bound) {
+		ret = i915_vma_insert(vma, size, alignment, flags);
+		if (ret)
+			goto err;
 	}
 
-	bound = vma->bound;
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 	if (ret)
-		return ret;
+		goto err;
 
-	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
-	    (bound ^ vma->bound) & GLOBAL_BIND) {
+	if ((bound ^ vma->bound) & GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
-		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-
-	vma->pin_count++;
 	return 0;
-}
 
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    uint64_t size,
-		    uint32_t alignment,
-		    uint64_t flags)
-{
-	return i915_gem_object_do_pin(obj, vm,
-				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      size, alignment, flags);
+err:
+	vma->pin_count--;
+	return ret;
 }
 
 int
@@ -3575,11 +3524,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 uint32_t alignment,
 			 uint64_t flags)
 {
+	struct i915_vma *vma;
+	int ret;
+
 	if (WARN_ONCE(!view, "no view specified"))
 		return -EINVAL;
 
-	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
-				      size, alignment, flags | PIN_GLOBAL);
+	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		if (flags & PIN_NONBLOCK && (vma->pin_count | vma->active))
+			return -ENOSPC;
+
+		WARN(vma->pin_count,
+		     "bo is already pinned in ggtt with incorrect alignment:"
+		     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
+		     " obj->map_and_fenceable=%d\n",
+		     upper_32_bits(vma->node.start),
+		     lower_32_bits(vma->node.start),
+		     alignment,
+		     !!(flags & PIN_MAPPABLE),
+		     obj->map_and_fenceable);
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 899220139a8a..d4dcc3e5d080 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -44,11 +44,10 @@ struct i915_execbuffer_params {
 	struct drm_device *dev;
 	struct drm_file *file;
+	struct i915_vma *batch_vma;
 	uint32_t dispatch_flags;
 	uint32_t args_batch_start_offset;
-	uint64_t batch_obj_vm_offset;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_object *batch_obj;
 	struct intel_context *ctx;
 	struct drm_i915_gem_request *request;
 };
 
@@ -101,6 +100,26 @@ eb_reset(struct eb_vmas *eb)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -642,16 +661,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm,
-				  entry->pad_to_size,
-				  entry->alignment,
-				  flags);
-	if ((ret == -ENOSPC || ret == -E2BIG) &&
+	ret = i915_vma_pin(vma,
+			   entry->pad_to_size,
+			   entry->alignment,
+			   flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
-		ret = i915_gem_object_pin(obj, vma->vm,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  flags & ~PIN_MAPPABLE);
+		ret = i915_vma_pin(vma,
+				   entry->pad_to_size,
+				   entry->alignment,
+				   flags & ~PIN_MAPPABLE);
 	if (ret)
 		return ret;
 
@@ -1203,11 +1222,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static struct drm_i915_gem_object*
+static struct i915_vma*
 i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
-			  struct eb_vmas *eb,
 			  struct drm_i915_gem_object *batch_obj,
+			  struct eb_vmas *eb,
 			  u32 batch_start_offset,
 			  u32 batch_len,
 			  bool is_master)
@@ -1219,7 +1238,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
-		return shadow_batch_obj;
+		return ERR_CAST(shadow_batch_obj);
 
 	ret = i915_parse_cmds(ring,
 			      batch_obj,
@@ -1244,14 +1263,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	drm_gem_object_reference(&shadow_batch_obj->base);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
-	return shadow_batch_obj;
+	return vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
-		return batch_obj;
+		return NULL;
 	else
 		return ERR_PTR(ret);
 }
@@ -1331,7 +1348,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	}
 
 	exec_len = args->batch_len;
-	exec_start = params->batch_obj_vm_offset +
+	exec_start = params->batch_vma->node.start +
 		     params->args_batch_start_offset;
 
 	ret = params->ring->emit_bb_start(params->request,
@@ -1378,26 +1395,6 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev,
 	}
 }
 
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
-	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return vma->obj;
-}
-
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1406,7 +1403,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct eb_vmas *eb;
-	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	struct intel_engine_cs *ring;
 	struct intel_context *ctx;
@@ -1542,7 +1538,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err;
 
 	/* take note of the batch buffer before we might reorder the lists */
-	batch_obj = eb_get_batch(eb);
+	params->batch_vma = eb_get_batch(eb);
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1564,7 +1560,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (params->batch_vma->obj->base.pending_write_domain) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
@@ -1572,26 +1568,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	params->args_batch_start_offset = args->batch_start_offset;
 	if (i915_needs_cmd_parser(ring) && args->batch_len) {
-		struct drm_i915_gem_object *parsed_batch_obj;
-
-		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
-							     &shadow_exec_entry,
-							     eb,
-							     batch_obj,
-							     args->batch_start_offset,
-							     args->batch_len,
-							     file->is_master);
-		if (IS_ERR(parsed_batch_obj)) {
-			ret = PTR_ERR(parsed_batch_obj);
+		struct i915_vma *vma;
+
+		vma = i915_gem_execbuffer_parse(ring, &shadow_exec_entry,
+						params->batch_vma->obj,
+						eb,
+						args->batch_start_offset,
+						args->batch_len,
+						file->is_master);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
 		}
 
-		/*
-		 * parsed_batch_obj == batch_obj means batch not fully parsed:
-		 * Accept, but don't promote to secure.
-		 */
-
-		if (parsed_batch_obj != batch_obj) {
+		if (vma) {
 			/*
 			 * Batch parsed and accepted:
 			 *
@@ -1603,16 +1593,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			 */
 			dispatch_flags |= I915_DISPATCH_SECURE;
 			params->args_batch_start_offset = 0;
-			batch_obj = parsed_batch_obj;
+			params->batch_vma = vma;
 		}
 	}
 
-	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+	params->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (dispatch_flags & I915_DISPATCH_SECURE) {
+		struct drm_i915_gem_object *obj = params->batch_vma->obj;
+
 		/*
 		 * So on first glance it looks freaky that we pin the batch here
 		 * outside of the reservation loop. But:
@@ -1623,13 +1615,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 * fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 		if (ret)
 			goto err;
 
-		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
-	} else
-		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
+		params->batch_vma = i915_gem_obj_to_ggtt(obj);
+	}
 
 	/* Allocate a request for this batch buffer nice and early. */
 	params->request = i915_gem_request_alloc(ring, ctx);
@@ -1654,11 +1645,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->file = file;
 	params->ring = ring;
 	params->dispatch_flags = dispatch_flags;
-	params->batch_obj = batch_obj;
 	params->ctx = ctx;
 
 	ret = execbuf_submit(params, args, &eb->vmas);
-	__i915_add_request(params->request, params->batch_obj, ret == 0);
+	__i915_add_request(params->request, params->batch_vma->obj, ret == 0);
 
 err_batch_unpin:
 	/*
@@ -1668,8 +1658,7 @@ err_batch_unpin:
 	 * active.
 	 */
 	if (dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_object_ggtt_unpin(batch_obj);
-
+		i915_vma_unpin(params->batch_vma);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 98b9730f4066..8f3b2f051918 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3549,13 +3549,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		/* XXX: i915_vma_pin() will fix this +- hack */
-		vma->pin_count++;
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
-		vma->pin_count--;
 		if (ret)
 			return ret;
 	}
-- 
2.7.0.rc3
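
[Aside for reviewers, not part of the patch: the snippet below is only a rough
sketch of the calling convention this series gives a GGTT user, condensed from
the hunks above with the error paths trimmed; it is not meant to build on its
own. All names in it (i915_gem_obj_lookup_or_create_ggtt_vma, i915_vma_pin,
i915_vma_unpin) come from the diff itself.]

	/* single lookup: the same vma is used to reserve, bind and pin */
	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ret;

	/* the caller can read vma->node.start directly, e.g. for exec_start */

	i915_vma_unpin(vma);

Previously the same path went through i915_gem_object_pin(obj, vm, ...), which
had to repeat the obj/vm -> vma lookup that execbuffer had already done while
reserving the objects.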