During execbuffer we look up the i915_vmas in order to reserve them in
the VM. However, we then do a double lookup of each vma in order to
pin it, all because we lack the necessary interfaces to operate on an
i915_vma directly - so introduce i915_vma_pin()!

v2: Tidy parameter lists to remove one level of redirection in the hot
path.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h            |  24 +----
 drivers/gpu/drm/i915/i915_gem.c            | 155 ++++++++++++-----------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 142 ++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   3 -
 drivers/gpu/drm/i915/i915_gem_gtt.h        |  14 +++
 5 files changed, 138 insertions(+), 200 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 717834bc1ae6..490e337b65d4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3018,23 +3018,6 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
 void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
-/* Flags used by pin/bind&friends. */
-#define PIN_MAPPABLE	(1<<0)
-#define PIN_NONBLOCK	(1<<1)
-#define PIN_GLOBAL	(1<<2)
-#define PIN_OFFSET_BIAS	(1<<3)
-#define PIN_USER	(1<<4)
-#define PIN_UPDATE	(1<<5)
-#define PIN_ZONE_4G	(1<<6)
-#define PIN_HIGH	(1<<7)
-#define PIN_OFFSET_FIXED	(1<<8)
-#define PIN_OFFSET_MASK	(~4095)
-int __must_check
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -3312,11 +3295,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-
-	return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
-				   flags | PIN_GLOBAL);
+	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
+					0, alignment, flags);
 }
 
 void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fa9cb3313147..3b7b44e74301 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2969,26 +2969,18 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
  * @alignment: requested alignment
  * @flags: mask of PIN_* flags to use
  */
-static struct i915_vma *
-i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
-			       struct i915_address_space *vm,
-			       const struct i915_ggtt_view *ggtt_view,
-			       u64 size,
-			       u64 alignment,
-			       u64 flags)
+static int
+i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_vma *vma;
 	u64 start, end;
 	u64 min_alignment;
 	int ret;
 
-	vma = ggtt_view ?
-	      i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-	      i915_gem_obj_lookup_or_create_vma(obj, vm);
-	if (IS_ERR(vma))
-		return vma;
+	GEM_BUG_ON(vma->bound);
+	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
 
 	size = max(size, vma->size);
 	if (flags & PIN_MAPPABLE)
@@ -3002,7 +2994,7 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 	if (alignment & (min_alignment - 1)) {
 		DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n",
 			  alignment, min_alignment);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -3022,17 +3014,17 @@ i915_gem_object_insert_into_vm(struct drm_i915_gem_object *obj,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	i915_gem_object_pin_pages(obj);
 
 	if (flags & PIN_OFFSET_FIXED) {
-		uint64_t offset = flags & PIN_OFFSET_MASK;
+		u64 offset = flags & PIN_OFFSET_MASK;
 		if (offset & (alignment - 1) || offset > end - size) {
 			ret = -EINVAL;
 			goto err_unpin;
 		}
@@ -3088,11 +3080,11 @@ search_free:
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	obj->bind_count++;
 
-	return vma;
+	return 0;
 
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return ERR_PTR(ret);
+	return ret;
 }
 
 bool
@@ -3657,6 +3649,9 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
+	if (!drm_mm_node_allocated(&vma->node))
+		return false;
+
 	if (vma->node.size < size)
 		return true;
 
@@ -3700,91 +3695,42 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	obj->map_and_fenceable = mappable && fenceable;
 }
 
-static int
-i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
-		       struct i915_address_space *vm,
-		       const struct i915_ggtt_view *ggtt_view,
-		       u64 size,
-		       u64 alignment,
-		       u64 flags)
+int
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 {
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-	unsigned bound;
+	unsigned int bound = vma->bound;
 	int ret;
 
-	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
-		return -ENODEV;
-
-	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
-		return -EINVAL;
-
-	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
-		return -EINVAL;
-
-	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
-		return -EINVAL;
-
-	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
-			  i915_gem_obj_to_vma(obj, vm);
+	GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
+	GEM_BUG_ON((flags & PIN_GLOBAL) && !vma->is_ggtt);
 
-	if (vma) {
-		if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-			return -EBUSY;
-
-		if (i915_vma_misplaced(vma, size, alignment, flags)) {
-			WARN(i915_vma_is_pinned(vma),
-			     "bo is already pinned in %s with incorrect alignment:"
-			     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
-			     " obj->map_and_fenceable=%d\n",
-			     ggtt_view ? "ggtt" : "ppgtt",
-			     upper_32_bits(vma->node.start),
-			     lower_32_bits(vma->node.start),
-			     (long long)alignment,
-			     !!(flags & PIN_MAPPABLE),
-			     obj->map_and_fenceable);
-			ret = i915_vma_unbind(vma);
-			if (ret)
-				return ret;
+	if (WARN_ON(i915_vma_pin_count(vma) == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		return -EBUSY;
 
-			vma = NULL;
-		}
-	}
+	/* Pin early to prevent the shrinker/eviction logic from destroying
+	 * our vma as we insert and bind.
+	 */
+	__i915_vma_pin(vma);
 
-	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_insert_into_vm(obj, vm, ggtt_view,
-						     size, alignment, flags);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+	if (!bound) {
+		ret = i915_vma_insert(vma, size, alignment, flags);
+		if (ret)
+			goto err;
 	}
 
-	bound = vma->bound;
-	ret = i915_vma_bind(vma, obj->cache_level, flags);
+	ret = i915_vma_bind(vma, vma->obj->cache_level, flags);
 	if (ret)
-		return ret;
+		goto err;
 
-	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
-	    (bound ^ vma->bound) & GLOBAL_BIND) {
+	if ((bound ^ vma->bound) & GLOBAL_BIND)
 		__i915_vma_set_map_and_fenceable(vma);
-		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-	}
 
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
-
-	__i915_vma_pin(vma);
 	return 0;
-}
 
-int
-i915_gem_object_pin(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    u64 size,
-		    u64 alignment,
-		    u64 flags)
-{
-	return i915_gem_object_do_pin(obj, vm,
-				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      size, alignment, flags);
+err:
+	__i915_vma_unpin(vma);
+	return ret;
 }
 
 int
@@ -3794,14 +3740,35 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 alignment,
 			 u64 flags)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_vma *vma;
+	int ret;
 
 	BUG_ON(!view);
 
-	return i915_gem_object_do_pin(obj, &ggtt->base, view,
-				      size, alignment, flags | PIN_GLOBAL);
+	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	if (i915_vma_misplaced(vma, size, alignment, flags)) {
+		if (flags & PIN_NONBLOCK &&
+		    (i915_vma_is_pinned(vma) || i915_vma_is_active(vma)))
+			return -ENOSPC;
+
+		WARN(i915_vma_is_pinned(vma),
+		     "bo is already pinned in ggtt with incorrect alignment:"
+		     " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d,"
+		     " obj->map_and_fenceable=%d\n",
+		     upper_32_bits(vma->node.start),
+		     lower_32_bits(vma->node.start),
+		     (long long)alignment,
+		     !!(flags & PIN_MAPPABLE),
+		     obj->map_and_fenceable);
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9ea99e181997..a0759fe613f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -45,11 +45,10 @@
 struct i915_execbuffer_params {
 	struct drm_device *dev;
 	struct drm_file *file;
-	u32 dispatch_flags;
-	u32 args_batch_start_offset;
-	u32 batch_obj_vm_offset;
+	struct i915_vma *batch;
+	u32 dispatch_flags;
+	u32 args_batch_start_offset;
 	struct intel_engine_cs *engine;
-	struct drm_i915_gem_object *batch_obj;
 	struct i915_gem_context *ctx;
 	struct drm_i915_gem_request *request;
 };
@@ -102,6 +101,26 @@ eb_reset(struct eb_vmas *eb)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -198,35 +217,6 @@ err:
 	return ret;
 }
 
-static inline struct i915_vma *
-eb_get_batch_vma(struct eb_vmas *eb)
-{
-	/* The batch is always the LAST item in the VMA list */
-	struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list);
-
-	return vma;
-}
-
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
-	struct i915_vma *vma = eb_get_batch_vma(eb);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return vma->obj;
-}
-
 static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 {
 	if (eb->and < 0) {
@@ -682,16 +672,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm,
-				  entry->pad_to_size,
-				  entry->alignment,
-				  flags);
-	if ((ret == -ENOSPC || ret == -E2BIG) &&
+	ret = i915_vma_pin(vma,
+			   entry->pad_to_size,
+			   entry->alignment,
+			   flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
-		ret = i915_gem_object_pin(obj, vma->vm,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  flags & ~PIN_MAPPABLE);
+		ret = i915_vma_pin(vma,
+				   entry->pad_to_size,
+				   entry->alignment,
+				   flags & ~PIN_MAPPABLE);
 	if (ret)
 		return ret;
 
@@ -1251,11 +1241,11 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
 	return 0;
 }
 
-static struct drm_i915_gem_object*
+static struct i915_vma*
 i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
-			  struct eb_vmas *eb,
 			  struct drm_i915_gem_object *batch_obj,
+			  struct eb_vmas *eb,
 			  u32 batch_start_offset,
 			  u32 batch_len,
 			  bool is_master)
@@ -1267,7 +1257,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
-		return shadow_batch_obj;
+		return ERR_CAST(shadow_batch_obj);
 
 	ret = intel_engine_cmd_parser(engine,
 				      batch_obj,
@@ -1292,14 +1282,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
 	i915_gem_object_get(shadow_batch_obj);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
-	return shadow_batch_obj;
+	return vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
-		return batch_obj;
+		return NULL;
 	else
 		return ERR_PTR(ret);
 }
@@ -1380,11 +1368,11 @@ execbuf_submit(struct i915_execbuffer_params *params,
 	}
 
 	exec_len = args->batch_len;
-	exec_start = params->batch_obj_vm_offset +
+	exec_start = params->batch->node.start +
 		     params->args_batch_start_offset;
 
 	if (exec_len == 0)
-		exec_len = params->batch_obj->base.size;
+		exec_len = params->batch->size;
 
 	ret = params->engine->emit_bb_start(params->request,
 					    exec_start, exec_len,
@@ -1488,7 +1476,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct eb_vmas *eb;
-	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx;
@@ -1582,7 +1569,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err;
 
 	/* take note of the batch buffer before we might reorder the lists */
-	batch_obj = eb_get_batch(eb);
+	params->batch = eb_get_batch(eb);
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
@@ -1606,7 +1593,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (params->batch->obj->base.pending_write_domain) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
@@ -1614,26 +1601,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	params->args_batch_start_offset = args->batch_start_offset;
 	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
-		struct drm_i915_gem_object *parsed_batch_obj;
-
-		parsed_batch_obj = i915_gem_execbuffer_parse(engine,
-							     &shadow_exec_entry,
-							     eb,
-							     batch_obj,
-							     args->batch_start_offset,
-							     args->batch_len,
-							     drm_is_current_master(file));
-		if (IS_ERR(parsed_batch_obj)) {
-			ret = PTR_ERR(parsed_batch_obj);
+		struct i915_vma *vma;
+
+		vma = i915_gem_execbuffer_parse(engine, &shadow_exec_entry,
+						params->batch->obj,
+						eb,
+						args->batch_start_offset,
+						args->batch_len,
+						drm_is_current_master(file));
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
 		}
 
-		/*
-		 * parsed_batch_obj == batch_obj means batch not fully parsed:
-		 * Accept, but don't promote to secure.
-		 */
-
-		if (parsed_batch_obj != batch_obj) {
+		if (vma) {
 			/*
 			 * Batch parsed and accepted:
 			 *
@@ -1645,16 +1626,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			 */
 			dispatch_flags |= I915_DISPATCH_SECURE;
 			params->args_batch_start_offset = 0;
-			batch_obj = parsed_batch_obj;
+			params->batch = vma;
 		}
 	}
 
-	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+	params->batch->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (dispatch_flags & I915_DISPATCH_SECURE) {
+		struct drm_i915_gem_object *obj = params->batch->obj;
+
 		/*
 		 * So on first glance it looks freaky that we pin the batch here
 		 * outside of the reservation loop. But:
@@ -1665,13 +1648,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 * fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
 		if (ret)
 			goto err;
 
-		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
-	} else
-		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
+		params->batch = i915_gem_obj_to_ggtt(obj);
+	}
 
 	/* Allocate a request for this batch buffer nice and early. */
 	params->request = i915_gem_request_alloc(engine, ctx);
@@ -1694,12 +1676,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->file = file;
 	params->engine = engine;
 	params->dispatch_flags = dispatch_flags;
-	params->batch_obj = batch_obj;
 	params->ctx = ctx;
 
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
-	__i915_add_request(params->request, params->batch_obj, ret == 0);
+	__i915_add_request(params->request, params->batch->obj, ret == 0);
 
 err_batch_unpin:
 	/*
@@ -1709,8 +1690,7 @@ err_batch_unpin:
 	 * active.
 	 */
 	if (dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_object_ggtt_unpin(batch_obj);
-
+		i915_vma_unpin(params->batch);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_put(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index bab2bbdb1fb5..d93c7e04b21e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3657,13 +3657,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
 		return 0;
 
 	if (vma->bound == 0 && vma->vm->allocate_va_range) {
-		/* XXX: i915_vma_pin() will fix this +- hack */
-		__i915_vma_pin(vma);
 		trace_i915_va_alloc(vma);
 		ret = vma->vm->allocate_va_range(vma->vm,
 						 vma->node.start,
 						 vma->node.size);
-		__i915_vma_unpin(vma);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a5bab5f061f1..9ed134576b2d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -606,6 +606,20 @@ i915_ggtt_view_equal(const struct i915_ggtt_view *a,
 	return true;
 }
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags);
+/* Flags used by pin/bind&friends. */
+#define PIN_MAPPABLE	BIT(0)
+#define PIN_NONBLOCK	BIT(1)
+#define PIN_GLOBAL	BIT(2)
+#define PIN_OFFSET_BIAS	BIT(3)
+#define PIN_USER	BIT(4)
+#define PIN_UPDATE	BIT(5)
+#define PIN_ZONE_4G	BIT(6)
+#define PIN_HIGH	BIT(7)
+#define PIN_OFFSET_FIXED	BIT(8)
+#define PIN_OFFSET_MASK	(~4095)
+
 static inline int i915_vma_pin_count(const struct i915_vma *vma)
 {
 	return vma->pin_count;
-- 
2.8.1
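
For anyone skimming the diff, the shape of the change in the execbuffer
reservation path boils down to the sketch below. It is illustrative only
and simplified from the hunks above; the wrapper names reserve_old() and
reserve_new() are invented for the comparison and do not exist in the tree.

/* Before: pinning went back through the (obj, vm) interface, so the vma
 * the caller was already holding had to be looked up a second time
 * inside the call.
 */
static int reserve_old(struct i915_vma *vma,
		       struct drm_i915_gem_exec_object2 *entry,
		       u64 flags)
{
	return i915_gem_object_pin(vma->obj, vma->vm,
				   entry->pad_to_size,
				   entry->alignment,
				   flags);
}

/* After: operate directly on the vma the caller already holds. */
static int reserve_new(struct i915_vma *vma,
		       struct drm_i915_gem_exec_object2 *entry,
		       u64 flags)
{
	return i915_vma_pin(vma, entry->pad_to_size,
			    entry->alignment, flags);
}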