During execbuffer we look up the i915_vmas in order to reserve them in
the VM. However, we then do a second lookup of each vma in order to pin
it, all because we lack the necessary interfaces to operate directly on
the i915_vma.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h            |  13 +++
 drivers/gpu/drm/i915/i915_gem.c            | 170 ++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 114 ++++++++++---------
 3 files changed, 154 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2c72ee0214b5..ba593ee78863 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2637,6 +2637,19 @@ void i915_gem_close_object(struct drm_gem_object *obj,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
+int __must_check
+i915_vma_pin(struct i915_vma *vma,
+	     uint32_t size,
+	     uint32_t alignment,
+	     uint64_t flags);
+
+static inline void i915_vma_unpin(struct i915_vma *vma)
+{
+	WARN_ON(vma->pin_count == 0);
+	WARN_ON(!drm_mm_node_allocated(&vma->node));
+	vma->pin_count--;
+}
+
 #define PIN_MAPPABLE 0x1
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3d4463930267..7b27236f2c29 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3644,10 +3644,9 @@ static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
-static struct i915_vma *
+static int
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
-			   struct i915_address_space *vm,
-			   const struct i915_ggtt_view *ggtt_view,
+			   struct i915_vma *vma,
 			   uint32_t size,
 			   unsigned alignment,
 			   uint64_t flags)
@@ -3658,13 +3657,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	unsigned long start =
 		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	unsigned long end =
-		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
-	struct i915_vma *vma;
+		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vma->vm->total;
 	int ret;
 
-	if (WARN_ON(vm->is_ggtt != !!ggtt_view))
-		return ERR_PTR(-EINVAL);
-
 	fence_size = i915_gem_get_gtt_size(dev,
 					   obj->base.size,
 					   obj->tiling_mode);
@@ -3681,7 +3676,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 		unfenced_alignment;
 	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
 		DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	size = max_t(u32, size, obj->base.size);
@@ -3696,57 +3691,51 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
-		return ERR_PTR(-E2BIG);
+		return -E2BIG;
 	}
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
 	i915_gem_object_pin_pages(obj);
-	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
-			  i915_gem_obj_lookup_or_create_vma(obj, vm);
-
-	if (IS_ERR(vma))
-		goto err_unpin;
-
 	if (flags & PIN_OFFSET_FIXED) {
 		uint64_t offset = flags & PIN_OFFSET_MASK;
 
 		if (offset & (alignment - 1)) {
-			vma = ERR_PTR(-EINVAL);
-			goto err_free_vma;
+			ret = -EINVAL;
+			goto err_unpin;
 		}
 		vma->node.start = offset;
 		vma->node.size = size;
 		vma->node.color = obj->cache_level;
-		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
 		if (ret) {
-			ret = i915_gem_evict_range(dev, vm, start, end);
+			ret = i915_gem_evict_range(dev, vma->vm, start, end);
 			if (ret == 0)
-				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
-		}
-		if (ret) {
-			vma = ERR_PTR(ret);
-			goto err_free_vma;
+				ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node);
+			if (ret)
+				goto err_unpin;
 		}
 	} else {
 search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
+							  &vma->node,
 							  size, alignment,
 							  obj->cache_level,
 							  start, end,
 							  DRM_MM_SEARCH_DEFAULT,
 							  DRM_MM_CREATE_DEFAULT);
 		if (ret) {
-			ret = i915_gem_evict_something(dev, vm, size, alignment,
+			ret = i915_gem_evict_something(dev, vma->vm,
+						       size, alignment,
 						       obj->cache_level,
 						       start, end,
 						       flags);
 			if (ret == 0)
 				goto search_free;
 
-			goto err_free_vma;
+			goto err_unpin;
 		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
@@ -3775,20 +3764,17 @@ search_free:
 		goto err_finish_gtt;
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&vma->mm_list, &vm->inactive_list);
+	list_add_tail(&vma->mm_list, &vma->vm->inactive_list);
 
-	return vma;
+	return 0;
 
 err_finish_gtt:
 	i915_gem_gtt_finish_object(obj);
 err_remove_node:
 	drm_mm_remove_node(&vma->node);
-err_free_vma:
-	i915_gem_vma_destroy(vma);
-	vma = ERR_PTR(ret);
 err_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return vma;
+	return ret;
 }
 
 bool
@@ -4347,6 +4333,65 @@ i915_vma_misplaced(struct i915_vma *vma,
 	return false;
 }
 
+int
+i915_vma_pin(struct i915_vma *vma,
+	     uint32_t size,
+	     uint32_t alignment,
+	     uint64_t flags)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	unsigned bound = vma->bound;
+	int ret;
+
+	if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
+		return -EBUSY;
+
+
+	if (!drm_mm_node_allocated(&vma->node)) {
+		/* In true PPGTT, bind has possibly changed PDEs, which
+		 * means we must do a context switch before the GPU can
+		 * accurately read some of the VMAs.
+		 */
+		ret = i915_gem_object_bind_to_vm(obj, vma,
+						 size, alignment, flags);
+		if (ret)
+			return ret;
+	}
+
+	if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
+		ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
+		if (ret)
+			return ret;
+	}
+
+	if ((bound ^ vma->bound) & GLOBAL_BIND) {
+		bool mappable, fenceable;
+		u32 fence_size, fence_alignment;
+
+		fence_size = i915_gem_get_gtt_size(obj->base.dev,
+						   obj->base.size,
+						   obj->tiling_mode);
+		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+							     obj->base.size,
+							     obj->tiling_mode,
+							     true);
+
+		fenceable = (vma->node.size >= fence_size &&
+			     (vma->node.start & (fence_alignment - 1)) == 0);
+
+		mappable = (vma->node.start + fence_size <=
+			    dev_priv->gtt.mappable_end);
+
+		obj->map_and_fenceable = mappable && fenceable;
+	}
+
+	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
+
+	vma->pin_count++;
+	return 0;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
@@ -4357,7 +4402,6 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct i915_vma *vma;
-	unsigned bound;
 	int ret;
 
 	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
@@ -4379,9 +4423,6 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		return PTR_ERR(vma);
 
 	if (vma) {
-		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
-			return -EBUSY;
-
 		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			unsigned long offset;
 			offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view) :
@@ -4403,49 +4444,14 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		}
 	}
 
-	bound = vma ? vma->bound : 0;
-	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		/* In true PPGTT, bind has possibly changed PDEs, which
-		 * means we must do a context switch before the GPU can
-		 * accurately read some of the VMAs.
-		 */
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
-						 size, alignment, flags);
+	if (vma == NULL) {
+		vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
+			i915_gem_obj_lookup_or_create_vma(obj, vm);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	}
 
-	if (flags & PIN_GLOBAL && !(vma->bound & GLOBAL_BIND)) {
-		ret = i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND);
-		if (ret)
-			return ret;
-	}
-
-	if ((bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size >= fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-	}
-
-	WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
-
-	vma->pin_count++;
-	return 0;
+	return i915_vma_pin(vma, size, alignment, flags);
 }
 
 int
@@ -4478,13 +4484,7 @@ void
 i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
 				const struct i915_ggtt_view *view)
 {
-	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
-
-	BUG_ON(!vma);
-	WARN_ON(vma->pin_count == 0);
-	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
-
-	--vma->pin_count;
+	i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
 }
 
 bool
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bd48393fb91f..734a7ef56a93 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -42,6 +42,7 @@
 
 struct eb_vmas {
 	struct list_head vmas;
+	struct i915_vma *batch_vma;
 	int and;
 	union {
 		struct i915_vma *lut[0];
@@ -88,6 +89,26 @@ eb_reset(struct eb_vmas *eb)
 		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 }
 
+static struct i915_vma *
+eb_get_batch(struct eb_vmas *eb)
+{
+	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
+
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+	return vma;
+}
+
 static int
 eb_lookup_vmas(struct eb_vmas *eb,
 	       struct drm_i915_gem_exec_object2 *exec,
@@ -165,6 +186,9 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		++i;
 	}
 
+	/* take note of the batch buffer before we might reorder the lists */
+	eb->batch_vma = eb_get_batch(eb);
+
 	return 0;
 
 
@@ -644,16 +668,16 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= entry->offset | PIN_OFFSET_FIXED;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm,
-				  entry->pad_to_size,
-				  entry->alignment,
-				  flags);
-	if ((ret == -ENOSPC || ret == -E2BIG) &&
+	ret = i915_vma_pin(vma,
+			   entry->pad_to_size,
+			   entry->alignment,
+			   flags);
+	if ((ret == -ENOSPC || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
-		ret = i915_gem_object_pin(obj, vma->vm,
-					  entry->pad_to_size,
-					  entry->alignment,
-					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
+		ret = i915_vma_pin(vma,
+				   entry->pad_to_size,
+				   entry->alignment,
+				   flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
 	if (ret)
 		return ret;
 
@@ -1194,11 +1218,10 @@ i915_emit_box(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static struct drm_i915_gem_object*
+static struct i915_vma*
 i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
 			  struct eb_vmas *eb,
-			  struct drm_i915_gem_object *batch_obj,
 			  u32 batch_start_offset,
 			  u32 batch_len,
 			  bool is_master)
@@ -1210,10 +1233,10 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
 						   PAGE_ALIGN(batch_len));
 	if (IS_ERR(shadow_batch_obj))
-		return shadow_batch_obj;
+		return ERR_CAST(shadow_batch_obj);
 
 	ret = i915_parse_cmds(ring,
-			      batch_obj,
+			      eb->batch_vma->obj,
 			      shadow_batch_obj,
 			      batch_start_offset,
 			      batch_len,
@@ -1235,14 +1258,12 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	drm_gem_object_reference(&shadow_batch_obj->base);
 	list_add_tail(&vma->exec_list, &eb->vmas);
 
-	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
-
-	return shadow_batch_obj;
+	return vma;
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 	if (ret == -EACCES) /* unhandled chained batch */
-		return batch_obj;
+		return NULL;
 	else
 		return ERR_PTR(ret);
 }
@@ -1442,26 +1463,6 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev,
 	}
 }
 
-static struct drm_i915_gem_object *
-eb_get_batch(struct eb_vmas *eb)
-{
-	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
-		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
-
-	return vma->obj;
-}
-
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1470,7 +1471,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct eb_vmas *eb;
-	struct drm_i915_gem_object *batch_obj;
 	struct drm_i915_gem_exec_object2 shadow_exec_entry;
 	struct intel_engine_cs *ring;
 	struct intel_context *ctx;
@@ -1582,9 +1582,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto err;
 
-	/* take note of the batch buffer before we might reorder the lists */
-	batch_obj = eb_get_batch(eb);
-
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
 	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
@@ -1605,24 +1602,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (eb->batch_vma->obj->base.pending_write_domain) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
 	}
 
 	if (i915_needs_cmd_parser(ring) && args->batch_len) {
-		batch_obj = i915_gem_execbuffer_parse(ring,
-						      &shadow_exec_entry,
-						      eb,
-						      batch_obj,
-						      args->batch_start_offset,
-						      args->batch_len,
-						      file->is_master);
-		if (IS_ERR(batch_obj)) {
-			ret = PTR_ERR(batch_obj);
+		struct i915_vma *vma;
+
+		vma = i915_gem_execbuffer_parse(ring, &shadow_exec_entry, eb,
+						args->batch_start_offset,
+						args->batch_len,
+						file->is_master);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
 			goto err;
 		}
+		if (vma)
+			eb->batch_vma = vma;
 
 		/*
 		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
@@ -1641,7 +1639,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		exec_start = 0;
 	}
 
-	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
+	eb->batch_vma->obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
@@ -1657,17 +1655,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 * fitting due to fragmentation.
 		 * So this is actually safe.
 		 */
-		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		ret = i915_gem_obj_ggtt_pin(eb->batch_vma->obj, 0, 0);
 		if (ret)
 			goto err;
 
-		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+		exec_start += i915_gem_obj_ggtt_offset(eb->batch_vma->obj);
 	} else
-		exec_start += i915_gem_obj_offset(batch_obj, vm);
+		exec_start += eb->batch_vma->node.start;
 
 	ret = dev_priv->gt.execbuf_submit(dev, file, ring, ctx, args,
-					  &eb->vmas, batch_obj, exec_start,
-					  dispatch_flags);
+					  &eb->vmas, eb->batch_vma->obj,
+					  exec_start, dispatch_flags);
 
 	/*
 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
@@ -1676,7 +1674,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	 * active.
 	 */
 	if (dispatch_flags & I915_DISPATCH_SECURE)
-		i915_gem_object_ggtt_unpin(batch_obj);
+		i915_vma_unpin(eb->batch_vma);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
-- 
2.1.4
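
A note on intent, for readers skimming the diff: with the new interface a
caller that already holds the i915_vma pins and unpins it directly, instead
of going back through the object and address-space lookup that
i915_gem_object_pin() performs. The sketch below is illustrative only and
not part of the patch; use_vma_directly() is a made-up helper, while
i915_vma_pin()/i915_vma_unpin() are the declarations added to i915_drv.h
above.

/* Illustrative sketch only; not part of the patch. */
static int use_vma_directly(struct i915_vma *vma,
			    uint32_t size,
			    uint32_t alignment,
			    uint64_t flags)
{
	int ret;

	/* Reserve the node if needed and pin, operating on the vma itself. */
	ret = i915_vma_pin(vma, size, alignment, flags);
	if (ret)
		return ret;

	/* ... vma->node.start is now the address of this binding ... */

	/* Drop the pin once the work using the binding has been queued. */
	i915_vma_unpin(vma);
	return 0;
}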