From: John Harrison <John.C.Harrison@xxxxxxxxx> Split the execbuffer() function in half. The first half collects and validates all the information requried to process the batch buffer. It also does all the object pinning, relocations, active list management, etc - basically anything that must be done upfront before the IOCTL returns and allows the user land side to start changing/freeing things. The second half does the actual ring submission. This change implements the split but leaves the back half being called directly from the end of the front half. Change-Id: I5e1c77639ce526ab2401b0323186c518bf13da0a For: VIZ-1587 Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 39 ++++--- drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 172 +++++++++++++++++++--------- drivers/gpu/drm/i915/intel_lrc.c | 72 ++++++++---- drivers/gpu/drm/i915/intel_lrc.h | 10 +- 5 files changed, 197 insertions(+), 98 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8628a83..f885a90 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1525,6 +1525,25 @@ struct i915_workarounds { u32 count; }; +struct i915_execbuffer_params { + struct drm_device *dev; + struct drm_file *file; + uint32_t dispatch_flags; + uint32_t args_flags; + uint32_t args_batch_start_offset; + uint32_t args_batch_len; + uint32_t args_num_cliprects; + uint32_t args_DR1; + uint32_t args_DR4; + uint32_t batch_obj_vm_offset; + struct intel_engine_cs *ring; + struct drm_i915_gem_object *batch_obj; + struct drm_clip_rect *cliprects; + uint32_t instp_mask; + int instp_mode; + struct intel_context *ctx; +}; + struct drm_i915_private { struct drm_device *dev; struct kmem_cache *slab; @@ -1777,13 +1796,10 @@ struct drm_i915_private { struct { int (*alloc_request)(struct intel_engine_cs *ring, struct intel_context *ctx); - int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, + int (*do_execbuf)(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags); + struct list_head *vmas); + int (*do_execfinal)(struct i915_execbuffer_params *params); int (*init_rings)(struct drm_device *dev); void (*cleanup_ring)(struct intel_engine_cs *ring); void (*stop_ring)(struct intel_engine_cs *ring); @@ -2464,14 +2480,11 @@ void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct drm_i915_gem_object *obj); -int i915_gem_ringbuffer_submission(struct drm_device *dev, - struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, +void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj); +int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *qe, struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags); + struct list_head *vmas); +int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 09332ff..031af9d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4872,12 +4872,14 @@ int i915_gem_init(struct drm_device *dev) if (!i915.enable_execlists) { dev_priv->gt.alloc_request = intel_ring_alloc_request; dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; + dev_priv->gt.do_execfinal = i915_gem_ringbuffer_submission_final; dev_priv->gt.init_rings = i915_gem_init_rings; dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; dev_priv->gt.stop_ring = intel_stop_ring_buffer; } else { dev_priv->gt.alloc_request = intel_logical_ring_alloc_request; dev_priv->gt.do_execbuf = intel_execlists_submission; + dev_priv->gt.do_execfinal = intel_execlists_submission_final; dev_priv->gt.init_rings = intel_logical_rings_init; dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; dev_priv->gt.stop_ring = intel_logical_ring_stop; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a339556..3e56494 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1062,20 +1062,14 @@ i915_emit_box(struct intel_engine_cs *ring, int -i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, +i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 dispatch_flags) + struct list_head *vmas) { - struct drm_clip_rect *cliprects = NULL; + struct drm_device *dev = params->dev; + struct intel_engine_cs *ring = params->ring; struct drm_i915_private *dev_priv = dev->dev_private; - u64 exec_len; - int instp_mode; - u32 instp_mask; - int i, ret = 0; + int ret = 0; if (args->num_cliprects != 0) { if (ring != &dev_priv->ring[RCS]) { @@ -1088,23 +1082,23 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, return -EINVAL; } - if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { + if (args->num_cliprects > UINT_MAX / sizeof(*params->cliprects)) { DRM_DEBUG("execbuf with %u cliprects\n", args->num_cliprects); return -EINVAL; } - cliprects = kcalloc(args->num_cliprects, - sizeof(*cliprects), + params->cliprects = kcalloc(args->num_cliprects, + sizeof(*params->cliprects), GFP_KERNEL); - if (cliprects == NULL) { + if (params->cliprects == NULL) { ret = -ENOMEM; goto error; } - if (copy_from_user(cliprects, + if (copy_from_user(params->cliprects, to_user_ptr(args->cliprects_ptr), - sizeof(*cliprects)*args->num_cliprects)) { + sizeof(*params->cliprects)*args->num_cliprects)) { ret = -EFAULT; goto error; } @@ -1120,19 +1114,19 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, } } - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { + params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + params->instp_mask = I915_EXEC_CONSTANTS_MASK; + switch (params->instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { + if (params->instp_mode != 0 && ring != &dev_priv->ring[RCS]) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); ret = -EINVAL; goto error; } - if (instp_mode != dev_priv->relative_constants_mode) { + if (params->instp_mode != dev_priv->relative_constants_mode) { if (INTEL_INFO(dev)->gen < 4) { DRM_DEBUG("no rel constants on pre-gen4\n"); ret = -EINVAL; @@ -1140,7 +1134,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, } if (INTEL_INFO(dev)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { + params->instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); ret = -EINVAL; goto error; @@ -1148,11 +1142,11 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, /* The HW changed the meaning on this bit on gen6 */ if (INTEL_INFO(dev)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + params->instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + DRM_DEBUG("execbuf with unknown constants: %d\n", params->instp_mode); ret = -EINVAL; goto error; } @@ -1163,7 +1157,36 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, i915_gem_execbuffer_move_to_active(vmas, ring); - /* To be split into two functions here... */ + ret = dev_priv->gt.do_execfinal(params); + if (ret) + goto error; + + /* Free everything that was stored in the QE structure (until the + * scheduler arrives and does it instead): */ + kfree(params->cliprects); + if (params->dispatch_flags & I915_DISPATCH_SECURE) + i915_gem_execbuff_release_batch_obj(params->batch_obj); + + return 0; + +error: + kfree(params->cliprects); + return ret; +} + +/* + * This is the main function for adding a batch to the ring. + * It is called from the scheduler, with the struct_mutex already held. + */ +int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params) +{ + struct drm_i915_private *dev_priv = params->dev->dev_private; + struct intel_engine_cs *ring = params->ring; + u64 exec_start, exec_len; + int ret, i; + + /* The mutex must be acquired before calling this function */ + BUG_ON(!mutex_is_locked(¶ms->dev->struct_mutex)); intel_runtime_pm_get(dev_priv); @@ -1175,12 +1198,12 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, goto error; /* Switch to the correct context for the batch */ - ret = i915_switch_context(ring, ctx); + ret = i915_switch_context(ring, params->ctx); if (ret) goto error; if (ring == &dev_priv->ring[RCS] && - instp_mode != dev_priv->relative_constants_mode) { + params->instp_mode != dev_priv->relative_constants_mode) { ret = intel_ring_begin(ring, 4); if (ret) goto error; @@ -1188,50 +1211,53 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); + intel_ring_emit(ring, params->instp_mask << 16 | params->instp_mode); intel_ring_advance(ring); - dev_priv->relative_constants_mode = instp_mode; + dev_priv->relative_constants_mode = params->instp_mode; } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { - ret = i915_reset_gen7_sol_offsets(dev, ring); + if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) { + ret = i915_reset_gen7_sol_offsets(params->dev, ring); if (ret) goto error; } - exec_len = args->batch_len; - if (cliprects) { - for (i = 0; i < args->num_cliprects; i++) { - ret = i915_emit_box(ring, &cliprects[i], - args->DR1, args->DR4); + exec_len = params->args_batch_len; + exec_start = params->batch_obj_vm_offset + + params->args_batch_start_offset; + + if (params->cliprects) { + for (i = 0; i < params->args_num_cliprects; i++) { + ret = i915_emit_box(ring, ¶ms->cliprects[i], + params->args_DR1, params->args_DR4); if (ret) goto error; ret = ring->dispatch_execbuffer(ring, exec_start, exec_len, - dispatch_flags); + params->dispatch_flags); if (ret) goto error; } } else { ret = ring->dispatch_execbuffer(ring, exec_start, exec_len, - dispatch_flags); + params->dispatch_flags); if (ret) goto error; } - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); + trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); - i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); + i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, + params->batch_obj); error: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); - kfree(cliprects); return ret; } @@ -1297,8 +1323,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct intel_engine_cs *ring; struct intel_context *ctx; struct i915_address_space *vm; + struct i915_execbuffer_params params_master; /* XXX: will be removed later */ + struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); - u64 exec_start = args->batch_start_offset; u32 dispatch_flags; int ret; bool need_relocs, batch_pinned = false; @@ -1367,6 +1394,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, else vm = &dev_priv->gtt.base; + memset(¶ms_master, 0x00, sizeof(params_master)); + eb = eb_create(args); if (eb == NULL) { i915_gem_context_unreference(ctx); @@ -1449,30 +1478,52 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; batch_pinned = true; - exec_start += i915_gem_obj_ggtt_offset(batch_obj); + params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); } else - exec_start += i915_gem_obj_offset(batch_obj, vm); + params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); /* Allocate a request for this batch buffer nice and early. */ ret = dev_priv->gt.alloc_request(ring, ctx); if (ret) goto err; - ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, - &eb->vmas, batch_obj, exec_start, - dispatch_flags); + /* Save assorted stuff away to pass through to *_submission_final() */ + params->dev = dev; + params->file = file; + params->ring = ring; + params->dispatch_flags = dispatch_flags; + params->args_flags = args->flags; + params->args_batch_start_offset = args->batch_start_offset; + params->args_batch_len = args->batch_len; + params->args_num_cliprects = args->num_cliprects; + params->args_DR1 = args->DR1; + params->args_DR4 = args->DR4; + params->batch_obj = batch_obj; + params->ctx = ctx; + + ret = dev_priv->gt.do_execbuf(params, args, &eb->vmas); + if (ret) + goto err; + + /* the request owns the ref now */ + i915_gem_context_unreference(ctx); + + /* The eb list is no longer required. The scheduler has extracted all + * the information than needs to persist. */ + eb_destroy(eb); -err: /* - * FIXME: We crucially rely upon the active tracking for the (ppgtt) - * batch vma for correctness. For less ugly and less fragility this - * needs to be adjusted to also track the ggtt batch vma properly as - * active. + * Don't clean up everything that is now saved away in the queue. + * Just unlock and return immediately. */ + mutex_unlock(&dev->struct_mutex); + + return ret; + +err: if (batch_pinned) - i915_gem_object_ggtt_unpin(batch_obj); + i915_gem_execbuff_release_batch_obj(batch_obj); - /* the request owns the ref now */ i915_gem_context_unreference(ctx); eb_destroy(eb); @@ -1482,6 +1533,17 @@ pre_mutex_err: return ret; } +void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj) +{ + /* + * FIXME: We crucially rely upon the active tracking for the (ppgtt) + * batch vma for correctness. For less ugly and less fragility this + * needs to be adjusted to also track the ggtt batch vma properly as + * active. + */ + i915_gem_object_ggtt_unpin(batch_obj); +} + /* * Legacy execbuffer just creates an exec2 list from the original exec object * list array and passes it to the real function. diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f16b15d..971ba0c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -650,43 +650,39 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, * * Return: non-zero if the submission fails. */ -int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, +int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 dispatch_flags) + struct list_head *vmas) { + struct drm_device *dev = params->dev; + struct intel_engine_cs *ring = params->ring; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; - int instp_mode; - u32 instp_mask; + struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; int ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { + params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + params->instp_mask = I915_EXEC_CONSTANTS_MASK; + switch (params->instp_mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { + if (params->instp_mode != 0 && ring != &dev_priv->ring[RCS]) { DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); return -EINVAL; } - if (instp_mode != dev_priv->relative_constants_mode) { - if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { + if (params->instp_mode != dev_priv->relative_constants_mode) { + if (params->instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); return -EINVAL; } /* The HW changed the meaning on this bit on gen6 */ - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + params->instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + DRM_DEBUG("execbuf with unknown constants: %d\n", params->instp_mode); return -EINVAL; } @@ -716,7 +712,32 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, i915_gem_execbuffer_move_to_active(vmas, ring); - /* To be split into two functions here... */ + ret = dev_priv->gt.do_execfinal(params); + if (ret) + return ret; + + /* Free everything that was stored in the QE structure (until the + * scheduler arrives and does it instead): */ + if (params->dispatch_flags & I915_DISPATCH_SECURE) + i915_gem_execbuff_release_batch_obj(params->batch_obj); + + return 0; +} + +/* + * This is the main function for adding a batch to the ring. + * It is called from the scheduler, with the struct_mutex already held. + */ +int intel_execlists_submission_final(struct i915_execbuffer_params *params) +{ + struct drm_i915_private *dev_priv = params->dev->dev_private; + struct intel_engine_cs *ring = params->ring; + struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf; + u64 exec_start; + int ret; + + /* The mutex must be acquired before calling this function */ + BUG_ON(!mutex_is_locked(¶ms->dev->struct_mutex)); /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. @@ -726,7 +747,7 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, return ret; if (ring == &dev_priv->ring[RCS] && - instp_mode != dev_priv->relative_constants_mode) { + params->instp_mode != dev_priv->relative_constants_mode) { ret = intel_logical_ring_begin(ringbuf, 4); if (ret) return ret; @@ -734,19 +755,22 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, intel_logical_ring_emit(ringbuf, MI_NOOP); intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); intel_logical_ring_emit(ringbuf, INSTPM); - intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); + intel_logical_ring_emit(ringbuf, params->instp_mask << 16 | params->instp_mode); intel_logical_ring_advance(ringbuf); - dev_priv->relative_constants_mode = instp_mode; + dev_priv->relative_constants_mode = params->instp_mode; } - ret = ring->emit_bb_start(ringbuf, exec_start, dispatch_flags); + exec_start = params->batch_obj_vm_offset + + params->args_batch_start_offset; + + ret = ring->emit_bb_start(ringbuf, exec_start, params->dispatch_flags); if (ret) return ret; - trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags); + trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), params->dispatch_flags); - i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); + i915_gem_execbuffer_retire_commands(params->dev, params->file, ring, params->batch_obj); return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 2bf868a..ea083d9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -76,13 +76,11 @@ void intel_lr_context_unpin(struct intel_engine_cs *ring, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); -int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, +struct i915_execbuffer_params; +int intel_execlists_submission(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 dispatch_flags); + struct list_head *vmas); +int intel_execlists_submission_final(struct i915_execbuffer_params *params); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); /** -- 1.7.9.5 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx