Re: [PATCH v4 04/38] drm/i915: Split i915_dem_do_execbuffer() in half

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/11/2016 10:42 AM, John.C.Harrison@xxxxxxxxx wrote:
> From: John Harrison <John.C.Harrison@xxxxxxxxx>
> 
> Split the execbuffer() function in half. The first half collects and
> validates all the information required to process the batch buffer. It
> also does all the object pinning, relocations, active list management,
> etc - basically anything that must be done upfront before the IOCTL
> returns and allows the user land side to start changing/freeing
> things. The second half does the actual ring submission.
> 
> This change implements the split but leaves the back half being called
> directly from the end of the front half.
> 
> v2: Updated due to changes in underlying tree - addition of sync fence
> support and removal of cliprects.
> 
> v3: Moved local 'ringbuf' variable to make later patches in the
> series a bit neater.
> 
> v4: Corrected a typo in the commit message and downgraded a BUG_ON to
> a WARN_ON as the latter is preferred. Also removed all the external
> sync/fence support as that will now be a separate patch series.
> 
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  11 ++++
>  drivers/gpu/drm/i915/i915_gem.c            |   2 +
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 101 ++++++++++++++++++++++-------
>  drivers/gpu/drm/i915/intel_lrc.c           |  57 +++++++++++-----
>  drivers/gpu/drm/i915/intel_lrc.h           |   1 +
>  5 files changed, 130 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index acfe25f..2520459 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1679,10 +1679,18 @@ struct i915_execbuffer_params {
>  	struct drm_device               *dev;
>  	struct drm_file                 *file;
>  	uint32_t                        dispatch_flags;
> +	uint32_t                        args_flags;
>  	uint32_t                        args_batch_start_offset;
> +	uint32_t                        args_batch_len;
> +	uint32_t                        args_num_cliprects;
> +	uint32_t                        args_DR1;
> +	uint32_t                        args_DR4;
>  	uint64_t                        batch_obj_vm_offset;
>  	struct intel_engine_cs          *ring;
>  	struct drm_i915_gem_object      *batch_obj;
> +	struct drm_clip_rect            *cliprects;
> +	uint32_t                        instp_mask;
> +	int                             instp_mode;
>  	struct intel_context            *ctx;
>  	struct drm_i915_gem_request     *request;
>  };
> @@ -1944,6 +1952,7 @@ struct drm_i915_private {
>  		int (*execbuf_submit)(struct i915_execbuffer_params *params,
>  				      struct drm_i915_gem_execbuffer2 *args,
>  				      struct list_head *vmas);
> +		int (*execbuf_final)(struct i915_execbuffer_params *params);
>  		int (*init_rings)(struct drm_device *dev);
>  		void (*cleanup_ring)(struct intel_engine_cs *ring);
>  		void (*stop_ring)(struct intel_engine_cs *ring);
> @@ -2790,9 +2799,11 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>  					struct drm_i915_gem_request *req);
>  void i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params);
> +void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj);
>  int i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  				   struct drm_i915_gem_execbuffer2 *args,
>  				   struct list_head *vmas);
> +int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params);
>  int i915_gem_execbuffer(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  int i915_gem_execbuffer2(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e8ec49e..5bf7da6 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5220,11 +5220,13 @@ int i915_gem_init(struct drm_device *dev)
>  
>  	if (!i915.enable_execlists) {
>  		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
> +		dev_priv->gt.execbuf_final = i915_gem_ringbuffer_submission_final;
>  		dev_priv->gt.init_rings = i915_gem_init_rings;
>  		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
>  		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
>  	} else {
>  		dev_priv->gt.execbuf_submit = intel_execlists_submission;
> +		dev_priv->gt.execbuf_final = intel_execlists_submission_final;
>  		dev_priv->gt.init_rings = intel_logical_rings_init;
>  		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
>  		dev_priv->gt.stop_ring = intel_logical_ring_stop;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0eca2b6..eff171d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1181,41 +1181,38 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  	struct drm_device *dev = params->dev;
>  	struct intel_engine_cs *ring = params->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u64 exec_start, exec_len;
> -	int instp_mode;
> -	u32 instp_mask;
>  	int ret;
>  
> -	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> -	instp_mask = I915_EXEC_CONSTANTS_MASK;
> -	switch (instp_mode) {
> +	params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> +	params->instp_mask = I915_EXEC_CONSTANTS_MASK;
> +	switch (params->instp_mode) {
>  	case I915_EXEC_CONSTANTS_REL_GENERAL:
>  	case I915_EXEC_CONSTANTS_ABSOLUTE:
>  	case I915_EXEC_CONSTANTS_REL_SURFACE:
> -		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
> +		if (params->instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
>  			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
>  			return -EINVAL;
>  		}
>  
> -		if (instp_mode != dev_priv->relative_constants_mode) {
> +		if (params->instp_mode != dev_priv->relative_constants_mode) {
>  			if (INTEL_INFO(dev)->gen < 4) {
>  				DRM_DEBUG("no rel constants on pre-gen4\n");
>  				return -EINVAL;
>  			}
>  
>  			if (INTEL_INFO(dev)->gen > 5 &&
> -			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
> +			    params->instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
>  				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
>  				return -EINVAL;
>  			}
>  
>  			/* The HW changed the meaning on this bit on gen6 */
>  			if (INTEL_INFO(dev)->gen >= 6)
> -				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
> +				params->instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
>  		}
>  		break;
>  	default:
> -		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
> +		DRM_DEBUG("execbuf with unknown constants: %d\n", params->instp_mode);
>  		return -EINVAL;
>  	}
>  
> @@ -1225,7 +1222,33 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  
>  	i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
> -	/* To be split into two functions here... */
> +	ret = dev_priv->gt.execbuf_final(params);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Free everything that was stored in the QE structure (until the
> +	 * scheduler arrives and does it instead):
> +	 */
> +	if (params->dispatch_flags & I915_DISPATCH_SECURE)
> +		i915_gem_execbuff_release_batch_obj(params->batch_obj);
> +
> +	return 0;
> +}
> +
> +/*
> + * This is the main function for adding a batch to the ring.
> + * It is called from the scheduler, with the struct_mutex already held.
> + */
> +int i915_gem_ringbuffer_submission_final(struct i915_execbuffer_params *params)
> +{
> +	struct drm_i915_private *dev_priv = params->dev->dev_private;
> +	struct intel_engine_cs  *ring = params->ring;
> +	u64 exec_start, exec_len;
> +	int ret;
> +
> +	/* The mutex must be acquired before calling this function */
> +	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
>  
>  	intel_runtime_pm_get(dev_priv);
>  
> @@ -1246,7 +1269,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  	     "%s didn't clear reload\n", ring->name);
>  
>  	if (ring == &dev_priv->ring[RCS] &&
> -	    instp_mode != dev_priv->relative_constants_mode) {
> +	    params->instp_mode != dev_priv->relative_constants_mode) {
>  		ret = intel_ring_begin(params->request, 4);
>  		if (ret)
>  			goto error;
> @@ -1254,19 +1277,19 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  		intel_ring_emit(ring, MI_NOOP);
>  		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>  		intel_ring_emit(ring, INSTPM);
> -		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
> +		intel_ring_emit(ring, params->instp_mask << 16 | params->instp_mode);
>  		intel_ring_advance(ring);
>  
> -		dev_priv->relative_constants_mode = instp_mode;
> +		dev_priv->relative_constants_mode = params->instp_mode;
>  	}
>  
> -	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
> -		ret = i915_reset_gen7_sol_offsets(dev, params->request);
> +	if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
> +		ret = i915_reset_gen7_sol_offsets(params->dev, params->request);
>  		if (ret)
>  			goto error;
>  	}
>  
> -	exec_len   = args->batch_len;
> +	exec_len   = params->args_batch_len;
>  	exec_start = params->batch_obj_vm_offset +
>  		     params->args_batch_start_offset;
>  
> @@ -1584,23 +1607,40 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	params->file                    = file;
>  	params->ring                    = ring;
>  	params->dispatch_flags          = dispatch_flags;
> +	params->args_flags              = args->flags;
> +	params->args_batch_len          = args->batch_len;
> +	params->args_num_cliprects      = args->num_cliprects;
> +	params->args_DR1                = args->DR1;
> +	params->args_DR4                = args->DR4;
>  	params->batch_obj               = batch_obj;
>  	params->ctx                     = ctx;
>  
>  	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
> +	if (ret)
> +		goto err_batch_unpin;
> +
> +	/* the request owns the ref now */
> +	i915_gem_context_unreference(ctx);
>  
> -err_batch_unpin:
>  	/*
> -	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
> -	 * batch vma for correctness. For less ugly and less fragility this
> -	 * needs to be adjusted to also track the ggtt batch vma properly as
> -	 * active.
> +	 * The eb list is no longer required. The scheduler has extracted all
> +	 * the information than needs to persist.
>  	 */
> +	eb_destroy(eb);
> +
> +	/*
> +	 * Don't clean up everything that is now saved away in the queue.
> +	 * Just unlock and return immediately.
> +	 */
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return 0;
> +
> +err_batch_unpin:
>  	if (dispatch_flags & I915_DISPATCH_SECURE)
> -		i915_gem_object_ggtt_unpin(batch_obj);
> +		i915_gem_execbuff_release_batch_obj(batch_obj);
>  
>  err:
> -	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
>  	eb_destroy(eb);
>  
> @@ -1618,6 +1658,17 @@ pre_mutex_err:
>  	return ret;
>  }
>  
> +void i915_gem_execbuff_release_batch_obj(struct drm_i915_gem_object *batch_obj)
> +{
> +	/*
> +	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
> +	 * batch vma for correctness. For less ugly and less fragility this
> +	 * needs to be adjusted to also track the ggtt batch vma properly as
> +	 * active.
> +	 */
> +	i915_gem_object_ggtt_unpin(batch_obj);
> +}
> +
>  /*
>   * Legacy execbuffer just creates an exec2 list from the original exec object
>   * list array and passes it to the real function.
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4bf0ee6..014180c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -869,35 +869,31 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  	struct drm_device       *dev = params->dev;
>  	struct intel_engine_cs  *ring = params->ring;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
> -	u64 exec_start;
> -	int instp_mode;
> -	u32 instp_mask;
>  	int ret;
>  
> -	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> -	instp_mask = I915_EXEC_CONSTANTS_MASK;
> -	switch (instp_mode) {
> +	params->instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> +	params->instp_mask = I915_EXEC_CONSTANTS_MASK;
> +	switch (params->instp_mode) {
>  	case I915_EXEC_CONSTANTS_REL_GENERAL:
>  	case I915_EXEC_CONSTANTS_ABSOLUTE:
>  	case I915_EXEC_CONSTANTS_REL_SURFACE:
> -		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
> +		if (params->instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
>  			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
>  			return -EINVAL;
>  		}
>  
> -		if (instp_mode != dev_priv->relative_constants_mode) {
> -			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
> +		if (params->instp_mode != dev_priv->relative_constants_mode) {
> +			if (params->instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
>  				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
>  				return -EINVAL;
>  			}
>  
>  			/* The HW changed the meaning on this bit on gen6 */
> -			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
> +			params->instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
>  		}
>  		break;
>  	default:
> -		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
> +		DRM_DEBUG("execbuf with unknown constants: %d\n", params->instp_mode);
>  		return -EINVAL;
>  	}
>  
> @@ -912,7 +908,34 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  
>  	i915_gem_execbuffer_move_to_active(vmas, params->request);
>  
> -	/* To be split into two functions here... */
> +	ret = dev_priv->gt.execbuf_final(params);
> +	if (ret)
> +		return ret;
> +
> +	/*
> +	 * Free everything that was stored in the QE structure (until the
> +	 * scheduler arrives and does it instead):
> +	 */
> +	if (params->dispatch_flags & I915_DISPATCH_SECURE)
> +		i915_gem_execbuff_release_batch_obj(params->batch_obj);
> +
> +	return 0;
> +}
> +
> +/*
> + * This is the main function for adding a batch to the ring.
> + * It is called from the scheduler, with the struct_mutex already held.
> + */
> +int intel_execlists_submission_final(struct i915_execbuffer_params *params)
> +{
> +	struct drm_i915_private *dev_priv = params->dev->dev_private;
> +	struct intel_ringbuffer *ringbuf = params->request->ringbuf;
> +	struct intel_engine_cs *ring = params->ring;
> +	u64 exec_start;
> +	int ret;
> +
> +	/* The mutex must be acquired before calling this function */
> +	WARN_ON(!mutex_is_locked(&params->dev->struct_mutex));
>  
>  	/*
>  	 * Unconditionally invalidate gpu caches and ensure that we do flush
> @@ -923,7 +946,7 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  		return ret;
>  
>  	if (ring == &dev_priv->ring[RCS] &&
> -	    instp_mode != dev_priv->relative_constants_mode) {
> +	    params->instp_mode != dev_priv->relative_constants_mode) {
>  		ret = intel_logical_ring_begin(params->request, 4);
>  		if (ret)
>  			return ret;
> @@ -931,14 +954,14 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  		intel_logical_ring_emit(ringbuf, MI_NOOP);
>  		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
>  		intel_logical_ring_emit(ringbuf, INSTPM);
> -		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
> +		intel_logical_ring_emit(ringbuf, params->instp_mask << 16 | params->instp_mode);
>  		intel_logical_ring_advance(ringbuf);
>  
> -		dev_priv->relative_constants_mode = instp_mode;
> +		dev_priv->relative_constants_mode = params->instp_mode;
>  	}
>  
>  	exec_start = params->batch_obj_vm_offset +
> -		     args->batch_start_offset;
> +		     params->args_batch_start_offset;
>  
>  	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
>  	if (ret)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 4e60d54..8d9bad7 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -93,6 +93,7 @@ struct i915_execbuffer_params;
>  int intel_execlists_submission(struct i915_execbuffer_params *params,
>  			       struct drm_i915_gem_execbuffer2 *args,
>  			       struct list_head *vmas);
> +int intel_execlists_submission_final(struct i915_execbuffer_params *params);
>  u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
>  
>  void intel_lrc_irq_handler(struct intel_engine_cs *ring);

Just a nitpick on naming; I think prepare/commit might be better than submit/submit_final.  But you don't have to respin just for that.

Reviewed-by: Jesse Barnes <jbarnes@xxxxxxxxxxxxxxxx>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux