Re: [PATCH 112/190] drm/i915: Move obj->active:5 to obj->flags

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 11, 2016 at 10:44:56AM +0000, Chris Wilson wrote:
> We are motivated to avoid using a bitfield for obj->active for a couple
> of reasons. Firstly, we wish to document our lockless read of obj->active
> using READ_ONCE inside i915_gem_busy_ioctl() and that requires an
> integral type (i.e. not a bitfield). Secondly, gcc produces abysmal code
> when presented with a bitfield and that shows up high on the profiles of
> request tracking (mainly due to excess memory traffic as it converts
> the bitfield to a register and back and generates frequent AGI in the
> process).
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

This patch, together with its dirty counterpart, seems to make a lot of sense,
but it seems the pre-requisites to make it apply are rather extensive;
I tried to tweak it to apply to a nightly, but that's not trivial.

Still, the concept seems sound.  I dunno if there's much point in this
right now, since it cannot be merged without the pre-requisites, but:

Reviewed-by: David Weinehall <david.weinehall@xxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c        |  2 +-
>  drivers/gpu/drm/i915/i915_drv.h            | 31 +++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem.c            | 20 +++++++++----------
>  drivers/gpu/drm/i915/i915_gem_batch_pool.c |  4 ++--
>  drivers/gpu/drm/i915/i915_gem_context.c    |  2 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.c        |  2 +-
>  drivers/gpu/drm/i915/i915_gem_shrinker.c   |  5 +++--
>  8 files changed, 53 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index dee66807c6bd..6b14c59828e3 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -136,7 +136,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>  
>  	seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ",
>  		   &obj->base,
> -		   obj->active ? "*" : " ",
> +		   i915_gem_object_is_active(obj) ? "*" : " ",
>  		   get_pin_flag(obj),
>  		   get_tiling_flag(obj),
>  		   get_global_flag(obj),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index efa43411f0eb..1ecff535973e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2031,12 +2031,16 @@ struct drm_i915_gem_object {
>  
>  	struct list_head batch_pool_link;
>  
> +	unsigned long flags;
>  	/**
>  	 * This is set if the object is on the active lists (has pending
>  	 * rendering and so a non-zero seqno), and is not set if it i s on
>  	 * inactive (ready to be unbound) list.
>  	 */
> -	unsigned int active:I915_NUM_RINGS;
> +#define I915_BO_ACTIVE_SHIFT 0
> +#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_RINGS) - 1)
> +#define I915_BO_ACTIVE(bo) ((bo)->flags & (I915_BO_ACTIVE_MASK << I915_BO_ACTIVE_SHIFT))
> +#define __I915_BO_ACTIVE(bo) (READ_ONCE((bo)->flags) & (I915_BO_ACTIVE_MASK << I915_BO_ACTIVE_SHIFT))
>  
>  	/**
>  	 * This is set if the object has been written to since last bound
> @@ -2151,6 +2155,31 @@ struct drm_i915_gem_object {
>  #define GEM_BUG_ON(expr)
>  #endif
>  
> +static inline bool
> +i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> +{
> +	return obj->flags & (I915_BO_ACTIVE_MASK << I915_BO_ACTIVE_SHIFT);
> +}
> +
> +static inline void
> +i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags |= 1 << (engine + I915_BO_ACTIVE_SHIFT);
> +}
> +
> +static inline void
> +i915_gem_object_unset_active(struct drm_i915_gem_object *obj, int engine)
> +{
> +	obj->flags &= ~(1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
> +static inline bool
> +i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
> +				  int engine)
> +{
> +	return obj->flags & (1 << (engine + I915_BO_ACTIVE_SHIFT));
> +}
> +
>  void i915_gem_track_fb(struct drm_i915_gem_object *old,
>  		       struct drm_i915_gem_object *new,
>  		       unsigned frontbuffer_bits);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 74c56716a304..6712ecf1239b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1130,7 +1130,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  {
>  	int ret, i;
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return 0;
>  
>  	if (readonly) {
> @@ -1143,7 +1143,7 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  			if (ret)
>  				return ret;
>  		}
> -		GEM_BUG_ON(obj->active);
> +		GEM_BUG_ON(i915_gem_object_is_active(obj));
>  	}
>  
>  	return 0;
> @@ -1165,7 +1165,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
>  	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
>  	BUG_ON(!dev_priv->mm.interruptible);
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return 0;
>  
>  	if (readonly) {
> @@ -2080,10 +2080,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active,
>  	struct drm_i915_gem_object *obj =
>  		container_of(active, struct drm_i915_gem_object, last_read[ring]);
>  
> -	GEM_BUG_ON((obj->active & (1 << ring)) == 0);
> +	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, ring));
>  
> -	obj->active &= ~(1 << ring);
> -	if (obj->active)
> +	i915_gem_object_unset_active(obj, ring);
> +	if (i915_gem_object_is_active(obj))
>  		return;
>  
>  	/* Bump our place on the bound list to keep it roughly in LRU order
> @@ -2373,7 +2373,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
>  {
>  	int i;
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return;
>  
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
> @@ -2459,7 +2459,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>  
>  	/* Need to make sure the object gets inactive eventually. */
>  	i915_gem_object_flush_active(obj);
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		goto out;
>  
>  	/* Do this after OLR check to make sure we make forward progress polling
> @@ -2557,7 +2557,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>  	struct drm_i915_gem_request *req[I915_NUM_RINGS];
>  	int ret, i, n;
>  
> -	if (!obj->active)
> +	if (!i915_gem_object_is_active(obj))
>  		return 0;
>  
>  	n = 0;
> @@ -3593,7 +3593,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	i915_gem_object_flush_active(obj);
>  
>  	BUILD_BUG_ON(I915_NUM_RINGS > 16);
> -	args->busy = obj->active << 16;
> +	args->busy = I915_BO_ACTIVE(obj) << 16;
>  	if (obj->last_write.request)
>  		args->busy |= obj->last_write.request->engine->id;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> index d4318665ac6c..5ec5b1439e1f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
> @@ -115,14 +115,14 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
>  
>  	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
>  		/* The batches are strictly LRU ordered */
> -		if (tmp->active) {
> +		if (i915_gem_object_is_active(tmp)) {
>  			struct drm_i915_gem_request *rq;
>  
>  			rq = tmp->last_read[pool->engine->id].request;
>  			if (!i915_gem_request_completed(rq))
>  				break;
>  
> -			GEM_BUG_ON(tmp->active & ~intel_engine_flag(pool->engine));
> +			GEM_BUG_ON((tmp->flags >> I915_BO_ACTIVE_SHIFT) & (~intel_engine_flag(pool->engine) & I915_BO_ACTIVE_MASK));
>  			GEM_BUG_ON(tmp->last_write.request);
>  		}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 15d5a5d247e0..9250a7405807 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -427,7 +427,7 @@ void i915_gem_context_fini(struct drm_device *dev)
>  		WARN_ON(!dev_priv->ring[RCS].last_context);
>  		if (dev_priv->ring[RCS].last_context == dctx) {
>  			/* Fake switch to NULL context */
> -			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
> +			WARN_ON(i915_gem_object_is_active(dctx->legacy_hw_ctx.rcs_state));
>  			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>  			i915_gem_context_unreference(dctx);
>  			dev_priv->ring[RCS].last_context = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 79dbd74b73c2..e66864bdbfb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -515,7 +515,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  	}
>  
>  	/* We can't wait for rendering with pagefaults disabled */
> -	if (obj->active && pagefault_disabled())
> +	if (i915_gem_object_is_active(obj) && pagefault_disabled())
>  		return -EFAULT;
>  
>  	if (use_cpu_reloc(obj))
> @@ -977,7 +977,7 @@ static int
>  i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  				struct list_head *vmas)
>  {
> -	const unsigned other_rings = ~intel_engine_flag(req->engine);
> +	const unsigned other_rings = (~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK) << I915_BO_ACTIVE_SHIFT;
>  	struct i915_vma *vma;
>  	uint32_t flush_domains = 0;
>  	bool flush_chipset = false;
> @@ -986,7 +986,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>  	list_for_each_entry(vma, vmas, exec_list) {
>  		struct drm_i915_gem_object *obj = vma->obj;
>  
> -		if (obj->active & other_rings) {
> +		if (obj->flags & other_rings) {
>  			ret = i915_gem_object_sync(obj, req);
>  			if (ret)
>  				return ret;
> @@ -1145,9 +1145,9 @@ void i915_vma_move_to_active(struct i915_vma *vma,
>  	 * add the active reference first and queue for it to be dropped
>  	 * *last*.
>  	 */
> -	if (obj->active == 0)
> +	if (!i915_gem_object_is_active(obj))
>  		drm_gem_object_reference(&obj->base);
> -	obj->active |= 1 << engine;
> +	i915_gem_object_set_active(obj, engine);
>  	i915_gem_request_mark_active(req, &obj->last_read[engine]);
>  
>  	if (flags & EXEC_OBJECT_WRITE) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8f3b2f051918..6652df57e5b0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3229,7 +3229,7 @@ i915_vma_retire(struct i915_gem_active *active,
>  		container_of(active, struct i915_vma, last_read[engine]);
>  
>  	GEM_BUG_ON((vma->active & (1 << engine)) == 0);
> -	GEM_BUG_ON((vma->obj->active & vma->active) != vma->active);
> +	GEM_BUG_ON(((vma->obj->flags >> I915_BO_ACTIVE_SHIFT) & vma->active) != vma->active);
>  
>  	vma->active &= ~(1 << engine);
>  	if (vma->active)
> diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> index 67f3eb9a8391..4d44def8fb03 100644
> --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
> +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
> @@ -150,7 +150,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
>  			    obj->madv != I915_MADV_DONTNEED)
>  				continue;
>  
> -			if ((flags & I915_SHRINK_ACTIVE) == 0 && obj->active)
> +			if ((flags & I915_SHRINK_ACTIVE) == 0 &&
> +			    i915_gem_object_is_active(obj))
>  				continue;
>  
>  			if (!can_release_pages(obj))
> @@ -233,7 +234,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
>  			count += obj->base.size >> PAGE_SHIFT;
>  
>  	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> -		if (!obj->active && can_release_pages(obj))
> +		if (!i915_gem_object_is_active(obj) && can_release_pages(obj))
>  			count += obj->base.size >> PAGE_SHIFT;
>  	}
>  
> -- 
> 2.7.0.rc3
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux