Re: [PATCH 5/7] drm/i915: Dynamically allocate the array of drm_i915_gem_fence_reg

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 11, 2018 at 08:36:06AM +0100, Chris Wilson wrote:
> If we dynamically allocate the correct sized array for the fence
> registers, we can avoid the 4x overallocation on older, typically
> smaller devices and avoid having to know the static layout in advance.
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>

Feels a bit like micro-optimizing, but patch looks ok. I'd put these two
into i915_gem_fence_reg.h tough since you're already moving/creating them.
Either way:

Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx>

> ---
>  drivers/gpu/drm/i915/i915_gem.c           | 33 ------------
>  drivers/gpu/drm/i915/i915_gem_fence_reg.h |  2 -
>  drivers/gpu/drm/i915/i915_gem_gtt.c       | 64 +++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.h       |  3 +-
>  drivers/gpu/drm/i915/i915_vma.h           |  1 +
>  5 files changed, 62 insertions(+), 41 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index cbcba613b175..8eecd68f9e23 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5636,39 +5636,6 @@ i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
>  		dev_priv->gt.cleanup_engine(engine);
>  }
>  
> -void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
> -{
> -	struct drm_i915_private *dev_priv = ggtt->vm.i915;
> -	int i;
> -
> -	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
> -	    !IS_CHERRYVIEW(dev_priv))
> -		ggtt->num_fence_regs = 32;
> -	else if (INTEL_GEN(dev_priv) >= 4 ||
> -		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
> -		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
> -		ggtt->num_fence_regs = 16;
> -	else
> -		ggtt->num_fence_regs = 8;
> -
> -	if (intel_vgpu_active(dev_priv))
> -		ggtt->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num));
> -
> -	INIT_LIST_HEAD(&ggtt->fence_list);
> -
> -	/* Initialize fence registers to zero */
> -	for (i = 0; i < ggtt->num_fence_regs; i++) {
> -		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
> -
> -		fence->ggtt = ggtt;
> -		fence->id = i;
> -		list_add_tail(&fence->link, &ggtt->fence_list);
> -	}
> -	i915_gem_restore_fences(dev_priv);
> -
> -	i915_gem_detect_bit_6_swizzle(dev_priv);
> -}
> -
>  static void i915_gem_init__mm(struct drm_i915_private *i915)
>  {
>  	spin_lock_init(&i915->mm.object_stat_lock);
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> index c510f8efc1bb..6e66f6b3f851 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> @@ -56,8 +56,6 @@ struct drm_i915_fence_reg {
>  	bool dirty;
>  };
>  
> -void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
> -
>  struct drm_i915_fence_reg *
>  i915_reserve_fence(struct drm_i915_private *i915);
>  void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index abf41f90a925..e6787c3af544 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -37,6 +37,7 @@
>  #include <drm/i915_drm.h>
>  
>  #include "i915_drv.h"
> +#include "i915_gem_fence_reg.h"
>  #include "i915_vgpu.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> @@ -2901,6 +2902,51 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
>  	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
>  }
>  
> +static int i915_ggtt_init_fences(struct i915_ggtt *ggtt)
> +{
> +	struct drm_i915_private *dev_priv = ggtt->vm.i915;
> +	int i;
> +
> +	if (INTEL_GEN(dev_priv) >= 7 &&
> +	    !(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)))
> +		ggtt->num_fence_regs = 32;
> +	else if (INTEL_GEN(dev_priv) >= 4 ||
> +		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
> +		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
> +		ggtt->num_fence_regs = 16;
> +	else
> +		ggtt->num_fence_regs = 8;
> +
> +	if (intel_vgpu_active(dev_priv))
> +		ggtt->num_fence_regs = I915_READ(vgtif_reg(avail_rs.fence_num));
> +
> +	ggtt->fence_regs = kcalloc(ggtt->num_fence_regs,
> +				   sizeof(*ggtt->fence_regs),
> +				   GFP_KERNEL);
> +	if (!ggtt->fence_regs)
> +		return -ENOMEM;
> +
> +	INIT_LIST_HEAD(&ggtt->fence_list);
> +
> +	/* Initialize fence registers to zero */
> +	for (i = 0; i < ggtt->num_fence_regs; i++) {
> +		struct drm_i915_fence_reg *fence = &ggtt->fence_regs[i];
> +
> +		fence->ggtt = ggtt;
> +		fence->id = i;
> +		list_add_tail(&fence->link, &ggtt->fence_list);
> +	}
> +	i915_gem_restore_fences(dev_priv);
> +
> +	i915_gem_detect_bit_6_swizzle(dev_priv);
> +	return 0;
> +}
> +
> +static void i915_ggtt_cleanup_fences(struct i915_ggtt *ggtt)
> +{
> +	kfree(ggtt->fence_regs);
> +}
> +
>  int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  {
>  	/* Let GEM Manage all of the aperture.
> @@ -2990,6 +3036,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>  
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +	i915_ggtt_cleanup_fences(ggtt);
> +
>  	arch_phys_wc_del(ggtt->mtrr);
>  	io_mapping_fini(&ggtt->iomap);
>  
> @@ -3595,13 +3643,15 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
>  		ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> -	i915_ggtt_init_fences(ggtt);
> +	ret = i915_ggtt_init_fences(ggtt);
> +	if (ret)
> +		goto err_fini;
>  
>  	if (!io_mapping_init_wc(&ggtt->iomap,
>  				ggtt->gmadr.start,
>  				ggtt->mappable_end)) {
>  		ret = -EIO;
> -		goto out_gtt_cleanup;
> +		goto err_fences;
>  	}
>  
>  	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
> @@ -3612,12 +3662,18 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
>  	 */
>  	ret = i915_gem_init_stolen(dev_priv);
>  	if (ret)
> -		goto out_gtt_cleanup;
> +		goto err_io;
>  
>  	return 0;
>  
> -out_gtt_cleanup:
> +err_io:
> +	arch_phys_wc_del(ggtt->mtrr);
> +	io_mapping_fini(&ggtt->iomap);
> +err_fences:
> +	i915_ggtt_cleanup_fences(ggtt);
> +err_fini:
>  	ggtt->vm.cleanup(&ggtt->vm);
> +	i915_address_space_fini(&ggtt->vm);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index f35a85284b1a..f8c372dd6362 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -38,7 +38,6 @@
>  #include <linux/mm.h>
>  #include <linux/pagevec.h>
>  
> -#include "i915_gem_fence_reg.h"
>  #include "i915_request.h"
>  #include "i915_selftest.h"
>  #include "i915_timeline.h"
> @@ -398,7 +397,7 @@ struct i915_ggtt {
>  
>  	/** LRU list of objects with fence regs on them. */
>  	struct list_head fence_list;
> -	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
> +	struct drm_i915_fence_reg *fence_regs;
>  	int num_fence_regs;
>  
>  	struct drm_mm_node error_capture;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 925af79cc6d6..7df156e1ca06 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -30,6 +30,7 @@
>  
>  #include <drm/drm_mm.h>
>  
> +#include "i915_gem_fence_reg.h"
>  #include "i915_gem_gtt.h"
>  #include "i915_gem_object.h"
>  
> -- 
> 2.18.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux