On Mon, 2014-05-05 at 16:07 +0200, Daniel Vetter wrote:
> On Mon, May 05, 2014 at 05:13:18PM +0530, akash.goel@xxxxxxxxx wrote:
> > From: Akash Goel <akash.goel@xxxxxxxxx>
> >
> > There is a use case where user space (the display compositor) tries
> > to directly flip a fb (without any prior rendering) on the primary
> > plane, so the backing pages of the object are allocated only at page
> > flip time, which takes time. Since this buffer is supposed to serve
> > as a blanking buffer (black colored), we can set up all the GTT
> > entries of the blanking buffer with the scratch page (which is
> > already zeroed out). This saves the time spent allocating real
> > backing physical space for the blanking buffer and flushing the CPU
> > cache.
> >
> > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx>
>
> That sounds very much like a special case of the fallocate ioctl where
> we simply allocate 0 real backing storage pages.
> -Daniel

Hi Daniel,

Yes, no real backing storage is needed, but the object should still be
allowed to be mapped into the GGTT, using the scratch page (which is
already zeroed out).

Is there a patch for a new drm/i915 ioctl similar to fallocate, which
could be used here?

Best regards
Akash
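To make the idea concrete, below is a rough sketch of the kind of uapi I
have in mind. This is purely illustrative: the structure, field and flag
names are all made up here and do not correspond to any existing or
proposed interface.

	/*
	 * HYPOTHETICAL uapi sketch (all names are placeholders): an ioctl
	 * that marks a page-aligned range of a GEM object as scratch-backed,
	 * so no real shmem pages are allocated and the zeroed scratch page
	 * serves any scanout of that range.
	 */
	struct drm_i915_gem_fallocate {
		__u32 handle;	/* GEM handle of the object to (de)populate */
		__u32 flags;	/* e.g. I915_GEM_FALLOCATE_SCRATCH below */
		__u64 offset;	/* start of the range, in bytes, page aligned */
		__u64 size;	/* length of the range, in bytes, page aligned */
	};

	/* Back the range with the scratch page only; allocate nothing */
	#define I915_GEM_FALLOCATE_SCRATCH	(1 << 0)

User space could then call this on the blanking buffer before the flip,
instead of the driver having to detect the unallocated object at flip
time as the patch below does.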
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c      | 18 ++++++++-
> >  drivers/gpu/drm/i915/i915_gem_gtt.c  |  8 ++++
> >  drivers/gpu/drm/i915/intel_display.c | 72 ++++++++++++++++++++++++++++++++++++
> >  3 files changed, 97 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index b19ccb8..7c3963c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1781,6 +1781,17 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> >  	 * lists early. */
> >  	list_del(&obj->global_list);
> >
> > +	/*
> > +	 * If the object was so far backed by the scratch page, then remove
> > +	 * that association & make it reusable as a normal GEM object.
> > +	 */
> > +	if ((unsigned long)obj->pages == (unsigned long)(obj)) {
> > +		obj->pages = NULL;
> > +		obj->base.read_domains = obj->base.write_domain =
> > +			I915_GEM_DOMAIN_CPU;
> > +		return 0;
> > +	}
> > +
> >  	ops->put_pages(obj);
> >  	obj->pages = NULL;
> >
> > @@ -3772,7 +3783,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> >  	if (ret)
> >  		goto err_unpin_display;
> >
> > -	i915_gem_object_flush_cpu_write_domain(obj, true);
> > +	/*
> > +	 * Check if the object is backed by the scratch page; in that case
> > +	 * a CPU cache flush is not required, so skip it.
> > +	 */
> > +	if ((unsigned long)(obj->pages) != (unsigned long)obj)
> > +		i915_gem_object_flush_cpu_write_domain(obj, true);
> >
> >  	old_write_domain = obj->base.write_domain;
> >  	old_read_domains = obj->base.read_domains;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index f6354e0..fb3193a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1566,6 +1566,14 @@ static void ggtt_bind_vma(struct i915_vma *vma,
> >  	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
> >  		if (!obj->has_global_gtt_mapping ||
> >  		    (cache_level != obj->cache_level)) {
> > +			if ((unsigned long)(obj->pages) == (unsigned long)obj) {
> > +				/* Leave the scratch page mapped into the GTT
> > +				 * entries of the object, as it is actually
> > +				 * supposed to be backed by the scratch page
> > +				 * only. */
> > +				obj->has_global_gtt_mapping = 1;
> > +				return;
> > +			}
> >  			vma->vm->insert_entries(vma->vm, obj->pages,
> >  						vma->node.start,
> >  						cache_level);
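As an aside on the checks above and in the hunks that follow: the
sentinel comparison against the object pointer is open-coded in several
places. It could be wrapped in a small helper to make the idiom
self-documenting; a possible sketch (hypothetical helper, not part of
this patch):

	/*
	 * HYPOTHETICAL helper (not in this patch): scratch-backed objects
	 * are marked by storing the object's own pointer in obj->pages
	 * instead of a real sg_table, so testing for that sentinel
	 * identifies them.
	 */
	static inline bool
	i915_gem_object_is_scratch_backed(struct drm_i915_gem_object *obj)
	{
		return obj->pages == (struct sg_table *)obj;
	}

Each "(unsigned long)obj->pages == (unsigned long)obj" test would then
read as i915_gem_object_is_scratch_backed(obj).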
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 59303213..dff85e4 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -40,6 +40,7 @@
> >  #include <drm/drm_dp_helper.h>
> >  #include <drm/drm_crtc_helper.h>
> >  #include <linux/dma_remapping.h>
> > +#include <linux/shmem_fs.h>
> >
> >  static void intel_increase_pllclock(struct drm_crtc *crtc);
> >  static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
> > @@ -8469,6 +8470,75 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
> >  	kfree(intel_crtc);
> >  }
> >
> > +static inline void
> > +intel_use_scratch_page_for_fb(struct drm_i915_gem_object *obj)
> > +{
> > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > +	int ret;
> > +
> > +	/* A fb being flipped without any allocated backing physical space
> > +	 * (without any prior rendering) is most probably going to be used
> > +	 * as a blanking buffer (black colored). So instead of allocating
> > +	 * real backing physical space for this buffer, we can try to back
> > +	 * the object by the scratch page, which is already allocated. So
> > +	 * if no shmem data pages have been allocated to the fb, we back it
> > +	 * by the scratch page and thus save time by avoiding the
> > +	 * allocation of backing physical space & the subsequent CPU cache
> > +	 * flush.
> > +	 */
> > +	if (obj->base.filp) {
> > +		struct inode *inode = file_inode(obj->base.filp);
> > +		struct shmem_inode_info *info = SHMEM_I(inode);
> > +		spin_lock(&info->lock);
> > +		ret = info->alloced;
> > +		spin_unlock(&info->lock);
> > +		if ((ret == 0) && (obj->pages == NULL)) {
> > +			/*
> > +			 * Set the 'pages' field to the object pointer
> > +			 * itself; this avoids the need for a new field in
> > +			 * the obj structure to identify an object backed by
> > +			 * the scratch page, and also avoids the call to
> > +			 * 'get_pages', thus also saving the time required
> > +			 * for allocation of the 'scatterlist' structure.
> > +			 */
> > +			obj->pages = (struct sg_table *)(obj);
> > +
> > +			/*
> > +			 * To avoid calls to gtt prepare & finish, as those
> > +			 * would dereference the 'pages' field.
> > +			 */
> > +			obj->has_dma_mapping = 1;
> > +			list_add_tail(&obj->global_list,
> > +				      &dev_priv->mm.unbound_list);
> > +
> > +			trace_printk("Using scratch page for obj %p\n", obj);
> > +		}
> > +	}
> > +}
> > +
> > +static inline void
> > +intel_drop_scratch_page_for_fb(struct drm_i915_gem_object *obj)
> > +{
> > +	int ret;
> > +
> > +	/*
> > +	 * Unmap the object backed by the scratch page, as it is no longer
> > +	 * being scanned out, so it can now be allowed to be used as a
> > +	 * normal object.
> > +	 * Assumption: user space will ensure that the object is reused for
> > +	 * rendering only once it is no longer being scanned out. This is a
> > +	 * valid assumption, as there is no such handling in the driver for
> > +	 * other regular fb objects either.
> > +	 */
> > +	if ((unsigned long)obj->pages == (unsigned long)obj) {
> > +		ret = i915_gem_object_ggtt_unbind(obj);
> > +		/* EBUSY is ok: it means the pin count is not yet zero */
> > +		if (ret && ret != -EBUSY)
> > +			DRM_ERROR("unbind error %d\n", ret);
> > +		i915_gem_object_put_pages(obj);
> > +		obj->has_dma_mapping = 0;
> > +	}
> > +}
> > +
> >  static void intel_unpin_work_fn(struct work_struct *__work)
> >  {
> >  	struct intel_unpin_work *work =
> > @@ -8477,6 +8547,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> >
> >  	mutex_lock(&dev->struct_mutex);
> >  	intel_unpin_fb_obj(work->old_fb_obj);
> > +	intel_drop_scratch_page_for_fb(work->old_fb_obj);
> >  	drm_gem_object_unreference(&work->pending_flip_obj->base);
> >  	drm_gem_object_unreference(&work->old_fb_obj->base);
> >
> > @@ -8770,6 +8841,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> >  	if (IS_VALLEYVIEW(dev) || ring == NULL || ring->id != RCS)
> >  		ring = &dev_priv->ring[BCS];
> >
> > +	intel_use_scratch_page_for_fb(obj);
> >  	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> >  	if (ret)
> >  		goto err;
> > --
> > 1.9.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx