On Mon, 2014-05-05 at 16:07 +0200, Daniel Vetter wrote:
> On Mon, May 05, 2014 at 05:13:18PM +0530, akash.goel@xxxxxxxxx wrote:
> > From: Akash Goel <akash.goel@xxxxxxxxx>
> >
> > There is a use case where user space (the display compositor) tries
> > to directly flip a fb (without any prior rendering) on the primary
> > plane, so the backing pages of the object are allocated only at page
> > flip time, which takes time. Since this buffer is supposed to serve
> > as a blanking buffer (black colored), we can set up all the GTT
> > entries of the blanking buffer with the scratch page (which is
> > already zeroed out). This saves the time spent allocating real
> > backing physical space for the blanking buffer and flushing the CPU
> > cache.
> >
> > Signed-off-by: Akash Goel <akash.goel@xxxxxxxxx>
>
> That sounds very much like a special case of the fallocate ioctl where
> we simply allocate 0 real backing storage pages.
> -Daniel

Hi Daniel,

Yes, no real backing storage is needed, but the object should still be
allowed to be mapped into the GGTT, using the scratch page (which is
already zeroed out).

Is there a patch for a new drm/i915 ioctl similar to fallocate, which
could be used here?

Best regards
Akash
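To make the idea concrete, below is a rough sketch of the kind of uapi I
have in mind. This is purely illustrative: the structure, field and flag
names are all made up here and do not correspond to any existing or
proposed interface.

	/*
	 * HYPOTHETICAL uapi sketch (all names are placeholders): an ioctl
	 * that marks a page-aligned range of a GEM object as scratch-backed,
	 * so no real shmem pages are allocated and the zeroed scratch page
	 * serves any scanout of that range.
	 */
	struct drm_i915_gem_fallocate {
		__u32 handle;	/* GEM handle of the object to (de)populate */
		__u32 flags;	/* e.g. I915_GEM_FALLOCATE_SCRATCH below */
		__u64 offset;	/* start of the range, in bytes, page aligned */
		__u64 size;	/* length of the range, in bytes, page aligned */
	};

	/* Back the range with the scratch page only; allocate nothing */
	#define I915_GEM_FALLOCATE_SCRATCH	(1 << 0)

User space could then call this on the blanking buffer before the flip,
instead of the driver having to detect the unallocated object at flip
time as the patch below does.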
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c      | 18 ++++++++-
> >  drivers/gpu/drm/i915/i915_gem_gtt.c  |  8 ++++
> >  drivers/gpu/drm/i915/intel_display.c | 72 ++++++++++++++++++++++++++++++++++++
> >  3 files changed, 97 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index b19ccb8..7c3963c 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1781,6 +1781,17 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> >  	 * lists early. */
> >  	list_del(&obj->global_list);
> >
> > +	/*
> > +	 * If the object was so far backed by the scratch page, then remove
> > +	 * that association & make it reusable as a normal GEM object.
> > +	 */
> > +	if ((unsigned long)obj->pages == (unsigned long)(obj)) {
> > +		obj->pages = NULL;
> > +		obj->base.read_domains = obj->base.write_domain =
> > +			I915_GEM_DOMAIN_CPU;
> > +		return 0;
> > +	}
> > +
> >  	ops->put_pages(obj);
> >  	obj->pages = NULL;
> >
> > @@ -3772,7 +3783,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> >  	if (ret)
> >  		goto err_unpin_display;
> >
> > -	i915_gem_object_flush_cpu_write_domain(obj, true);
> > +	/*
> > +	 * Check if the object is backed by the scratch page; in that case
> > +	 * a CPU cache flush is not required, so skip it.
> > +	 */
> > +	if ((unsigned long)(obj->pages) != (unsigned long)obj)
> > +		i915_gem_object_flush_cpu_write_domain(obj, true);
> >
> >  	old_write_domain = obj->base.write_domain;
> >  	old_read_domains = obj->base.read_domains;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index f6354e0..fb3193a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -1566,6 +1566,14 @@ static void ggtt_bind_vma(struct i915_vma *vma,
> >  	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
> >  		if (!obj->has_global_gtt_mapping ||
> >  		    (cache_level != obj->cache_level)) {
> > +			if ((unsigned long)(obj->pages) == (unsigned long)obj) {
> > +				/* Leave the scratch page mapped into the GTT
> > +				 * entries of the object, as it is actually
> > +				 * supposed to be backed by the scratch page
> > +				 * only. */
> > +				obj->has_global_gtt_mapping = 1;
> > +				return;
> > +			}
> >  			vma->vm->insert_entries(vma->vm, obj->pages,
> >  						vma->node.start,
> >  						cache_level);
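As an aside on the checks above and in the hunks that follow: the
sentinel comparison against the object pointer is open-coded in several
places. It could be wrapped in a small helper to make the idiom
self-documenting; a possible sketch (hypothetical helper, not part of
this patch):

	/*
	 * HYPOTHETICAL helper (not in this patch): scratch-backed objects
	 * are marked by storing the object's own pointer in obj->pages
	 * instead of a real sg_table, so testing for that sentinel
	 * identifies them.
	 */
	static inline bool
	i915_gem_object_is_scratch_backed(struct drm_i915_gem_object *obj)
	{
		return obj->pages == (struct sg_table *)obj;
	}

Each "(unsigned long)obj->pages == (unsigned long)obj" test would then
read as i915_gem_object_is_scratch_backed(obj).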
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 59303213..dff85e4 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -40,6 +40,7 @@
> >  #include <drm/drm_dp_helper.h>
> >  #include <drm/drm_crtc_helper.h>
> >  #include <linux/dma_remapping.h>
> > +#include <linux/shmem_fs.h>
> >
> >  static void intel_increase_pllclock(struct drm_crtc *crtc);
> >  static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
> > @@ -8469,6 +8470,75 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
> >  	kfree(intel_crtc);
> >  }
> >
> > +static inline void
> > +intel_use_scratch_page_for_fb(struct drm_i915_gem_object *obj)
> > +{
> > +	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > +	int ret;
> > +
> > +	/* A fb being flipped without any allocated backing physical space
> > +	 * (without any prior rendering) is most probably going to be used
> > +	 * as a blanking buffer (black colored). So instead of allocating
> > +	 * real backing physical space for this buffer, we can try to back
> > +	 * the object by the scratch page, which is already allocated. So
> > +	 * if no shmem data pages have been allocated to the fb, we back it
> > +	 * by the scratch page and thus save time by avoiding the
> > +	 * allocation of backing physical space & the subsequent CPU cache
> > +	 * flush.
> > +	 */
> > +	if (obj->base.filp) {
> > +		struct inode *inode = file_inode(obj->base.filp);
> > +		struct shmem_inode_info *info = SHMEM_I(inode);
> > +		spin_lock(&info->lock);
> > +		ret = info->alloced;
> > +		spin_unlock(&info->lock);
> > +		if ((ret == 0) && (obj->pages == NULL)) {
> > +			/*
> > +			 * Set the 'pages' field to the object pointer
> > +			 * itself; this avoids the need for a new field in
> > +			 * the obj structure to identify an object backed by
> > +			 * the scratch page, and also avoids the call to
> > +			 * 'get_pages', thus also saving the time required
> > +			 * for allocation of the 'scatterlist' structure.
> > +			 */
> > +			obj->pages = (struct sg_table *)(obj);
> > +
> > +			/*
> > +			 * To avoid calls to gtt prepare & finish, as those
> > +			 * would dereference the 'pages' field.
> > +			 */
> > +			obj->has_dma_mapping = 1;
> > +			list_add_tail(&obj->global_list,
> > +				      &dev_priv->mm.unbound_list);
> > +
> > +			trace_printk("Using scratch page for obj %p\n", obj);
> > +		}
> > +	}
> > +}
> > +
> > +static inline void
> > +intel_drop_scratch_page_for_fb(struct drm_i915_gem_object *obj)
> > +{
> > +	int ret;
> > +
> > +	/*
> > +	 * Unmap the object backed by the scratch page, as it is no longer
> > +	 * being scanned out, so it can now be allowed to be used as a
> > +	 * normal object.
> > +	 * Assumption: user space will ensure that the object is reused for
> > +	 * rendering only once it is no longer being scanned out. This is a
> > +	 * valid assumption, as there is no such handling in the driver for
> > +	 * other regular fb objects either.
> > +	 */
> > +	if ((unsigned long)obj->pages == (unsigned long)obj) {
> > +		ret = i915_gem_object_ggtt_unbind(obj);
> > +		/* EBUSY is ok: it means the pin count is not yet zero */
> > +		if (ret && ret != -EBUSY)
> > +			DRM_ERROR("unbind error %d\n", ret);
> > +		i915_gem_object_put_pages(obj);
> > +		obj->has_dma_mapping = 0;
> > +	}
> > +}
> > +
> >  static void intel_unpin_work_fn(struct work_struct *__work)
> >  {
> >  	struct intel_unpin_work *work =
> > @@ -8477,6 +8547,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
> >
> >  	mutex_lock(&dev->struct_mutex);
> >  	intel_unpin_fb_obj(work->old_fb_obj);
> > +	intel_drop_scratch_page_for_fb(work->old_fb_obj);
> >  	drm_gem_object_unreference(&work->pending_flip_obj->base);
> >  	drm_gem_object_unreference(&work->old_fb_obj->base);
> >
> > @@ -8770,6 +8841,7 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
> >  	if (IS_VALLEYVIEW(dev) || ring == NULL || ring->id != RCS)
> >  		ring = &dev_priv->ring[BCS];
> >
> > +	intel_use_scratch_page_for_fb(obj);
> >  	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
> >  	if (ret)
> >  		goto err;
> > --
> > 1.9.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx