On 17 February 2017 at 14:07, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> Flushing the cachelines for an object is slow, can be as much as 100ms
> for a large framebuffer. We currently do this under the struct_mutex BKL
> on execution or on pageflip. But now with the ability to add fences to
> obj->resv for both flips and execbuf (and we naturally wait on the fence
> before CPU access), we can move the clflush operation to a workqueue and
> signal a fence for completion, thereby doing the work asynchronously and
> not blocking the driver or its clients.
>
> Suggested-by: Akash Goel <akash.goel@xxxxxxxxx>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: Matthew Auld <matthew.auld@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/Makefile              |   1 +
>  drivers/gpu/drm/i915/i915_drv.h            |   8 +-
>  drivers/gpu/drm/i915/i915_gem.c            |  66 ++--------
>  drivers/gpu/drm/i915/i915_gem_clflush.c    | 194 +++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   8 +-
>  drivers/gpu/drm/i915/intel_display.c       |  49 ++++----
>  6 files changed, 240 insertions(+), 86 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 1490d8622234..b1b580337c7a 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
>  # GEM code
>  i915-y += i915_cmd_parser.o \
>           i915_gem_batch_pool.o \
> +         i915_gem_clflush.o \
>           i915_gem_context.o \
>           i915_gem_dmabuf.o \
>           i915_gem_evict.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5005922f267b..97d3ecbcf8d4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3369,7 +3369,13 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
>  void i915_gem_reset(struct drm_i915_private *dev_priv);
>  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
>  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
> +
> +void i915_gem_clflush_init(struct drm_i915_private *i915);
> +void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> +                            unsigned int flags);
> +#define I915_CLFLUSH_FORCE BIT(0)
> +#define I915_CLFLUSH_SYNC BIT(1)
> +
>  void i915_gem_init_mmio(struct drm_i915_private *i915);
>  int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>  int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 96098a7e0bc3..fc11f5205e00 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1621,23 +1621,17 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
>  {
>         struct drm_i915_gem_sw_finish *args = data;
>         struct drm_i915_gem_object *obj;
> -       int err = 0;
>
>         obj = i915_gem_object_lookup(file, args->handle);
>         if (!obj)
>                 return -ENOENT;
>
>         /* Pinned buffers may be scanout, so flush the cache */
> -       if (READ_ONCE(obj->pin_display)) {
> -               err = i915_mutex_lock_interruptible(dev);
> -               if (!err) {
> -                       i915_gem_object_flush_cpu_write_domain(obj);
> -                       mutex_unlock(&dev->struct_mutex);
> -               }
> -       }
> +       if (READ_ONCE(obj->pin_display))
> +               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
>
>         i915_gem_object_put(obj);
> -       return err;
> +       return 0;
>  }
>
>  /**
> @@ -3155,46 +3149,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
>         return 0;
>  }
>
> -void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> -                            bool force)
> -{
> -       /* If we don't have a page list set up, then we're not pinned
> -        * to GPU, and we can ignore the cache flush because it'll happen
> -        * again at bind time.
> -        */
> -       if (!obj->mm.pages) {
> -               GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
> -               return;
> -       }
> -
> -       /*
> -        * Stolen memory is always coherent with the GPU as it is explicitly
> -        * marked as wc by the system, or the system is cache-coherent.
> -        * Similarly, we only access struct pages through the CPU cache, so
> -        * anything not backed by physical memory we consider to be always
> -        * coherent and not need clflushing.
> -        */
> -       if (!i915_gem_object_has_struct_page(obj))
> -               return;
> -
> -       /* If the GPU is snooping the contents of the CPU cache,
> -        * we do not need to manually clear the CPU cache lines. However,
> -        * the caches are only snooped when the render cache is
> -        * flushed/invalidated. As we always have to emit invalidations
> -        * and flushes when moving into and out of the RENDER domain, correct
> -        * snooping behaviour occurs naturally as the result of our domain
> -        * tracking.
> -        */
> -       if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
> -               obj->cache_dirty = true;
> -               return;
> -       }
> -
> -       trace_i915_gem_object_clflush(obj);
> -       drm_clflush_sg(obj->mm.pages);
> -       obj->cache_dirty = false;
> -}
> -
>  /** Flushes the GTT write domain for the object if it's dirty. */
>  static void
>  i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
> @@ -3238,8 +3192,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
>         if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
>                 return;
>
> -       i915_gem_clflush_object(obj, obj->pin_display);
> -       intel_fb_obj_flush(obj, false, ORIGIN_CPU);
> +       i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
>
>         obj->base.write_domain = 0;
>         trace_i915_gem_object_change_domain(obj,
> @@ -3610,10 +3563,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>         vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
>
>         /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
> -       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
> -               i915_gem_clflush_object(obj, true);
> -               intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -       }
> +       if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
>
>         old_write_domain = obj->base.write_domain;
>         old_read_domains = obj->base.read_domains;
> @@ -3687,8 +3638,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
>
>         /* Flush the CPU cache if it's still invalid. */
>         if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
> -               i915_gem_clflush_object(obj, false);
> -
> +               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
>                 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
>         }
>
> @@ -4560,6 +4510,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>
>         mutex_lock(&dev_priv->drm.struct_mutex);
>
> +       i915_gem_clflush_init(dev_priv);
> +
>         if (!i915.enable_execlists) {
>                 dev_priv->gt.resume = intel_legacy_submission_resume;
>                 dev_priv->gt.cleanup_engine = intel_engine_cleanup;
> diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
> new file mode 100644
> index 000000000000..1a8fc58aa925
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
> @@ -0,0 +1,194 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_frontbuffer.h"
> +
> +static DEFINE_SPINLOCK(clflush_lock);
> +static u64 clflush_context;
> +
> +struct clflushf {
> +       struct dma_fence dma;
> +       struct i915_sw_fence wait;
> +       struct work_struct work;
> +       struct drm_i915_gem_object *obj;
> +};
> +
> +static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
> +{
> +       return "i915";
> +}
> +
> +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
> +{
> +       return "clflush";
> +}
> +
> +static bool i915_clflush_enable_signaling(struct dma_fence *fence)
> +{
> +       return true;
> +}
> +
> +static void i915_clflush_release(struct dma_fence *fence)
> +{
> +       struct clflushf *f = container_of(fence, typeof(*f), dma);
> +
> +       i915_sw_fence_fini(&f->wait);
> +       dma_fence_free(&f->dma);

Don't we need a kfree(f) somewhere?

Also CI didn't look very happy, is this series meant to be standalone,
or is there something else waiting in the wings?
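
For reference, on the kfree(f) question above: unless I'm misreading
drivers/dma-buf/dma-fence.c, dma_fence_free() is just an RCU-deferred
kfree of the fence, roughly (paraphrased from memory, so take with a
grain of salt):

void dma_fence_free(struct dma_fence *fence)
{
        /* frees the allocation containing the fence once RCU readers are done */
        kfree_rcu(fence, rcu);
}

If that's right, and assuming f is kmalloc'ed as one block, then freeing
&f->dma only frees the whole struct because dma happens to be the first
member of struct clflushf. If that's the intent, maybe a
BUILD_BUG_ON(offsetof(typeof(*f), dma)) or at least a comment in
i915_clflush_release() would make it harder to break accidentally.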