On Mon, 2017-02-20 at 20:59 +0000, Chris Wilson wrote:
> Flushing the cachelines for an object is slow; it can be as much as
> 100ms for a large framebuffer. We currently do this under the
> struct_mutex BKL on execution or on pageflip. But now with the ability
> to add fences to obj->resv for both flips and execbuf (and we
> naturally wait on the fence before CPU access), we can move the
> clflush operation to a workqueue and signal a fence for completion,
> thereby doing the work asynchronously and not blocking the driver or
> its clients.
> 
> Suggested-by: Akash Goel <akash.goel@xxxxxxxxx>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: Matthew Auld <matthew.auld@xxxxxxxxx>

<SNIP>

> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3389,7 +3389,13 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
>  void i915_gem_reset(struct drm_i915_private *dev_priv);
>  void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
>  void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
> -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);

Should these declarations move into a new i915_gem_clflush.h?

> +
> +void i915_gem_clflush_init(struct drm_i915_private *i915);
> +void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> +			     unsigned int flags);
> +#define I915_CLFLUSH_FORCE BIT(0)
> +#define I915_CLFLUSH_SYNC BIT(1)
> +
>  void i915_gem_init_mmio(struct drm_i915_private *i915);
>  int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
>  int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);

<SNIP>

> +static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
> +{
> +	return "i915";

Why not DRIVER_NAME?

> +static void i915_clflush_release(struct dma_fence *fence)
> +{
> +	struct clflushf *f = container_of(fence, typeof(*f), dma);

Better variable and struct names needed.

> +
> +	i915_sw_fence_fini(&f->wait);
> +
> +	BUILD_BUG_ON(offsetof(typeof(*f), dma));

Maybe add a comment at the struct definition, too?

> +static bool cpu_cache_is_coherent(struct drm_device *dev,
> +				  enum i915_cache_level level)
> +{
> +	return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE;
> +}

Aren't we using this elsewhere, too?

> +
> +static void __i915_do_clflush(struct drm_i915_gem_object *obj)
> +{
> +	drm_clflush_sg(obj->mm.pages);
> +	obj->cache_dirty = false;
> +
> +	intel_fb_obj_flush(obj, false, ORIGIN_CPU);

With this hardcoded, the use of ORIGIN_DIRTYFB disappears. Is nobody
going to miss it?

> +}
> +
> +static void i915_clflush_work(struct work_struct *work)
> +{
> +	struct clflushf *f = container_of(work, typeof(*f), work);
> +	struct drm_i915_gem_object *obj = f->obj;
> +
> +	if (i915_gem_object_pin_pages(obj)) {
> +		obj->cache_dirty = true;
> +		goto out;
> +	}

For the time being, I'd just WARN here.

> +
> +	__i915_do_clflush(obj);
> +
> +	i915_gem_object_unpin_pages(obj);
> +
> +out:
> +	i915_gem_object_put(obj);
> +
> +	dma_fence_signal(&f->dma);
> +	dma_fence_put(&f->dma);
> +}
> +
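For readers following along, the fence-plus-workqueue pattern the patch
builds on boils down to roughly the sketch below. All names here
(async_flush, async_flush_start, payload) are made up for illustration
and are not the patch's actual code; the required dma_fence_ops
callbacks also vary a little between kernel versions.

	#include <linux/dma-fence.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/workqueue.h>

	/* One tracking structure per asynchronous flush. The dma_fence
	 * is kept as the first member so a pointer to the fence and a
	 * pointer to the structure are interchangeable, which is what
	 * the BUILD_BUG_ON(offsetof(...)) above asserts. */
	struct async_flush {
		struct dma_fence dma;
		spinlock_t lock;	/* protects the fence's state */
		struct work_struct work;
		void *payload;		/* the object to be flushed */
	};

	static const char *async_flush_driver_name(struct dma_fence *fence)
	{
		return "example";
	}

	static const char *async_flush_timeline_name(struct dma_fence *fence)
	{
		return "flush";
	}

	static bool async_flush_enable_signaling(struct dma_fence *fence)
	{
		return true;	/* the worker always signals; nothing to arm */
	}

	static void async_flush_release(struct dma_fence *fence)
	{
		kfree(container_of(fence, struct async_flush, dma));
	}

	static const struct dma_fence_ops async_flush_ops = {
		.get_driver_name = async_flush_driver_name,
		.get_timeline_name = async_flush_timeline_name,
		.enable_signaling = async_flush_enable_signaling,
		.wait = dma_fence_default_wait,
		.release = async_flush_release,
	};

	static void async_flush_work(struct work_struct *work)
	{
		struct async_flush *af = container_of(work, typeof(*af), work);

		/* ... the expensive clflush of af->payload happens here ... */

		dma_fence_signal(&af->dma);	/* wake anyone waiting */
		dma_fence_put(&af->dma);	/* drop the worker's reference */
	}

	static struct dma_fence *async_flush_start(void *payload)
	{
		struct async_flush *af = kmalloc(sizeof(*af), GFP_KERNEL);

		if (!af)
			return NULL;

		spin_lock_init(&af->lock);
		/* A fresh fence context per flush keeps the sketch simple. */
		dma_fence_init(&af->dma, &async_flush_ops, &af->lock,
			       dma_fence_context_alloc(1), 0);
		af->payload = payload;

		INIT_WORK(&af->work, async_flush_work);
		dma_fence_get(&af->dma);	/* reference held by the worker */
		schedule_work(&af->work);

		return &af->dma;	/* caller installs this in obj->resv */
	}

Per the commit message, the caller adds the returned fence to the
object's reservation object, so subsequent CPU access waits on the
fence instead of serializing the flush under struct_mutex.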
<SNIP>

> @@ -14279,15 +14282,11 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb,
>  					   struct drm_clip_rect *clips,
>  					   unsigned num_clips)
>  {
> -	struct drm_device *dev = fb->dev;
>  	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
>  	struct drm_i915_gem_object *obj = intel_fb->obj;
>  
> -	mutex_lock(&dev->struct_mutex);
>  	if (obj->pin_display && obj->cache_dirty)
> -		i915_gem_clflush_object(obj, true);
> -	intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
> -	mutex_unlock(&dev->struct_mutex);
> +		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);

You removed the mutex_lock(), so add a READ_ONCE() for code coherency.

With the above addressed;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

Regards, Joonas

> 
>  	return 0;
>  }
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
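One plausible reading of the READ_ONCE() suggestion above: with
struct_mutex gone from this path, the flag is read without any lock, so
it wants an annotation telling the compiler to load it exactly once and
not tear or re-load it. A sketch only; which field(s) to annotate is
the author's call, not something the review spells out:

	#include <linux/compiler.h>	/* READ_ONCE() */

	/* Unlocked read of a flag another thread may be writing. */
	if (obj->pin_display && READ_ONCE(obj->cache_dirty))
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);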