On Thu, Aug 08, 2013 at 02:41:05PM +0100, Chris Wilson wrote: > As mentioned in the previous commit, reads and writes from both the CPU > and GPU go through the LLC. This gives us coherency between the CPU and > GPU irrespective of the attribute settings either device sets. We can > use this to avoid having to clflush even uncached memory. > > Except for the scanout. > > The scanout resides within another functional block that does not use > the LLC but reads directly from main memory. So in order to maintain > coherency with the scanout, writes to uncached memory must be flushed. > In order to optimize writes elsewhere, we start tracking whether a > framebuffer is attached to an object. > > v2: Use pin_display tracking rather than fb_count (to ensure we flush > cursors as well etc) and only force the clflush along explicit writes to > the scanout paths (i.e. pin_to_display_plane and pwrite into scanout). > > Based on a patch by Ville Syrjälä. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 +- > drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++-------------- > drivers/gpu/drm/i915/i915_gem_exec.c | 2 +- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- > 5 files changed, 36 insertions(+), 30 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 3622ec2..1ffae08 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1863,7 +1863,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) > } > > void i915_gem_reset(struct drm_device *dev); > -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); > +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); > int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, > uint32_t read_domains, > uint32_t write_domain); > 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index c5e03ba..78535e9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -38,7 +38,8 @@ > #include <linux/dma-buf.h> > > static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); > -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); > +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, > + bool force); > static __must_check int > i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -68,6 +69,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, > return HAS_LLC(dev) || level != I915_CACHE_NONE; > } > > +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > +{ > + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) > + return true; > + > + return obj->pin_display; > +} > + > static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) > { > if (obj->tiling_mode) > @@ -830,8 +839,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, > * write domain and manually flush cachelines (if required). This > * optimizes for the case when the gpu will use the data > * right away and we therefore have to clflush anyway. */ > - if (obj->cache_level == I915_CACHE_NONE) > - needs_clflush_after = 1; > + needs_clflush_after = cpu_write_needs_clflush(obj); > if (i915_gem_obj_bound_any(obj)) { > ret = i915_gem_object_set_to_gtt_domain(obj, true); > if (ret) > @@ -921,7 +929,7 @@ out: > */ > if (!needs_clflush_after && > obj->base.write_domain != I915_GEM_DOMAIN_CPU) { > - i915_gem_clflush_object(obj); > + i915_gem_clflush_object(obj, false); Shouldn't that be i915_gem_clflush_object(obj, obj->pin_display) ? 
> i915_gem_chipset_flush(dev); > } > } -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx