Chris, I tested this together with your v3 (Mark cache dirty...) patch and verified tests are all passing. Tested-by: Dongwon Kim <dongwon.kim@xxxxxxxxx> On Thu, May 18, 2017 at 10:46:17AM +0100, Chris Wilson wrote: > For ease of use (i.e. avoiding a few checks and function calls), store > the object's cache coherency next to the cache is dirty bit. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Dongwon Kim <dongwon.kim@xxxxxxxxx> > Cc: Matt Roper <matthew.d.roper@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem.c | 14 +++++++------- > drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/i915_gem_internal.c | 3 ++- > drivers/gpu/drm/i915/i915_gem_object.h | 1 + > drivers/gpu/drm/i915/i915_gem_stolen.c | 1 + > drivers/gpu/drm/i915/i915_gem_userptr.c | 3 ++- > drivers/gpu/drm/i915/selftests/huge_gem_object.c | 3 ++- > 8 files changed, 17 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 155dd52f2d18..870659c13de3 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -52,7 +52,7 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) > if (obj->cache_dirty) > return false; > > - if (!i915_gem_object_is_coherent(obj)) > + if (!obj->cache_coherent) > return true; > > return obj->pin_display; > @@ -253,7 +253,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, > > if (needs_clflush && > (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && > - !i915_gem_object_is_coherent(obj)) > + !obj->cache_coherent) > drm_clflush_sg(pages); > > __start_cpu_write(obj); > @@ -856,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, > if (ret) > return ret; > > - if (i915_gem_object_is_coherent(obj) || > - !static_cpu_has(X86_FEATURE_CLFLUSH)) { > + if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { > ret = 
i915_gem_object_set_to_cpu_domain(obj, false); > if (ret) > goto err_unpin; > @@ -909,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, > if (ret) > return ret; > > - if (i915_gem_object_is_coherent(obj) || > - !static_cpu_has(X86_FEATURE_CLFLUSH)) { > + if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) { > ret = i915_gem_object_set_to_cpu_domain(obj, true); > if (ret) > goto err_unpin; > @@ -3661,6 +3659,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, > list_for_each_entry(vma, &obj->vma_list, obj_link) > vma->node.color = cache_level; > obj->cache_level = cache_level; > + obj->cache_coherent = i915_gem_object_is_coherent(obj); > obj->cache_dirty = true; /* Always invalidate stale cachelines */ > > return 0; > @@ -4320,7 +4319,8 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size) > } else > obj->cache_level = I915_CACHE_NONE; > > - obj->cache_dirty = !i915_gem_object_is_coherent(obj); > + obj->cache_coherent = i915_gem_object_is_coherent(obj); > + obj->cache_dirty = !obj->cache_coherent; > > trace_i915_gem_object_create(obj); > > diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c > index 17b207e963c2..152f16c11878 100644 > --- a/drivers/gpu/drm/i915/i915_gem_clflush.c > +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c > @@ -139,7 +139,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, > * snooping behaviour occurs naturally as the result of our domain > * tracking. 
> */ > - if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) > + if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent) > return; > > trace_i915_gem_object_clflush(obj); > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 0b8ae0f56675..2e5f513087a8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -1129,7 +1129,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) > continue; > > - if (obj->cache_dirty) > + if (obj->cache_dirty & ~obj->cache_coherent) > i915_gem_clflush_object(obj, 0); > > ret = i915_gem_request_await_object > diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c > index 58e93e87d573..568bf83af1f5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_internal.c > +++ b/drivers/gpu/drm/i915/i915_gem_internal.c > @@ -191,7 +191,8 @@ i915_gem_object_create_internal(struct drm_i915_private *i915, > obj->base.read_domains = I915_GEM_DOMAIN_CPU; > obj->base.write_domain = I915_GEM_DOMAIN_CPU; > obj->cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; > - obj->cache_dirty = !i915_gem_object_is_coherent(obj); > + obj->cache_coherent = i915_gem_object_is_coherent(obj); > + obj->cache_dirty = !obj->cache_coherent; > > return obj; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h > index 174cf923c236..dca15adc91de 100644 > --- a/drivers/gpu/drm/i915/i915_gem_object.h > +++ b/drivers/gpu/drm/i915/i915_gem_object.h > @@ -106,6 +106,7 @@ struct drm_i915_gem_object { > unsigned long gt_ro:1; > unsigned int cache_level:3; > unsigned int cache_dirty:1; > + unsigned int cache_coherent:1; > > atomic_t frontbuffer_bits; > unsigned int frontbuffer_ggtt_origin; /* write once */ > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c > index f3abdc27c5dd..68af4a39973d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c > @@ -592,6 +592,7 @@ _i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, > obj->stolen = stolen; > obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; > obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE; > + obj->cache_coherent = true; /* assumptions! 
more like cache_oblivious */ > > if (i915_gem_object_pin_pages(obj)) > goto cleanup; > diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c > index 9f84be171ad2..4ec9a04aa165 100644 > --- a/drivers/gpu/drm/i915/i915_gem_userptr.c > +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c > @@ -805,7 +805,8 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file > obj->base.read_domains = I915_GEM_DOMAIN_CPU; > obj->base.write_domain = I915_GEM_DOMAIN_CPU; > obj->cache_level = I915_CACHE_LLC; > - obj->cache_dirty = !i915_gem_object_is_coherent(obj); > + obj->cache_coherent = i915_gem_object_is_coherent(obj); > + obj->cache_dirty = !obj->cache_coherent; > > obj->userptr.ptr = args->user_ptr; > obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY); > diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c > index 0ca867a877b6..caf76af36aba 100644 > --- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c > +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c > @@ -129,7 +129,8 @@ huge_gem_object(struct drm_i915_private *i915, > obj->base.read_domains = I915_GEM_DOMAIN_CPU; > obj->base.write_domain = I915_GEM_DOMAIN_CPU; > obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; > - obj->cache_dirty = !i915_gem_object_is_coherent(obj); > + obj->cache_coherent = i915_gem_object_is_coherent(obj); > + obj->cache_dirty = !obj->cache_coherent; > obj->scratch = phys_size; > > return obj; > -- > 2.11.0 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx