Since userspace has the ability to bypass the CPU cache from within its unprivileged command stream, we have to flush the CPU cache to memory in order to overwrite the previous contents on creation. We enforce this at the boundary points (get/put pages) to ensure that before recycling system pages we are always cache coherent. v3: We now always clflush on acquisition and release of system pages, and include a clflush counting selftest to make sure we do. This also succinctly covers swap-in/swap-out. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Cc: Francisco Jerez <currojerez@xxxxxxxxxx> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx> --- Remember the difference in clflush behaviour between llc and !llc. --- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 8 ++ drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 6 + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 7 +- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 22 +++- .../i915/gem/selftests/i915_gem_coherency.c | 113 ++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 + 6 files changed, 153 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index b9f504ba3b32..e2434e17ffc1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -16,11 +16,19 @@ struct clflush { struct drm_i915_gem_object *obj; }; +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +void st_clflush_inc(struct drm_i915_gem_object *obj) +{ + atomic_inc(&to_i915(obj->base.dev)->gem.clflushes); +} +#endif + static void __do_clflush(struct drm_i915_gem_object *obj) { GEM_BUG_ON(!i915_gem_object_has_pages(obj)); drm_clflush_sg(obj->mm.pages); intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU); + st_clflush_inc(obj); } static int clflush_work(struct dma_fence_work *base) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h index e6c382973129..7434d878a553 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h @@ -17,4 +17,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, #define I915_CLFLUSH_FORCE BIT(0) #define I915_CLFLUSH_SYNC BIT(1) +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +void st_clflush_inc(struct drm_i915_gem_object *obj); +#else +static inline void st_clflush_inc(struct drm_i915_gem_object *obj) { } +#endif + #endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index f402c2c415c2..8bcf9b65d661 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -5,9 +5,10 @@ */ #include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_gem_lmem.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" -#include "i915_gem_lmem.h" void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, @@ -25,8 +26,10 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, /* Make the pages coherent with the GPU (flushing any swapin). */ if (obj->cache_dirty) { obj->write_domain = 0; - if (i915_gem_object_has_struct_page(obj)) + if (i915_gem_object_has_struct_page(obj)) { drm_clflush_sg(pages); + st_clflush_inc(obj); + } obj->cache_dirty = false; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4d69c3fc3439..9db53a4d1b2e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -10,6 +10,7 @@ #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gemfs.h" +#include "i915_gem_clflush.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_trace.h" @@ -183,6 +184,15 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); + /* + * Since userspace has the ability to bypass the CPU cache from within + * its unprivileged command stream, we have to flush the CPU cache to + * memory in order to overwrite the previous contents on creation, so + * that userspace cannot snoop on the old system pages just handed to + * us. + */ + obj->cache_dirty = true; /* For drm_clflush_sg() inside set_pages */ + __i915_gem_object_set_pages(obj, st, sg_page_sizes); return 0; @@ -285,10 +295,16 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (obj->mm.madv == I915_MADV_DONTNEED) obj->mm.dirty = false; - if (needs_clflush && - (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + /* + * Unless we know for certain that the main memory contents are + * coherent with the CPU cache, flush the CPU cache. This ensures + * that userspace that has bypassed the CPU cache to write into + * main memory does not leak the contents of its CPU cache. + */ + if (obj->write_domain != I915_GEM_DOMAIN_CPU) { drm_clflush_sg(pages); + st_clflush_inc(obj); + } __start_cpu_write(obj); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 2b29f6b4e1dd..20c76a8f81fd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "gem/i915_gem_clflush.h" +#include "gem/i915_gem_object.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" @@ -297,6 +299,116 @@ random_engine(struct drm_i915_private *i915, struct rnd_state *prng) return NULL; } +static int checked_clflush(struct drm_i915_gem_object *obj, + unsigned int flags, + int expected) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int clflushes = atomic_read(&i915->gem.clflushes); + int err = 0; + + i915_gem_object_lock(obj); + + i915_gem_clflush_object(obj, flags); + + if (obj->cache_dirty) { + pr_err("clflush did not clear obj->cache_dirty!\n"); + err = -EINVAL; + goto unlock; + } + + if (atomic_read(&i915->gem.clflushes) != clflushes + expected) { + pr_err("clflush counter did not match!\n"); + err = -EINVAL; + goto unlock; + } + +unlock: + i915_gem_object_unlock(obj); + return err; +} + +static int igt_gem_clflush(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int clflushes; + int err = 0; + + /* + * Before the data is visible to the GPU, we sometimes have to + * flush the CPU cache. For example, only with the introduction + * of LLC with Sandybride could the GPU and CPU caches be fully + * coherent. However, the display engine (scanout) has different + * coherency with the GPU then the CPU, and even if the CPU/GPU + * could talk to each other we would still need to flush the CPU + * caches for scanout. Finally, userspace can bypass the cache + * snooping altogether from inside its unprivileged batch bufers. + * + * We use drm_clflush_*() to clear the CPU cachelines on such + * transitions, so we can assert that we will do so by setting + * up the dirty state and looking at our clflush counter + */ + + /* + * Creating a shmem object from system pages, we must assume + * the CPU cache is dirty. That is the main memory can have + * different (stale) content than the CPU, as the kernel + * expects all reads to come from the CPU cache. As we know the GPU + * may ignore the CPU cache and read from memory instead, we must + * assume the worst. + */ + obj = i915_gem_object_create_shmem(i915, 4096); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + /* We always clflush on acquisition of the backing storage */ + clflushes = atomic_read(&i915->gem.clflushes); + err = i915_gem_object_pin_pages(obj); + if (err) + goto err; + + if (atomic_read(&i915->gem.clflushes) == clflushes) { + pr_err("no clflush recorded for page acquisition\n"); + err = -EINVAL; + goto err; + } + if (obj->cache_dirty) { + pr_err("fresh pages, but CPU cache is dirty!\n"); + err = -EINVAL; + goto err; + } + + /* An _unforced_ clflush is ignored on llc */ + err = checked_clflush(obj, I915_CLFLUSH_SYNC, !HAS_LLC(i915)); + if (err) { + pr_err("redundant clflush failed!\n"); + goto err; + } + + /* But a _forced_ clflush is obeyed, even if we think its clean */ + err = checked_clflush(obj, I915_CLFLUSH_SYNC | I915_CLFLUSH_FORCE, 1); + if (err) { + pr_err("forced clflush failed!\n"); + goto err; + } + + /* Releasing a shmem object must flush (in case of user bypass) */ + clflushes = atomic_read(&i915->gem.clflushes); + i915_gem_object_put(obj); + i915_gem_drain_freed_objects(i915); + if (atomic_read(&i915->gem.clflushes) == clflushes) { + pr_err("clflush not recorded for releasing dirty uncached object\n"); + err = -EINVAL; + } + + goto out; +err: + i915_gem_object_put(obj); +out: + return err; +} + static int igt_gem_coherency(void *arg) { const unsigned int ncachelines = PAGE_SIZE/64; @@ -415,6 +527,7 @@ static int igt_gem_coherency(void *arg) int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_clflush), SUBTEST(igt_gem_coherency), }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 50809ffdc6b2..0ebd43b79813 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1254,6 +1254,8 @@ struct drm_i915_private { struct llist_head free_list; struct work_struct free_work; } contexts; + + I915_SELFTEST_DECLARE(atomic_t clflushes;) } gem; u8 pch_ssc_use; -- 2.24.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx