This is a companion to i915_gem_obj_prepare_shmem_read() that prepares the backing storage for direct writes. It first serialises with the GPU, pins the backing storage and then indicates what clfushes are required in order for the writes to be coherent. Whilst here, fix support for ancient CPUs without clflush for which we cannot do the GTT+clflush tricks. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 6 +- drivers/gpu/drm/i915/i915_gem.c | 119 +++++++++++++++++++++------------ 3 files changed, 83 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b0fd6a7b0603..de038da6c01b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -970,7 +970,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, u32 batch_start_offset, u32 batch_len) { - int needs_clflush = 0; + unsigned needs_clflush; void *src_base, *src; void *dst = NULL; int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b333cf4923bc..ef321f84e5fc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3026,7 +3026,11 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush); + unsigned *needs_clflush); +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned *needs_clflush); +#define CLFLUSH_BEFORE 0x1 +#define CLFLUSH_AFTER 0x2 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a41fa01eb024..c6d06cb21191 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -563,34 +563,95 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, * flush the object from the CPU cache. */ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush) + unsigned *needs_clflush) { int ret; *needs_clflush = 0; - - if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0)) + if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0) return -EINVAL; if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); + } + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + if (*needs_clflush && !boot_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); + if (ret) { + i915_gem_object_unpin_pages(obj); + return ret; + } + *needs_clflush = 0; + } + + return 0; +} + +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + if ((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0) + return -EINVAL; + + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; + + /* If we're not in the cpu write domain, set ourself into the + * gtt write domain and manually flush cachelines (as required). + * This optimizes for the case when the gpu will use the data + * right away and we therefore have to clflush anyway. + */ + *needs_clflush |= cpu_write_needs_clflush(obj) << 1; } + /* Same trick applies to invalidate partially written cachelines read + * before writing. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + ret = i915_gem_object_get_pages(obj); if (ret) return ret; i915_gem_object_pin_pages(obj); - return ret; + if (*needs_clflush && !boot_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) { + i915_gem_object_unpin_pages(obj); + return ret; + } + *needs_clflush = 0; + } + + if ((*needs_clflush & CLFLUSH_AFTER) == 0) + obj->cache_dirty = true; + + intel_fb_obj_invalidate(obj, ORIGIN_CPU); + obj->dirty = 1; + return 0; } /* Per-page copy function for the shmem pread fastpath. @@ -999,41 +1060,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev, int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int hit_slowpath = 0; - int needs_clflush_after = 0; - int needs_clflush_before = 0; + unsigned needs_clflush; struct sg_page_iter sg_iter; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - /* If we're not in the cpu write domain, set ourself into the gtt - * write domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will use the data - * right away and we therefore have to clflush anyway. */ - needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } - /* Same trick applies to invalidate partially written cachelines read - * before writing. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - needs_clflush_before = - !cpu_cache_is_coherent(dev, obj->cache_level); - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - intel_fb_obj_invalidate(obj, ORIGIN_CPU); - - i915_gem_object_pin_pages(obj); - + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; - obj->dirty = 1; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -1057,7 +1094,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. */ - partial_cacheline_write = needs_clflush_before && + partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE && ((shmem_page_offset | page_length) & (boot_cpu_data.x86_clflush_size - 1)); @@ -1067,7 +1104,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); if (ret == 0) goto next_page; @@ -1076,7 +1113,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); mutex_lock(&dev->struct_mutex); @@ -1098,17 +1135,15 @@ out: * cachelines in-line while writing and the object moved * out of the cpu write domain while we've dropped the lock. */ - if (!needs_clflush_after && + if (!(needs_clflush & CLFLUSH_AFTER) && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (i915_gem_clflush_object(obj, obj->pin_display)) - needs_clflush_after = true; + needs_clflush |= CLFLUSH_AFTER; } } - if (needs_clflush_after) + if (needs_clflush & CLFLUSH_AFTER) i915_gem_chipset_flush(to_i915(dev)); - else - obj->cache_dirty = true; intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; -- 2.8.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx