If the object lies outside of the mappable GTT aperture, do not force it
through the CPU domain for relocations, but simply flush the writes as we
perform them and then queue a chipset flush.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 163bb52..daa5409 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,6 +33,9 @@
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>

+#define __EXEC_OBJECT_HAS_PIN (1<<31)
+#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+
 struct eb_objects {
 	int and;
 	struct hlist_head buckets[0];
@@ -95,10 +98,16 @@ eb_destroy(struct eb_objects *eb)
 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 {
 	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
-		!obj->map_and_fenceable ||
 		obj->cache_level != I915_CACHE_NONE);
 }

+static inline struct page *
+gtt_offset_to_page(struct drm_i915_gem_object *obj, u32 offset)
+{
+	offset -= obj->gtt_space->start;
+	return i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+}
+
 static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_objects *eb,
@@ -182,22 +191,20 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;

 	reloc->delta += target_offset;
+	reloc->offset += obj->gtt_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 		char *vaddr;

-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
 			return ret;

-		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-							     reloc->offset >> PAGE_SHIFT));
-		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+		vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+		*(uint32_t *)(vaddr + offset_in_page(reloc->offset)) = reloc->delta;
 		kunmap_atomic(vaddr);
 	} else {
 		struct drm_i915_private *dev_priv = dev->dev_private;
-		uint32_t __iomem *reloc_entry;
-		void __iomem *reloc_page;
+		unsigned page_offset;

 		ret = i915_gem_object_set_to_gtt_domain(obj, true);
 		if (ret)
@@ -208,13 +215,28 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			return ret;

 		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
-		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
-						      reloc->offset & PAGE_MASK);
-		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
-		iowrite32(reloc->delta, reloc_entry);
-		io_mapping_unmap_atomic(reloc_page);
+		page_offset = offset_in_page(reloc->offset);
+
+		if (reloc->offset < dev_priv->mm.gtt_mappable_end) {
+			void __iomem *reloc_page;
+
+			reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+							      reloc->offset & PAGE_MASK);
+			iowrite32(reloc->delta, reloc_page + page_offset);
+			io_mapping_unmap_atomic(reloc_page);
+		} else {
+			char *vaddr;
+
+			vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));
+
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+			*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+			drm_clflush_virt_range(vaddr + page_offset, 4);
+
+			kunmap_atomic(vaddr);
+
+			obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;
+		}
 	}

 	/* and update the user's relocation entry */
@@ -312,16 +334,6 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }

-#define __EXEC_OBJECT_HAS_PIN (1<<31)
-#define __EXEC_OBJECT_HAS_FENCE (1<<30)
-
-static int
-need_reloc_mappable(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(obj);
-}
-
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 				   struct intel_ring_buffer *ring)
@@ -329,16 +341,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence, need_mappable;
+	bool need_fence;
 	int ret;

 	need_fence =
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(obj);

-	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
+	ret = i915_gem_object_pin(obj, entry->alignment, need_fence, false);
 	if (ret)
 		return ret;

@@ -401,7 +412,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	INIT_LIST_HEAD(&ordered_objects);
 	while (!list_empty(objects)) {
 		struct drm_i915_gem_exec_object2 *entry;
-		bool need_fence, need_mappable;
+		bool need_fence;

 		obj = list_first_entry(objects,
 				       struct drm_i915_gem_object,
@@ -412,9 +423,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
-		need_mappable = need_fence || need_reloc_mappable(obj);

-		if (need_mappable)
+		if (need_fence)
 			list_move(&obj->exec_list, &ordered_objects);
 		else
 			list_move_tail(&obj->exec_list, &ordered_objects);
@@ -444,7 +454,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		/* Unbind any ill-fitting objects or pin. */
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-			bool need_fence, need_mappable;
+			bool need_fence;

 			if (!obj->gtt_space)
 				continue;
@@ -453,10 +463,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 				has_fenced_gpu_access &&
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 				obj->tiling_mode != I915_TILING_NONE;
-			need_mappable = need_fence || need_reloc_mappable(obj);

 			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
-			    (need_mappable && !obj->map_and_fenceable))
+			    (need_fence && !obj->map_and_fenceable))
 				ret = i915_gem_object_unbind(obj);
 			else
 				ret = i915_gem_execbuffer_reserve_object(obj, ring);
@@ -603,10 +612,16 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 		if (ret)
 			return ret;

+		flush_domains |= obj->base.write_domain;
+
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj);

-		flush_domains |= obj->base.write_domain;
+		/* Used as an internal marker during relocation processing */
+		if (obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS) {
+			flush_domains |= obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS;
+			obj->base.pending_write_domain &= I915_GEM_GPU_DOMAINS;
+		}
 	}

 	if (flush_domains & I915_GEM_DOMAIN_CPU)
@@ -923,7 +938,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}

 	/* Set the pending read domains for the batch buffer to COMMAND */
-	if (batch_obj->base.pending_write_domain) {
+	if (batch_obj->base.pending_write_domain & I915_GEM_GPU_DOMAINS) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		ret = -EINVAL;
 		goto err;
-- 
1.7.10.4
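
For reference, the non-mappable relocation path added in
i915_gem_execbuffer_relocate_entry() above boils down to the following
annotated sketch. The identifiers (gtt_offset_to_page(), page_offset,
reloc, obj) are the ones used in the hunk; this is only an illustration
of the mechanism described in the commit message, not additional patch
content, and error handling plus the mappable fast path are omitted:

	/*
	 * The relocation target sits above dev_priv->mm.gtt_mappable_end,
	 * so it cannot be written through the GTT aperture.  Instead of
	 * forcing the object into the CPU domain, map the backing page
	 * through the kernel, flush the affected cacheline before and
	 * after writing the relocation value, and mark the object so that
	 * i915_gem_execbuffer_move_to_gpu() queues the chipset flush
	 * before the batch executes.
	 */
	vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset));

	drm_clflush_virt_range(vaddr + page_offset, 4);
	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
	drm_clflush_virt_range(vaddr + page_offset, 4);

	kunmap_atomic(vaddr);

	/* Internal marker, converted into a chipset flush in move_to_gpu() */
	obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU;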