This is pure evil. Userspace, I'm looking at you SNA, repacks batch buffers on the fly after generation as they are being passed to the kernel for execution. These batches also contain self-referenced relocations as a single buffer encompasses the state commands, kernels, vertices and sampler. During generation the buffers are placed at known offsets within the full batch, and then the relocation deltas (as passed to the kernel) are tweaked as the batch is repacked into a smaller buffer. This means that userspace is passing negative relocations deltas, which subsequently wrap to large values if the batch is at a low address. The GPU hangs when it then tries to use the large value as a base for its address offsets, rather than wrapping back to the real value (as one would hope). As the GPU uses positive offsets from the base, we can treat the relocation address as the minimum address read by the GPU. For the upper bound, we trust that userspace will not read beyond the end of the buffer. So, how do we fix negative relocations from wrapping? We can either check that every relocation looks valid when we write it, and then position each object such that we prevent the offset wraparound, or we just special-case the self-referential behaviour of SNA and force all batches to be above 256k. Daniel prefers the latter approach. This fixes a GPU hang when it tries to use an address (relocation + offset) greater than the GTT size. The issue would occur quite easily with full-ppgtt as each fd gets its own VM space, so low offsets would often be handed out. However, with the rearrangement of the low GTT due to capturing the BIOS framebuffer, it is already affecting kernels 3.15 onwards. I think only IVB+ is susceptible to this bug, but the workaround should only kick in rarely, so it seems sensible to always apply it. v3: Use a bias for batch buffers to prevent small negative delta relocations from wrapping. Testcase: igt/gem_bad_reloc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533 Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: stable@xxxxxxxxxxxxxxx --- drivers/gpu/drm/i915/i915_drv.h | 13 +++++++++---- drivers/gpu/drm/i915/i915_gem.c | 26 ++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_evict.c | 9 ++++----- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 21 ++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +++- 5 files changed, 50 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f2d95c56b0a9..64a1b2340e3a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2139,13 +2139,16 @@ void i915_init_vm(struct drm_i915_private *dev_priv, void i915_gem_free_object(struct drm_gem_object *obj); void i915_gem_vma_destroy(struct i915_vma *vma); -#define PIN_MAPPABLE 0x1 -#define PIN_NONBLOCK 0x2 -#define PIN_GLOBAL 0x4 +#define PIN_OFFSET_FIXED 0x1 +#define PIN_OFFSET_BIAS 0x2 +#define PIN_MAPPABLE 0x4 +#define PIN_NONBLOCK 0x8 +#define PIN_GLOBAL 0x10 +#define PIN_OFFSET_MASK (~4095) int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - unsigned flags); + uint64_t flags); int __must_check i915_vma_unbind(struct i915_vma *vma); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); @@ -2400,6 +2403,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignment, unsigned cache_level, + unsigned long start, + unsigned long end, unsigned flags); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); int i915_gem_evict_everything(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 844ea6048321..79244b911125 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3259,12 +3259,14 @@ static struct i915_vma * i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, unsigned alignment, - unsigned flags) + uint64_t flags) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - size_t gtt_max = + unsigned long start = + flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + unsigned long end = flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3293,11 +3295,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, /* If the object is bigger than the entire aperture, reject it early * before evicting everything in a vain attempt to find space. */ - if (obj->base.size > gtt_max) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", + if (obj->base.size > end) { + DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", obj->base.size, flags & PIN_MAPPABLE ? "mappable" : "total", - gtt_max); + end); return ERR_PTR(-E2BIG); } @@ -3314,12 +3316,15 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, search_free: ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, size, alignment, - obj->cache_level, 0, gtt_max, + obj->cache_level, + start, end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); if (ret) { ret = i915_gem_evict_something(dev, vm, size, alignment, - obj->cache_level, flags); + obj->cache_level, + start, end, + flags); if (ret == 0) goto search_free; @@ -3886,7 +3891,7 @@ int i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, uint32_t alignment, - unsigned flags) + uint64_t flags) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; struct i915_vma *vma; @@ -3905,13 +3910,14 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, if ((alignment && vma->node.start & (alignment - 1)) || - (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) { + (flags & PIN_MAPPABLE && !obj->map_and_fenceable) || + (flags & PIN_OFFSET_BIAS && vma->node.start < (flags & PIN_OFFSET_MASK))) { WARN(vma->pin_count, "bo is already pinned with incorrect alignment:" " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", i915_gem_obj_offset(obj, vm), alignment, - flags & PIN_MAPPABLE, + !!(flags & PIN_MAPPABLE), obj->map_and_fenceable); ret = i915_vma_unbind(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 75fca63dc8c1..bbf4b12d842e 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -68,9 +68,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) int i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, int min_size, unsigned alignment, unsigned cache_level, + unsigned long start, unsigned long end, unsigned flags) { - struct drm_i915_private *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; struct i915_vma *vma; int ret = 0; @@ -102,11 +102,10 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, */ INIT_LIST_HEAD(&unwind_list); - if (flags & PIN_MAPPABLE) { - BUG_ON(!i915_is_ggtt(vm)); + if (start != 0 || end != vm->total) { drm_mm_init_scan_with_range(&vm->mm, min_size, - alignment, cache_level, 0, - dev_priv->gtt.mappable_end); + alignment, cache_level, + start, end); } else drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b4d8c31c0919..15cb3825dfef 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -36,6 +36,9 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) #define __EXEC_OBJECT_NEEDS_MAP (1<<29) +#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) + +#define BIAS (256*1024) struct eb_vmas { struct list_head vmas; @@ -540,7 +543,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - unsigned flags; + uint64_t flags; int ret; flags = 0; @@ -548,6 +551,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_MAPPABLE; if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) + flags |= BIAS | PIN_OFFSET_BIAS; ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); if (ret) @@ -672,7 +677,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, continue; if ((entry->alignment && vma->node.start & (entry->alignment - 1)) || - (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)) + (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) || + (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && vma->node.start < BIAS)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); @@ -1035,6 +1041,15 @@ static int gen8_dispatch_bsd_ring(struct drm_device *dev, } } +struct drm_i915_gem_object * +eb_get_batch(struct eb_vmas *eb) +{ + struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); + if (INTEL_INFO(vma->obj->base.dev)->gen >= 7) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + return vma->obj; +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1215,7 +1230,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; /* take note of the batch buffer before we might reorder the lists */ - batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj; + batch_obj = eb_get_batch(eb); /* Move the objects en-masse into the GTT, evicting if necessary. */ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index afd4eef5db31..8a1e2516d20b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1049,7 +1049,9 @@ alloc: if (ret == -ENOSPC && !retried) { ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, GEN6_PD_SIZE, GEN6_PD_ALIGN, - I915_CACHE_NONE, 0); + I915_CACHE_NONE, + 0, dev_priv->gtt.base.total, + 0); if (ret) return ret; -- 2.0.0.rc2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx