On Thu, May 15, 2014 at 01:17:12PM +0100, Chris Wilson wrote: > This is pure evil. Userspace, I'm looking at you SNA, repacks batch > buffers on the fly after generation as they are being passed to the > kernel for execution. These batches also contain self-referenced > relocations as a single buffer encompasses the state commands, kernels, > vertices and sampler. During generation the buffers are placed at known > offsets within the full batch, and then the relocation deltas (as passed > to the kernel) are tweaked as the batch is repacked into a smaller buffer. > This means that userspace is passing negative relocations deltas, which > subsequently wrap to large values if the batch is at a low address. The > GPU hangs when it then tries to use the large value as a base for its > address offsets, rather than wrapping back to the real value (as one > would hope). As the GPU uses positive offsets from the base, we can > treat the relocation address as the minimum address read by the GPU. > For the upper bound, we trust that userspace will not read beyond the > end of the buffer. > > This fixes a GPU hang when it tries to use an address (relocation + > offset) greater than the GTT size. The issue would occur quite easily > with full-ppgtt as each fd gets its own VM space, so low offsets would > often be handed out. However, with the rearrangement of the low GTT due > to capturing the BIOS framebuffer, it is already affecting kernels 3.14 > onwards. I think only IVB+ is susceptible to this bug, but the workaround > should only kick in rarely, so it seems sensible to always apply it. > > v2: Apply the workaround for LUT-based execbuffers as well and only for > IVB+ as my SNB machine seems to be unaffected. > > Testcase: igt/gem_bad_reloc > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: stable@xxxxxxxxxxxxxxx Do we need to fix this in the kernel? Userspace supplying relocs that kinda don't work smells fishy ... -Daniel > --- > drivers/gpu/drm/i915/i915_gem.c | 15 ++++- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 91 ++++++++++++++++++++++++++++-- > 2 files changed, 97 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 1a271ee8dc22..ae751b73a087 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3626,8 +3626,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > struct drm_device *dev = obj->base.dev; > struct drm_i915_private *dev_priv = dev->dev_private; > u32 size, fence_size, fence_alignment, unfenced_alignment; > - size_t gtt_max = > + unsigned long gtt_max = > flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; > + unsigned long start = alignment; > struct i915_vma *vma; > int ret; > > @@ -3649,6 +3650,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > DRM_DEBUG("Invalid object alignment requested %u\n", alignment); > return ERR_PTR(-EINVAL); > } > + if (alignment >= gtt_max) { > + DRM_DEBUG("Alignment larger than the aperture: alignment=%d >= %s aperture=%lu\n", > + alignment, > + flags & PIN_MAPPABLE ? "mappable" : "total", > + gtt_max); > + return ERR_PTR(-EINVAL); > + } > > size = flags & PIN_MAPPABLE ? fence_size : obj->base.size; > > @@ -3656,7 +3664,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > * before evicting everything in a vain attempt to find space. > */ > if (obj->base.size > gtt_max) { > - DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", > + DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n", > obj->base.size, > flags & PIN_MAPPABLE ? "mappable" : "total", > gtt_max); > @@ -3692,7 +3700,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > search_free: > ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > size, alignment, > - obj->cache_level, 0, gtt_max, > + obj->cache_level, > + start, gtt_max, > DRM_MM_SEARCH_DEFAULT, > DRM_MM_CREATE_DEFAULT); > if (ret) { > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index e94aa365ae40..d37b54862d37 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -166,14 +166,12 @@ eb_lookup_vmas(struct eb_vmas *eb, > list_del_init(&obj->obj_exec_link); > > vma->exec_entry = &exec[i]; > - if (eb->and < 0) { > + vma->exec_handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; > + if (eb->and < 0) > eb->lut[i] = vma; > - } else { > - uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; > - vma->exec_handle = handle; > + else > hlist_add_head(&vma->exec_node, > - &eb->buckets[handle & eb->and]); > - } > + &eb->buckets[vma->exec_handle & eb->and]); > ++i; > } > > @@ -261,6 +259,19 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) > obj->cache_level != I915_CACHE_NONE); > } > > +static bool invalid_offset(struct drm_device *dev, uint64_t offset) > +{ > + const int gen = INTEL_INFO(dev)->gen; > + > + if (gen < 7) > + return false; > + > + if (gen < 8) > + offset = lower_32_bits(offset); > + > + return offset >= to_i915(dev)->gtt.base.total; > +} > + > static int > relocate_entry_cpu(struct drm_i915_gem_object *obj, > struct drm_i915_gem_relocation_entry *reloc, > @@ -272,6 +283,9 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, > char *vaddr; > int ret; > > + if (invalid_offset(dev, delta)) > + return -EFAULT; > + > ret = i915_gem_object_set_to_cpu_domain(obj, true); > if (ret) > return ret; > @@ -309,6 +323,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, > void __iomem *reloc_page; > int ret; > > + if (invalid_offset(dev, delta)) > + return -EFAULT; > + > ret = i915_gem_object_set_to_gtt_domain(obj, true); > if (ret) > return ret; > @@ -702,6 +719,62 @@ err: > } > > static int > +i915_gem_execbuffer_relocate_check_slow(struct i915_vma *vma, > + struct drm_i915_gem_relocation_entry *relocs, > + int total) > +{ > + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; > + struct drm_i915_gem_object *obj = vma->obj; > + int32_t min = 0; > + int i, ret; > + > + /* This is pure evil. Userspace, I'm looking at you SNA, repacks > + * batch buffers on the fly after generation and before > + * being passed to the kernel for execution. These batches also > + * contain self-referenced relocations as a single buffer encompasses > + * the state commands, kernels, vertices and sampler. During > + * generation the buffers are placed at known offsets within the full > + * batch, and then the relocation deltas (as passed to the kernel) > + * are tweaked as the batch is repacked into a smaller buffer. > + * This means that userspace is passing negative relocations deltas, > + * which subsequently wrap to large values. The GPU hangs when it > + * then tries to use the large value as a base for the address offset, > + * rather than wrapping back to the real value (as one would hope). > + */ > + for (i = 0; i < total; i++) { > + struct drm_i915_gem_relocation_entry *reloc = &relocs[i]; > + int32_t sdelta; > + > + if (reloc->target_handle != vma->exec_handle) > + continue; > + > + sdelta = reloc->delta; > + if (sdelta < min) > + min = sdelta; > + } > + if (min < 0) { > + if (drm_mm_node_allocated(&vma->node) && > + invalid_offset(obj->base.dev, vma->node.start + (uint32_t)min)) { > + ret = i915_vma_unbind(vma); > + if (ret) > + return ret; > + } > + if (!drm_mm_node_allocated(&vma->node)) { > + if (entry->alignment == 0) > + entry->alignment = > + i915_gem_get_gtt_alignment(obj->base.dev, > + obj->base.size, > + obj->tiling_mode, > + entry->flags & __EXEC_OBJECT_NEEDS_MAP); > + while (entry->alignment < -min) > + entry->alignment <<= 1; > + } > + } > + > + return 0; > +} > + > +static int > i915_gem_execbuffer_relocate_slow(struct drm_device *dev, > struct drm_i915_gem_execbuffer2 *args, > struct drm_file *file, > @@ -792,6 +865,12 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, > if (ret) > goto err; > > + list_for_each_entry(vma, &eb->vmas, exec_list) { > + ret = i915_gem_execbuffer_relocate_check_slow(vma, reloc, total); > + if (ret) > + goto err; > + } > + > need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; > ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs); > if (ret) > -- > 2.0.0.rc2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html