Re: [Intel-gfx] [PATCH] drm/i915: Prevent negative relocation deltas from wrapping

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, May 15, 2014 at 01:17:12PM +0100, Chris Wilson wrote:
> This is pure evil. Userspace, I'm looking at you SNA, repacks batch
> buffers on the fly after generation as they are being passed to the
> kernel for execution. These batches also contain self-referenced
> relocations as a single buffer encompasses the state commands, kernels,
> vertices and sampler. During generation the buffers are placed at known
> offsets within the full batch, and then the relocation deltas (as passed
> to the kernel) are tweaked as the batch is repacked into a smaller buffer.
> This means that userspace is passing negative relocations deltas, which
> subsequently wrap to large values if the batch is at a low address. The
> GPU hangs when it then tries to use the large value as a base for its
> address offsets, rather than wrapping back to the real value (as one
> would hope). As the GPU uses positive offsets from the base, we can
> treat the relocation address as the minimum address read by the GPU.
> For the upper bound, we trust that userspace will not read beyond the
> end of the buffer.
> 
> This fixes a GPU hang when it tries to use an address (relocation +
> offset) greater than the GTT size. The issue would occur quite easily
> with full-ppgtt as each fd gets its own VM space, so low offsets would
> often be handed out. However, with the rearrangement of the low GTT due
> to capturing the BIOS framebuffer, it is already affecting kernels 3.14
> onwards. I think only IVB+ is susceptible to this bug, but the workaround
> should only kick in rarely, so it seems sensible to always apply it.
> 
> v2: Apply the workaround for LUT-based execbuffers as well and only for
> IVB+ as my SNB machine seems to be unaffected.
> 
> Testcase: igt/gem_bad_reloc
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx

Do we need to fix this in the kernel? Userspace supplying relocs that
kinda don't work smells fishy ...
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            | 15 ++++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 91 ++++++++++++++++++++++++++++--
>  2 files changed, 97 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1a271ee8dc22..ae751b73a087 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3626,8 +3626,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	u32 size, fence_size, fence_alignment, unfenced_alignment;
> -	size_t gtt_max =
> +	unsigned long gtt_max =
>  		flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
> +	unsigned long start = alignment;
>  	struct i915_vma *vma;
>  	int ret;
>  
> @@ -3649,6 +3650,13 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  		DRM_DEBUG("Invalid object alignment requested %u\n", alignment);
>  		return ERR_PTR(-EINVAL);
>  	}
> +	if (alignment >= gtt_max) {
> +		DRM_DEBUG("Alignment larger than the aperture: alignment=%d >= %s aperture=%lu\n",
> +			  alignment,
> +			  flags & PIN_MAPPABLE ? "mappable" : "total",
> +			  gtt_max);
> +		return ERR_PTR(-EINVAL);
> +	}
>  
>  	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
>  
> @@ -3656,7 +3664,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	 * before evicting everything in a vain attempt to find space.
>  	 */
>  	if (obj->base.size > gtt_max) {
> -		DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
> +		DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
>  			  obj->base.size,
>  			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  gtt_max);
> @@ -3692,7 +3700,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  search_free:
>  		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
>  							  size, alignment,
> -							  obj->cache_level, 0, gtt_max,
> +							  obj->cache_level,
> +							  start, gtt_max,
>  							  DRM_MM_SEARCH_DEFAULT,
>  							  DRM_MM_CREATE_DEFAULT);
>  		if (ret) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e94aa365ae40..d37b54862d37 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -166,14 +166,12 @@ eb_lookup_vmas(struct eb_vmas *eb,
>  		list_del_init(&obj->obj_exec_link);
>  
>  		vma->exec_entry = &exec[i];
> -		if (eb->and < 0) {
> +		vma->exec_handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
> +		if (eb->and < 0)
>  			eb->lut[i] = vma;
> -		} else {
> -			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
> -			vma->exec_handle = handle;
> +		else
>  			hlist_add_head(&vma->exec_node,
> -				       &eb->buckets[handle & eb->and]);
> -		}
> +				       &eb->buckets[vma->exec_handle & eb->and]);
>  		++i;
>  	}
>  
> @@ -261,6 +259,19 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
>  		obj->cache_level != I915_CACHE_NONE);
>  }
>  
> +static bool invalid_offset(struct drm_device *dev, uint64_t offset)
> +{
> +	const int gen = INTEL_INFO(dev)->gen;
> +
> +	if (gen < 7)
> +		return false;
> +
> +	if (gen < 8)
> +		offset = lower_32_bits(offset);
> +
> +	return offset >= to_i915(dev)->gtt.base.total;
> +}
> +
>  static int
>  relocate_entry_cpu(struct drm_i915_gem_object *obj,
>  		   struct drm_i915_gem_relocation_entry *reloc,
> @@ -272,6 +283,9 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
>  	char *vaddr;
>  	int ret;
>  
> +	if (invalid_offset(dev, delta))
> +		return -EFAULT;
> +
>  	ret = i915_gem_object_set_to_cpu_domain(obj, true);
>  	if (ret)
>  		return ret;
> @@ -309,6 +323,9 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
>  	void __iomem *reloc_page;
>  	int ret;
>  
> +	if (invalid_offset(dev, delta))
> +		return -EFAULT;
> +
>  	ret = i915_gem_object_set_to_gtt_domain(obj, true);
>  	if (ret)
>  		return ret;
> @@ -702,6 +719,62 @@ err:
>  }
>  
>  static int
> +i915_gem_execbuffer_relocate_check_slow(struct i915_vma *vma,
> +					struct drm_i915_gem_relocation_entry *relocs,
> +					int total)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +	int32_t min = 0;
> +	int i, ret;
> +
> +	/* This is pure evil. Userspace, I'm looking at you SNA, repacks
> +	 * batch buffers on the fly after generation and before
> +	 * being passed to the kernel for execution. These batches also
> +	 * contain self-referenced relocations as a single buffer encompasses
> +	 * the state commands, kernels, vertices and sampler. During
> +	 * generation the buffers are placed at known offsets within the full
> +	 * batch, and then the relocation deltas (as passed to the kernel)
> +	 * are tweaked as the batch is repacked into a smaller buffer.
> +	 * This means that userspace is passing negative relocations deltas,
> +	 * which subsequently wrap to large values. The GPU hangs when it
> +	 * then tries to use the large value as a base for the address offset,
> +	 * rather than wrapping back to the real value (as one would hope).
> +	 */
> +	for (i = 0; i < total; i++) {
> +		struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
> +		int32_t sdelta;
> +
> +		if (reloc->target_handle != vma->exec_handle)
> +			continue;
> +
> +		sdelta = reloc->delta;
> +		if (sdelta < min)
> +			min = sdelta;
> +	}
> +	if (min < 0) {
> +		if (drm_mm_node_allocated(&vma->node) &&
> +		    invalid_offset(obj->base.dev, vma->node.start + (uint32_t)min)) {
> +			ret = i915_vma_unbind(vma);
> +			if (ret)
> +				return ret;
> +		}
> +		if (!drm_mm_node_allocated(&vma->node)) {
> +			if (entry->alignment == 0)
> +				entry->alignment =
> +					i915_gem_get_gtt_alignment(obj->base.dev,
> +								   obj->base.size,
> +								   obj->tiling_mode,
> +								   entry->flags & __EXEC_OBJECT_NEEDS_MAP);
> +			while (entry->alignment < -min)
> +				entry->alignment <<= 1;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int
>  i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
>  				  struct drm_i915_gem_execbuffer2 *args,
>  				  struct drm_file *file,
> @@ -792,6 +865,12 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
>  	if (ret)
>  		goto err;
>  
> +	list_for_each_entry(vma, &eb->vmas, exec_list) {
> +		ret = i915_gem_execbuffer_relocate_check_slow(vma, reloc, total);
> +		if (ret)
> +			goto err;
> +	}
> +
>  	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
>  	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
>  	if (ret)
> -- 
> 2.0.0.rc2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]