Re: [CI 4/6] drm/i915: Always use the GTT for error capture

Daniel Vetter <daniel@xxxxxxxx> · Thu, 13 Oct 2016 17:02:50 +0200

On Wed, Oct 12, 2016 at 10:05:20AM +0100, Chris Wilson wrote:
> Since the GTT provides universal access to any GPU page, we can use it
> to reduce our plethora of read methods to just one. It also has the
> important characteristic of being exactly what the GPU sees - if there
> are incoherency problems, seeing the batch as executed (rather than as
> trapped inside the cpu cache) is important.
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

Meh, so much for me reading patches in order ;-)

Please reorder this one before the previous one and I'll be happy too.

> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  43 ++++++++----
>  drivers/gpu/drm/i915/i915_gem_gtt.h   |   2 +
>  drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++----------------------
>  3 files changed, 74 insertions(+), 91 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0bb4232f66bc..2d846aa39ca5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  	 */
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
>  	unsigned long hole_start, hole_end;
> +	struct i915_hw_ppgtt *ppgtt;
>  	struct drm_mm_node *entry;
>  	int ret;
>  
> @@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  	if (ret)
>  		return ret;
>  
> +	/* Reserve a mappable slot for our lockless error capture */
> +	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
> +						  &ggtt->error_capture,
> +						  4096, 0, -1,
> +						  0, ggtt->mappable_end,
> +						  0, 0);
> +	if (ret)
> +		return ret;
> +
>  	/* Clear any non-preallocated blocks */
>  	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
>  		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
> @@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  			       true);
>  
>  	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
> -		struct i915_hw_ppgtt *ppgtt;
> -
>  		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
> -		if (!ppgtt)
> -			return -ENOMEM;
> +		if (!ppgtt) {
> +			ret = -ENOMEM;
> +			goto err;
> +		}
>  
>  		ret = __hw_ppgtt_init(ppgtt, dev_priv);
> -		if (ret) {
> -			kfree(ppgtt);
> -			return ret;
> -		}
> +		if (ret)
> +			goto err_ppgtt;
>  
> -		if (ppgtt->base.allocate_va_range)
> +		if (ppgtt->base.allocate_va_range) {
>  			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
>  							    ppgtt->base.total);
> -		if (ret) {
> -			ppgtt->base.cleanup(&ppgtt->base);
> -			kfree(ppgtt);
> -			return ret;
> +			if (ret)
> +				goto err_ppgtt_cleanup;
>  		}
>  
>  		ppgtt->base.clear_range(&ppgtt->base,
> @@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
>  	}
>  
>  	return 0;
> +
> +err_ppgtt_cleanup:
> +	ppgtt->base.cleanup(&ppgtt->base);
> +err_ppgtt:
> +	kfree(ppgtt);
> +err:
> +	drm_mm_remove_node(&ggtt->error_capture);
> +	return ret;
>  }
>  
>  /**
> @@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
>  
>  	i915_gem_cleanup_stolen(&dev_priv->drm);
>  
> +	if (drm_mm_node_allocated(&ggtt->error_capture))
> +		drm_mm_remove_node(&ggtt->error_capture);
> +
>  	if (drm_mm_initialized(&ggtt->base.mm)) {
>  		intel_vgt_deballoon(dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index ec78be2f8c77..bd93fb8f99d2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -450,6 +450,8 @@ struct i915_ggtt {
>  	bool do_idle_maps;
>  
>  	int mtrr;
> +
> +	struct drm_mm_node error_capture;
>  };
>  
>  struct i915_hw_ppgtt {
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 159d6d7e0cee..b3b2e6c1c6c6 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -656,7 +656,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj)
>  		return;
>  
>  	for (page = 0; page < obj->page_count; page++)
> -		kfree(obj->pages[page]);
> +		free_page((unsigned long)obj->pages[page]);
>  
>  	kfree(obj);
>  }
> @@ -693,98 +693,69 @@ static void i915_error_state_free(struct kref *error_ref)
>  	kfree(error);
>  }
>  
> +static int compress_page(void *src, struct drm_i915_error_object *dst)

Jumping ahead a bit with the name, but imo ok ...

> +{
> +	unsigned long page;
> +
> +	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
> +	if (!page)
> +		return -ENOMEM;
> +
> +	dst->pages[dst->page_count++] = (void *)page;
> +
> +	memcpy((void *)page, src, PAGE_SIZE);
> +	return 0;
> +}
> +
>  static struct drm_i915_error_object *
> -i915_error_object_create(struct drm_i915_private *dev_priv,
> +i915_error_object_create(struct drm_i915_private *i915,
>  			 struct i915_vma *vma)
>  {
> -	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	struct drm_i915_gem_object *src;
> +	struct i915_ggtt *ggtt = &i915->ggtt;
> +	const u64 slot = ggtt->error_capture.start;
>  	struct drm_i915_error_object *dst;
> -	int num_pages;
> -	bool use_ggtt;
> -	int i = 0;
> -	u64 reloc_offset;
> +	unsigned long num_pages;
> +	struct sgt_iter iter;
> +	dma_addr_t dma;
>  
>  	if (!vma)
>  		return NULL;
>  
> -	src = vma->obj;
> -	if (!src->pages)
> -		return NULL;
> -
> -	num_pages = src->base.size >> PAGE_SHIFT;
> -
> -	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
> +	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
> +	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
> +		      GFP_ATOMIC | __GFP_NOWARN);
>  	if (!dst)
>  		return NULL;
>  
>  	dst->gtt_offset = vma->node.start;
>  	dst->gtt_size = vma->node.size;
> +	dst->page_count = 0;
>  
> -	reloc_offset = dst->gtt_offset;
> -	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> -		   (vma->flags & I915_VMA_GLOBAL_BIND) &&
> -		   reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end);
> -
> -	/* Cannot access stolen address directly, try to use the aperture */
> -	if (src->stolen) {
> -		use_ggtt = true;
> -
> -		if (!(vma->flags & I915_VMA_GLOBAL_BIND))
> -			goto unwind;
> -
> -		reloc_offset = vma->node.start;
> -		if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end)
> -			goto unwind;
> -	}
> +	for_each_sgt_dma(dma, iter, vma->pages) {
> +		void __iomem *s;
> +		int ret;
>  
> -	/* Cannot access snooped pages through the aperture */
> -	if (use_ggtt && src->cache_level != I915_CACHE_NONE &&
> -	    !HAS_LLC(dev_priv))
> -		goto unwind;
> +		ggtt->base.insert_page(&ggtt->base, dma, slot,
> +				       I915_CACHE_NONE, 0);
>  
> -	dst->page_count = num_pages;
> -	while (num_pages--) {
> -		void *d;
> +		s = io_mapping_map_atomic_wc(&ggtt->mappable, slot);
> +		ret = compress_page((void * __force)s, dst);
> +		io_mapping_unmap_atomic(s);
>  
> -		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
> -		if (d == NULL)
> +		if (ret)
>  			goto unwind;
> -
> -		if (use_ggtt) {
> -			void __iomem *s;
> -
> -			/* Simply ignore tiling or any overlapping fence.
> -			 * It's part of the error state, and this hopefully
> -			 * captures what the GPU read.
> -			 */
> -
> -			s = io_mapping_map_atomic_wc(&ggtt->mappable,
> -						     reloc_offset);
> -			memcpy_fromio(d, s, PAGE_SIZE);
> -			io_mapping_unmap_atomic(s);
> -		} else {
> -			struct page *page;
> -			void *s;
> -
> -			page = i915_gem_object_get_page(src, i);
> -
> -			s = kmap_atomic(page);
> -			memcpy(d, s, PAGE_SIZE);
> -			kunmap_atomic(s);
> -		}
> -
> -		dst->pages[i++] = d;
> -		reloc_offset += PAGE_SIZE;
>  	}
> -
> -	return dst;
> +	goto out;
>  
>  unwind:
> -	while (i--)
> -		kfree(dst->pages[i]);
> +	while (dst->page_count--)
> +		free_page((unsigned long)dst->pages[dst->page_count]);
>  	kfree(dst);
> -	return NULL;
> +	dst = NULL;
> +
> +out:
> +	ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true);
> +	return dst;
>  }
>  
>  /* The error capture is special as tries to run underneath the normal
> @@ -1445,9 +1416,6 @@ static int capture(void *data)
>  {
>  	struct drm_i915_error_state *error = data;
>  
> -	/* Ensure that what we readback from memory matches what the GPU sees */
> -	wbinvd();
> -
>  	i915_capture_gen_state(error->i915, error);
>  	i915_capture_reg_state(error->i915, error);
>  	i915_gem_record_fences(error->i915, error);
> @@ -1460,9 +1428,6 @@ static int capture(void *data)
>  	error->overlay = intel_overlay_capture_error_state(error->i915);
>  	error->display = intel_display_capture_error_state(error->i915);
>  
> -	/* And make sure we don't leave trash in the CPU cache */
> -	wbinvd();
> -
>  	return 0;
>  }
>  
> @@ -1539,7 +1504,6 @@ void i915_error_state_get(struct drm_device *dev,
>  	if (error_priv->error)
>  		kref_get(&error_priv->error->ref);
>  	spin_unlock_irq(&dev_priv->gpu_error.lock);
> -
>  }

Reviewed-by: Daniel Vetter <daniel.vetter@xxxxxxxx>
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx