Re: [PATCH 1/7] drm/i915/selftests: Flush old resets between engines

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> When injecting rapid resets, we must be careful to at least wait for the
> previous reset to have taken effect and the engine to have restarted. If we
> perform a second reset before that has happened, we will notice that the
> engine hasn't recovered and declare it lost, wedging the device and
> failing. In practice, since we wait for each hanging batch to start
> before injecting the reset, this too-fast-reset condition can only be
> triggered when moving onto the next engine in the test, so we need only
> wait for the existing reset to complete before switching engines.
>
> v2: Wrap up the wait inside a safety net (a timeout that wedges the
> device) so that we bail out if the hw never returns to idle.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> Cc: Michel Thierry <michel.thierry@xxxxxxxxx>

Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>

> ---
>  drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 65 ++++++++++++++++++++++--
>  1 file changed, 62 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index d1f91a533afa..a4f4ff22389b 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -244,6 +244,57 @@ static u32 hws_seqno(const struct hang *h,
>  	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
>  }
>  
> +struct wedge_me {
> +	struct delayed_work work;
> +	struct drm_i915_private *i915;
> +	const void *symbol;
> +};
> +
> +static void wedge_me(struct work_struct *work)
> +{
> +	struct wedge_me *w = container_of(work, typeof(*w), work.work);
> +
> +	pr_err("%pS timed out, cancelling all further testing.\n",
> +	       w->symbol);
> +	i915_gem_set_wedged(w->i915);
> +}
> +
> +static void __init_wedge(struct wedge_me *w,
> +			 struct drm_i915_private *i915,
> +			 long timeout,
> +			 const void *symbol)
> +{
> +	w->i915 = i915;
> +	w->symbol = symbol;
> +
> +	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
> +	schedule_delayed_work(&w->work, timeout);
> +}
> +
> +static void __fini_wedge(struct wedge_me *w)
> +{
> +	cancel_delayed_work_sync(&w->work);
> +	destroy_delayed_work_on_stack(&w->work);
> +	w->i915 = NULL;
> +}
> +
> +#define wedge_on_timeout(W, DEV, TIMEOUT)				\
> +	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
> +	     (W)->i915;							\
> +	     __fini_wedge((W)))
> +
> +static int flush_test(struct drm_i915_private *i915, unsigned int flags)
> +{
> +	struct wedge_me w;
> +
> +	cond_resched();
> +
> +	wedge_on_timeout(&w, i915, HZ)
> +		i915_gem_wait_for_idle(i915, flags);
> +
> +	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
> +}
> +
>  static void hang_fini(struct hang *h)
>  {
>  	*h->batch = MI_BATCH_BUFFER_END;
> @@ -255,7 +306,7 @@ static void hang_fini(struct hang *h)
>  	i915_gem_object_unpin_map(h->hws);
>  	i915_gem_object_put(h->hws);
>  
> -	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
> +	flush_test(h->i915, I915_WAIT_LOCKED);
>  }
>  
>  static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
> @@ -487,7 +538,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  		if (err)
>  			break;
>  
> -		cond_resched();
> +		err = flush_test(i915, 0);
> +		if (err)
> +			break;
>  	}
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
> @@ -726,7 +779,9 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915,
>  		if (err)
>  			break;
>  
> -		cond_resched();
> +		err = flush_test(i915, 0);
> +		if (err)
> +			break;
>  	}
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
> @@ -952,6 +1007,10 @@ static int igt_reset_queue(void *arg)
>  		i915_gem_chipset_flush(i915);
>  
>  		i915_gem_request_put(prev);
> +
> +		err = flush_test(i915, I915_WAIT_LOCKED);
> +		if (err)
> +			break;
>  	}
>  
>  fini:
> -- 
> 2.15.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux