Re: [PATCH] drm/i915/selftests: Add a safety net to live_workarounds

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> Since live_workarounds pokes around the w/a registers and checks to see
> if they survive across a reset, we are prone to fouling the machine and
> leaving it in a non-recoverable state. Wrap the probe inside a timeout
> to abort the test if the reset fails.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107188

How can we tell that it is not just reset flakiness, but is actually
associated with the whitelist poking?

-Mika

> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/selftests/igt_wedge_me.h | 48 +++++++++++++++++++
>  .../drm/i915/selftests/intel_workarounds.c    |  8 +++-
>  2 files changed, 55 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/i915/selftests/igt_wedge_me.h
>
> diff --git a/drivers/gpu/drm/i915/selftests/igt_wedge_me.h b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h
> new file mode 100644
> index 000000000000..d2518cf9a5c8
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/selftests/igt_wedge_me.h
> @@ -0,0 +1,48 @@
> +/*
> + * SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2018 Intel Corporation
> + */
> +
> +#ifndef IGT_WEDGE_ME_H
> +#define IGT_WEDGE_ME_H
> +
> +struct igt_wedge_me {
> +	struct delayed_work work;
> +	struct drm_i915_private *i915;
> +	const char *name;
> +};
> +
> +static void __igt_wedge_me(struct work_struct *work)
> +{
> +	struct igt_wedge_me *w = container_of(work, typeof(*w), work.work);
> +
> +	pr_err("%s timed out, cancelling test.\n", w->name);
> +	i915_gem_set_wedged(w->i915);
> +}
> +
> +static void __igt_init_wedge(struct igt_wedge_me *w,
> +			     struct drm_i915_private *i915,
> +			     long timeout,
> +			     const char *name)
> +{
> +	w->i915 = i915;
> +	w->name = name;
> +
> +	INIT_DELAYED_WORK_ONSTACK(&w->work, __igt_wedge_me);
> +	schedule_delayed_work(&w->work, timeout);
> +}
> +
> +static void __igt_fini_wedge(struct igt_wedge_me *w)
> +{
> +	cancel_delayed_work_sync(&w->work);
> +	destroy_delayed_work_on_stack(&w->work);
> +	w->i915 = NULL;
> +}
> +
> +#define igt_wedge_on_timeout(W, DEV, TIMEOUT)				\
> +	for (__igt_init_wedge((W), (DEV), (TIMEOUT), __func__);		\
> +	     (W)->i915;							\
> +	     __igt_fini_wedge((W)))
> +
> +#endif /* IGT_WEDGE_ME_H */
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index fafdec3fe83e..0d39b3bf0c0d 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -6,6 +6,7 @@
>  
>  #include "../i915_selftest.h"
>  
> +#include "igt_wedge_me.h"
>  #include "mock_context.h"
>  
>  static struct drm_i915_gem_object *
> @@ -111,6 +112,7 @@ static int check_whitelist(const struct whitelist *w,
>  			   struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_gem_object *results;
> +	struct igt_wedge_me wedge;
>  	u32 *vaddr;
>  	int err;
>  	int i;
> @@ -119,7 +121,11 @@ static int check_whitelist(const struct whitelist *w,
>  	if (IS_ERR(results))
>  		return PTR_ERR(results);
>  
> -	err = i915_gem_object_set_to_cpu_domain(results, false);
> +	err = 0;
> +	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
> +		err = i915_gem_object_set_to_cpu_domain(results, false);
> +	if (i915_terminally_wedged(&ctx->i915->gpu_error))
> +		err = -EIO;
>  	if (err)
>  		goto out_put;
>  
> -- 
> 2.18.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux