On Tue, Apr 18, 2017 at 01:23:33PM -0700, Michel Thierry wrote: > Final enablement patch for GPU hang detection using watchdog timeout. > Using the gem_context_setparam ioctl, users can specify the desired > timeout value in microseconds, and the driver will do the conversion to > 'timestamps'. > > The recommended default watchdog threshold for video engines is 60000 us, > since this has been _empirically determined_ to be a good compromise for > low-latency requirements and low rate of false positives. The default > register value is ~106000us and the theoretical max value (all 1s) is > 353 seconds. > > Note, UABI engine ids and i915 engine ids are different, and this patch > uses the i915 ones. Some kind of mapping table [1] is required if we > decide to use the UABI engine ids. > > [1] http://patchwork.freedesktop.org/patch/msgid/20170329135831.30254-2-chris@xxxxxxxxxxxxxxxxxx > > v2: Fixed get api to return values in microseconds. Threshold updated to > be per context engine. Check for u32 overflow. Capture ctx threshold > value in error state. > > v3: Add a way to get array size, short-cut to disable all thresholds, > return EFAULT / EINVAL as needed. Move the capture of the threshold > value in the error state into a new patch. BXT has a different > timestamp base (because why not?). > > Signed-off-by: Tomas Elf <tomas.elf@xxxxxxxxx> > Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 29 +++++++++ > drivers/gpu/drm/i915/i915_gem_context.c | 102 ++++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 5 +- > include/uapi/drm/i915_drm.h | 1 + > 4 files changed, 135 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 203f2112dd18..f65a236fddef 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3574,6 +3574,35 @@ i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, > return &vm->timeline.engine[engine->id]; > } > > +/* > + * BDW & SKL+ Timestamp timer resolution = 0.080 uSec, > + * or 12500000 counts per second, or ~12 counts per microsecond. > + * > + * But Broxton Timestamp timer resolution is different, 0.052 uSec, > + * or 19200000 counts per second, or ~19 counts per microsecond. > + */ > +#define SKL_TIMESTAMP_CNTS_PER_USEC 12 > +#define BXT_TIMESTAMP_CNTS_PER_USEC 19 > +#define TIMESTAMP_CNTS_PER_USEC(dev_priv) (IS_BROXTON(dev_priv) ? \ > + BXT_TIMESTAMP_CNTS_PER_USEC : \ > + SKL_TIMESTAMP_CNTS_PER_USEC) > +static inline u32 > +watchdog_to_us(struct drm_i915_private *dev_priv, u32 value_in_clock_counts) > +{ > + return value_in_clock_counts / TIMESTAMP_CNTS_PER_USEC(dev_priv); > +} > + > +static inline u32 > +watchdog_to_clock_counts(struct drm_i915_private *dev_priv, u64 value_in_us) > +{ > + u64 threshold = value_in_us * TIMESTAMP_CNTS_PER_USEC(dev_priv); > + > + if (overflows_type(threshold, u32)) > + return -EINVAL; > + > + return threshold; > +} > + > int i915_perf_open_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index edbed85a1c88..85a6467a25a6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -422,6 +422,102 @@ i915_gem_context_create_gvt(struct drm_device *dev) > return ctx; > } > > +/* Return the timer count threshold in microseconds. */ > +int i915_gem_context_get_watchdog(struct i915_gem_context *ctx, > + struct drm_i915_gem_context_param *args) > +{ > + struct drm_i915_private *dev_priv = ctx->i915; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + u32 threshold_in_us[I915_NUM_ENGINES]; > + > + if (args->size == 0) > + goto out; > + > + if (args->size < sizeof(threshold_in_us)) > + return -EFAULT; > + > + if (!dev_priv->engine[VCS]->emit_start_watchdog) > + return -ENODEV; > + > + for_each_engine(engine, dev_priv, id) { > + struct intel_context *ce = &ctx->engine[id]; > + > + threshold_in_us[id] = watchdog_to_us(dev_priv, > + ce->watchdog_threshold); > + } > + > + mutex_unlock(&dev_priv->drm.struct_mutex); > + if (__copy_to_user(u64_to_user_ptr(args->value), > + &threshold_in_us, > + sizeof(threshold_in_us))) { > + mutex_lock(&dev_priv->drm.struct_mutex); > + return -EFAULT; > + } > + mutex_lock(&dev_priv->drm.struct_mutex); > + > +out: > + args->size = sizeof(threshold_in_us); > + > + return 0; > +} > + > +/* > + * Based on time out value in microseconds (us) calculate > + * timer count thresholds needed based on core frequency. > + * Watchdog can be disabled by setting it to 0. > + */ > +int i915_gem_context_set_watchdog(struct i915_gem_context *ctx, > + struct drm_i915_gem_context_param *args) > +{ > + struct drm_i915_private *dev_priv = ctx->i915; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + u32 threshold[I915_NUM_ENGINES]; > + > + memset(&threshold, 0, sizeof(threshold)); > + memset(threshold, 0, sizeof(threshold)); > + /* shortcut to disable in all engines */ > + if (args->size == 0) > + goto set_watchdog; > + > + if (args->size < sizeof(threshold)) > + return -EFAULT; > + > + if (!dev_priv->engine[VCS]->emit_start_watchdog) > + return -ENODEV; > + > + mutex_unlock(&dev_priv->drm.struct_mutex); > + if (copy_from_user(&threshold, if (copy_from_user(threshold, > + u64_to_user_ptr(args->value), > + sizeof(threshold))) { > + mutex_lock(&dev_priv->drm.struct_mutex); > + return -EFAULT; > + } > + mutex_lock(&dev_priv->drm.struct_mutex); > + > + /* not supported in blitter engine */ > + if (threshold[BCS] != 0) > + return -EINVAL; > + > + for_each_engine(engine, dev_priv, id) { > + threshold[id] = watchdog_to_clock_counts(dev_priv, > + threshold[id]); > + > + if (threshold[id] == -EINVAL) > + return -EINVAL; > + } > + > +set_watchdog: > + for_each_engine(engine, dev_priv, id) { > + struct intel_context *ce = &ctx->engine[id]; > + > + ce->watchdog_threshold = threshold[id]; > + } > + > + return 0; > +} > + > int i915_gem_context_init(struct drm_i915_private *dev_priv) > { > struct i915_gem_context *ctx; > @@ -1061,6 +1157,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, > case I915_CONTEXT_PARAM_BANNABLE: > args->value = i915_gem_context_is_bannable(ctx); > break; > + case I915_CONTEXT_PARAM_WATCHDOG: > + ret = i915_gem_context_get_watchdog(ctx, args); > + break; > default: > ret = -EINVAL; > break; > @@ -1118,6 +1217,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, > else > i915_gem_context_clear_bannable(ctx); > break; > + case I915_CONTEXT_PARAM_WATCHDOG: > + ret = i915_gem_context_set_watchdog(ctx, args); > + break; > default: > ret = -EINVAL; > break; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 7a202e73ce9b..f4e05531a9a0 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1496,7 +1496,7 @@ static inline u32 get_watchdog_disable(struct intel_engine_cs *engine) > return GEN8_XCS_WATCHDOG_DISABLE; > } > > -#define GEN8_WATCHDOG_1000US 0x2ee0 //XXX: Temp, replace with helper function > +#define GEN8_WATCHDOG_1000US(dev_priv) watchdog_to_clock_counts(dev_priv, 1000) > static void gen8_watchdog_irq_handler(unsigned long data) > { > struct intel_engine_cs *engine = (struct intel_engine_cs *)data; > @@ -1526,7 +1526,8 @@ static void gen8_watchdog_irq_handler(unsigned long data) > } else { > engine->hangcheck.watchdog = current_seqno; > /* Re-start the counter, if really hung, it will expire again */ > - I915_WRITE_FW(RING_THRESH(engine->mmio_base), GEN8_WATCHDOG_1000US); > + I915_WRITE_FW(RING_THRESH(engine->mmio_base), > + GEN8_WATCHDOG_1000US(dev_priv)); > I915_WRITE_FW(RING_CNTR(engine->mmio_base), GEN8_WATCHDOG_ENABLE); > } > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index fadedefba6db..18bc0ec618dd 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -1308,6 +1308,7 @@ struct drm_i915_gem_context_param { > #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 > #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 > #define I915_CONTEXT_PARAM_BANNABLE 0x5 > +#define I915_CONTEXT_PARAM_WATCHDOG 0x6 > __u64 value; > }; > > -- > 2.11.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx