Re: [PATCH 3/3] drm/i915/execlists: Force preemption

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes:

> If the preempted context takes too long to relinquish control, e.g. it
> is stuck inside a shader with arbitration disabled, evict that context
> with an engine reset. This ensures that preemptions are reasonably
> responsive, providing a tighter QoS for the more important context at
> the cost of flagging unresponsive contexts more frequently (i.e. instead
> of using an ~10s hangcheck, we now evict at ~10ms).  The challenge of
> lies in picking a timeout that can be reasonably serviced by HW for
> typical workloads, balancing the existing clients against the needs for
> responsiveness.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/Kconfig.profile | 12 ++++++
>  drivers/gpu/drm/i915/gt/intel_lrc.c  | 56 ++++++++++++++++++++++++++--
>  2 files changed, 65 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index 48df8889a88a..8273d3baafe4 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -25,3 +25,15 @@ config DRM_I915_SPIN_REQUEST
>  	  May be 0 to disable the initial spin. In practice, we estimate
>  	  the cost of enabling the interrupt (if currently disabled) to be
>  	  a few microseconds.
> +
> +config DRM_I915_PREEMPT_TIMEOUT
> +	int "Preempt timeout (ms)"
> +	default 10 # milliseconds
> +	help
> +	  How long to wait (in milliseconds) for a preemption event to occur
> +	  when submitting a new context via execlists. If the current context
> +	  does not hit an arbitration point and yield to HW before the timer
> +	  expires, the HW will be reset to allow the more important context
> +	  to execute.
> +
> +	  May be 0 to disable the timeout.
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index fca79adb4aa3..e8d7deba3e49 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -889,6 +889,15 @@ enable_timeslice(struct intel_engine_cs *engine)
>  	return last && need_timeslice(engine, last);
>  }
>  
> +static unsigned long preempt_expires(void)
> +{
> +	unsigned long timeout =

could be const

> +		msecs_to_jiffies_timeout(CONFIG_DRM_I915_PREEMPT_TIMEOUT);
> +
> +	barrier();

This needs a comment. I fail to connect the dots as jiffies
is volatile by nature.

> +	return jiffies + timeout;
> +}
> +
>  static void execlists_dequeue(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> @@ -1220,6 +1229,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>  		*port = execlists_schedule_in(last, port - execlists->pending);
>  		memset(port + 1, 0, (last_port - port) * sizeof(*port));
>  		execlists_submit_ports(engine);
> +
> +		if (CONFIG_DRM_I915_PREEMPT_TIMEOUT)
> +			mod_timer(&execlists->timer, preempt_expires());
>  	}
>  }
>  
> @@ -1376,13 +1388,48 @@ static void process_csb(struct intel_engine_cs *engine)
>  	invalidate_csb_entries(&buf[0], &buf[num_entries - 1]);
>  }
>  
> -static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
> +static bool __execlists_submission_tasklet(struct intel_engine_cs *const engine)
>  {
>  	lockdep_assert_held(&engine->active.lock);
>  
>  	process_csb(engine);
> -	if (!engine->execlists.pending[0])
> +	if (!engine->execlists.pending[0]) {
>  		execlists_dequeue(engine);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static void preempt_reset(struct intel_engine_cs *engine)
> +{
> +	const unsigned int bit = I915_RESET_ENGINE + engine->id;
> +	unsigned long *lock = &engine->i915->gpu_error.flags;
> +
> +	if (test_and_set_bit(bit, lock))
> +		return;
> +
> +	tasklet_disable_nosync(&engine->execlists.tasklet);
> +	spin_unlock(&engine->active.lock);
> +

Why do we need to drop the lock?
-Mika

> +	i915_reset_engine(engine, "preemption time out");
> +
> +	spin_lock(&engine->active.lock);
> +	tasklet_enable(&engine->execlists.tasklet);
> +
> +	clear_bit(bit, lock);
> +	wake_up_bit(lock, bit);
> +}
> +
> +static bool preempt_timeout(struct intel_engine_cs *const engine)
> +{
> +	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
> +		return false;
> +
> +	if (!intel_engine_has_preemption(engine))
> +		return false;
> +
> +	return !timer_pending(&engine->execlists.timer);
>  }
>  
>  /*
> @@ -1395,7 +1442,10 @@ static void execlists_submission_tasklet(unsigned long data)
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&engine->active.lock, flags);
> -	__execlists_submission_tasklet(engine);
> +
> +	if (!__execlists_submission_tasklet(engine) && preempt_timeout(engine))
> +		preempt_reset(engine);
> +
>  	spin_unlock_irqrestore(&engine->active.lock, flags);
>  }
>  
> -- 
> 2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux