Re: [29/33] drm/i915/gt: Expose busywait duration to sysfs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Dec 12, 2019 at 02:04:55PM +0000, Chris Wilson wrote:
> We busywait on an inflight request (one that is currently executing on
> HW, and so might complete quickly) prior to setting up an interrupt and
> sleeping. The trade off is that we keep an expensive CPU core busy in
> order to avoid wake up latency: where that trade off should lie is best
> left to the sysadmin.
> 
> The busywait mechanism can be compiled out with
> 
> 	./scripts/config --set-val DRM_I915_SPIN_REQUEST 0
> 
> The maximum busywait duration can be adjusted per-engine using,
> 
> 	/sys/class/drm/card?/engine/*/ms_busywait_duration_ns
> 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

Code looks good.
Tested the patch with previous sysfs related patches in the series.
The directories and files show up with expected values.
Busywait can be changed.

Reviewed-by: Steve Carbonari <steven.carbonari@xxxxxxxxx>
Tested-by: Steve Carbonari <steven.carbonari@xxxxxxxxx


> ---
>  drivers/gpu/drm/i915/Kconfig.profile         |  9 ++--
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  2 +
>  drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 49 ++++++++++++++++++++
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  1 +
>  drivers/gpu/drm/i915/i915_request.c          | 19 ++++----
>  5 files changed, 68 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
> index d8d4a16179bd..9ee3b59685b9 100644
> --- a/drivers/gpu/drm/i915/Kconfig.profile
> +++ b/drivers/gpu/drm/i915/Kconfig.profile
> @@ -35,9 +35,9 @@ config DRM_I915_PREEMPT_TIMEOUT
>  
>  	  May be 0 to disable the timeout.
>  
> -config DRM_I915_SPIN_REQUEST
> -	int "Busywait for request completion (us)"
> -	default 5 # microseconds
> +config DRM_I915_MAX_REQUEST_BUSYWAIT
> +	int "Busywait for request completion limit (ns)"
> +	default 8000 # nanoseconds
>  	help
>  	  Before sleeping waiting for a request (GPU operation) to complete,
>  	  we may spend some time polling for its completion. As the IRQ may
> @@ -45,6 +45,9 @@ config DRM_I915_SPIN_REQUEST
>  	  check if the request will complete in the time it would have taken
>  	  us to enable the interrupt.
>  
> +	  This is adjustable via
> +	  /sys/class/drm/card?/engine/*/max_busywait_duration_ns
> +
>  	  May be 0 to disable the initial spin. In practice, we estimate
>  	  the cost of enabling the interrupt (if currently disabled) to be
>  	  a few microseconds.
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 9e16d8a9bbf5..9162b94a7b80 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -312,6 +312,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
>  
>  	engine->props.heartbeat_interval_ms =
>  		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
> +	engine->props.max_busywait_duration_ns =
> +		CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
>  	engine->props.preempt_timeout_ms =
>  		CONFIG_DRM_I915_PREEMPT_TIMEOUT;
>  	engine->props.stop_timeout_ms =
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> index b1bd768b13d7..6d87529c64a7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
> @@ -142,6 +142,54 @@ all_caps_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
>  static struct kobj_attribute all_caps_attr =
>  __ATTR(known_capabilities, 0444, all_caps_show, NULL);
>  
> +static ssize_t
> +max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
> +	       const char *buf, size_t count)
> +{
> +	struct intel_engine_cs *engine = kobj_to_engine(kobj);
> +	unsigned long long duration;
> +	int err;
> +
> +	/*
> +	 * When waiting for a request, if is it currently being executed
> +	 * on the GPU, we busywait for a short while before sleeping. The
> +	 * premise is that most requests are short, and if it is already
> +	 * executing then there is a good chance that it will complete
> +	 * before we can setup the interrupt handler and go to sleep.
> +	 * We try to offset the cost of going to sleep, by first spinning
> +	 * on the request -- if it completed in less time than it would take
> +	 * to go sleep, process the interrupt and return back to the client,
> +	 * then we have saved the client some latency, albeit at the cost
> +	 * of spinning on an expensive CPU core.
> +	 *
> +	 * While we try to avoid waiting at all for a request that is unlikely
> +	 * to complete, deciding how long it is worth spinning is for is an
> +	 * arbitrary decision: trading off power vs latency.
> +	 */
> +
> +	err = kstrtoull(buf, 0, &duration);
> +	if (err)
> +		return err;
> +
> +	if (duration > jiffies_to_nsecs(2))
> +		return -EINVAL;
> +
> +	WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
> +
> +	return count;
> +}
> +
> +static ssize_t
> +max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
> +{
> +	struct intel_engine_cs *engine = kobj_to_engine(kobj);
> +
> +	return sprintf(buf, "%lu\n", engine->props.max_busywait_duration_ns);
> +}
> +
> +static struct kobj_attribute max_spin_attr =
> +__ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store);
> +
>  static ssize_t
>  timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
>  		const char *buf, size_t count)
> @@ -224,6 +272,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
>  		&mmio_attr.attr,
>  		&caps_attr.attr,
>  		&all_caps_attr.attr,
> +		&max_spin_attr.attr,
>  		NULL
>  	};
>  
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index ede7ce2695cd..807796d619e9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -525,6 +525,7 @@ struct intel_engine_cs {
>  
>  	struct {
>  		unsigned long heartbeat_interval_ms;
> +		unsigned long max_busywait_duration_ns;
>  		unsigned long preempt_timeout_ms;
>  		unsigned long stop_timeout_ms;
>  		unsigned long timeslice_duration_ms;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 15767c2d9ea1..2f6979009c8f 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1381,7 +1381,7 @@ void i915_request_add(struct i915_request *rq)
>  	mutex_unlock(&tl->mutex);
>  }
>  
> -static unsigned long local_clock_us(unsigned int *cpu)
> +static unsigned long local_clock_ns(unsigned int *cpu)
>  {
>  	unsigned long t;
>  
> @@ -1398,7 +1398,7 @@ static unsigned long local_clock_us(unsigned int *cpu)
>  	 * stop busywaiting, see busywait_stop().
>  	 */
>  	*cpu = get_cpu();
> -	t = local_clock() >> 10;
> +	t = local_clock();
>  	put_cpu();
>  
>  	return t;
> @@ -1408,15 +1408,15 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
>  {
>  	unsigned int this_cpu;
>  
> -	if (time_after(local_clock_us(&this_cpu), timeout))
> +	if (time_after(local_clock_ns(&this_cpu), timeout))
>  		return true;
>  
>  	return this_cpu != cpu;
>  }
>  
> -static bool __i915_spin_request(const struct i915_request * const rq,
> -				int state, unsigned long timeout_us)
> +static bool __i915_spin_request(const struct i915_request * const rq, int state)
>  {
> +	unsigned long timeout_ns;
>  	unsigned int cpu;
>  
>  	/*
> @@ -1444,7 +1444,8 @@ static bool __i915_spin_request(const struct i915_request * const rq,
>  	 * takes to sleep on a request, on the order of a microsecond.
>  	 */
>  
> -	timeout_us += local_clock_us(&cpu);
> +	timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
> +	timeout_ns += local_clock_ns(&cpu);
>  	do {
>  		if (i915_request_completed(rq))
>  			return true;
> @@ -1452,7 +1453,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
>  		if (signal_pending_state(state, current))
>  			break;
>  
> -		if (busywait_stop(timeout_us, cpu))
> +		if (busywait_stop(timeout_ns, cpu))
>  			break;
>  
>  		cpu_relax();
> @@ -1538,8 +1539,8 @@ long i915_request_wait(struct i915_request *rq,
>  	 * completion. That requires having a good predictor for the request
>  	 * duration, which we currently lack.
>  	 */
> -	if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
> -	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
> +	if (IS_ACTIVE(CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT) &&
> +	    __i915_spin_request(rq, state)) {
>  		dma_fence_signal(&rq->fence);
>  		goto out;
>  	}
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx



[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux