Quoting Chris Wilson (2019-02-07 09:18:22)
> Apply backpressure to hogs that emit requests faster than the GPU can
> process them by waiting for their ring to be less than half-full before
> proceeding with taking the struct_mutex.
>
> This is a gross hack to apply throttling backpressure, the long term
> goal is to remove the struct_mutex contention so that each client
> naturally waits, preferably in an asynchronous, nonblocking fashion
> (pipelined operations for the win), for their own resources and never
> blocks another client within the driver at least. (Realtime priority
> goals would extend to ensuring that resource contention favours high
> priority clients as well.)
>
> This patch only limits excessive request production and does not attempt
> to throttle clients that block waiting for eviction (either global GTT or
> system memory) or any other global resources, see above for the long term
> goal.

Code checks out. Some Testcase: links and some relative perf numbers
would be in order; it's a pretty arbitrarily tuned algorithm, after all :)

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

Regards, Joonas

>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: John Harrison <John.C.Harrison@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 13 -----
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 12 +++++
>  3 files changed, 75 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 8eedf7cac493..84ef3abc567e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -753,6 +753,64 @@ static int eb_select_context(struct i915_execbuffer *eb)
>  	return 0;
>  }
>
> +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
> +{
> +	struct i915_request *rq;
> +
> +	if (intel_ring_update_space(ring) >= PAGE_SIZE)
> +		return NULL;
> +
> +	/*
> +	 * Find a request such that after waiting upon it, there will be at
> +	 * least half the ring available. The hysteresis allows us to compete
> +	 * for the shared ring and should mean that we sleep less often prior
> +	 * to claiming our resources, but not so long that the ring completely
> +	 * drains before we can submit our next request.
> +	 */
> +	list_for_each_entry(rq, &ring->request_list, ring_link) {
> +		if (__intel_ring_space(rq->postfix,
> +				       ring->emit, ring->size) > ring->size / 2)
> +			break;
> +	}
> +	if (&rq->ring_link == &ring->request_list)
> +		return NULL; /* weird, we will check again later for real */
> +
> +	return i915_request_get(rq);
> +}
> +
> +static int eb_wait_for_ring(const struct i915_execbuffer *eb)
> +{
> +	const struct intel_context *ce;
> +	struct i915_request *rq;
> +	int ret = 0;
> +
> +	/*
> +	 * Apply a light amount of backpressure to prevent excessive hogs
> +	 * from blocking waiting for space whilst holding struct_mutex and
> +	 * keeping all of their resources pinned.
> +	 */
> +
> +	ce = to_intel_context(eb->ctx, eb->engine);
> +	if (!ce->ring) /* first use, assume empty! */
> +		return 0;
> +
> +	rq = __eb_wait_for_ring(ce->ring);
> +	if (rq) {
> +		mutex_unlock(&eb->i915->drm.struct_mutex);
> +
> +		if (i915_request_wait(rq,
> +				      I915_WAIT_INTERRUPTIBLE,
> +				      MAX_SCHEDULE_TIMEOUT) < 0)
> +			ret = -EINTR;
> +
> +		i915_request_put(rq);
> +
> +		mutex_lock(&eb->i915->drm.struct_mutex);
> +	}
> +
> +	return ret;
> +}
> +
>  static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  {
>  	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
> @@ -2291,6 +2349,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  	if (err)
>  		goto err_rpm;
>
> +	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
> +	if (unlikely(err))
> +		goto err_unlock;
> +
>  	err = eb_relocate(&eb);
>  	if (err) {
>  		/*
> @@ -2435,6 +2497,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  err_vma:
>  	if (eb.exec)
>  		eb_release_vmas(&eb);
> +err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
>  	intel_runtime_pm_put(eb.i915, wakeref);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b889b27f8aeb..7f841dba87b3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -49,19 +49,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>  			  I915_GEM_HWS_INDEX_ADDR);
>  }
>
> -static unsigned int __intel_ring_space(unsigned int head,
> -				       unsigned int tail,
> -				       unsigned int size)
> -{
> -	/*
> -	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> -	 * same cacheline, the Head Pointer must not be greater than the Tail
> -	 * Pointer."
> -	 */
> -	GEM_BUG_ON(!is_power_of_2(size));
> -	return (head - tail - CACHELINE_BYTES) & (size - 1);
> -}
> -
>  unsigned int intel_ring_update_space(struct intel_ring *ring)
>  {
>  	unsigned int space;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4d4ea6963a72..710ffb221775 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -832,6 +832,18 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
>  	return tail;
>  }
>
> +static inline unsigned int
> +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> +{
> +	/*
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 */
> +	GEM_BUG_ON(!is_power_of_2(size));
> +	return (head - tail - CACHELINE_BYTES) & (size - 1);
> +}
> +
>  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>
>  int intel_engine_setup_common(struct intel_engine_cs *engine);
> --
> 2.20.1
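
P.S. Since the tuning is admittedly arbitrary, a couple of worked numbers may
help readers follow the mask arithmetic in __intel_ring_space(). Because the
ring size is a power of two, the unsigned subtraction wraps around and the AND
does the modulo for free; one cacheline is reserved so head and tail never
land on the same cacheline. Below is a minimal userspace sketch, not driver
code: the ring_space() name and the sample head/tail values are made up for
illustration, only the formula itself is lifted from the diff, and the
64-byte CACHELINE_BYTES matches what the driver assumes.

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u

/* Same formula as __intel_ring_space(): free bytes between the consumer
 * (head) and the producer (tail), minus one reserved cacheline. */
static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	assert((size & (size - 1)) == 0); /* power of two, as the GEM_BUG_ON enforces */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	/* Empty 4 KiB ring (head == tail): all but one cacheline is free. */
	printf("%u\n", ring_space(0, 0, 4096));      /* prints 4032 */

	/* Producer 1024 bytes ahead of the consumer. */
	printf("%u\n", ring_space(512, 1536, 4096)); /* prints 3008 */

	return 0;
}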
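
And to see the half-full hysteresis in action, here is a toy model of the
__eb_wait_for_ring() walk, again a hypothetical sketch with made-up emit and
postfix positions rather than driver code. The real loop walks
ring->request_list oldest-first and waits on the first request whose
retirement would leave more than ring->size / 2 bytes free:

#include <stdio.h>

#define CACHELINE_BYTES 64u

static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	const unsigned int size = 4096;
	const unsigned int emit = 4000; /* producer position (made up) */
	/* completion point (postfix) of each in-flight request, oldest first */
	const unsigned int postfix[] = { 256, 1024, 2048, 3072 };
	unsigned int i;

	for (i = 0; i < sizeof(postfix) / sizeof(postfix[0]); i++) {
		unsigned int space = ring_space(postfix[i], emit, size);

		printf("retire to %4u -> %4u bytes free%s\n",
		       postfix[i], space,
		       space > size / 2 ? "  <- wait on this one" : "");
		if (space > size / 2)
			break; /* mirrors the break in the request_list walk */
	}
	return 0;
}

With these numbers the walk skips the two oldest requests (they would free
only 288 and 1056 bytes) and picks the one that frees 2080 bytes, just over
the 2048-byte threshold, so a hog sleeps once per ring-half rather than once
per request, and the ring never fully drains while it waits.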