Quoting Chris Wilson (2019-02-07 09:18:22)
> Apply backpressure to hogs that emit requests faster than the GPU can
> process them by waiting for their ring to be less than half-full before
> proceeding with taking the struct_mutex.
>
> This is a gross hack to apply throttling backpressure, the long term
> goal is to remove the struct_mutex contention so that each client
> naturally waits, preferably in an asynchronous, nonblocking fashion
> (pipelined operations for the win), for their own resources and never
> blocks another client within the driver at least. (Realtime priority
> goals would extend to ensuring that resource contention favours high
> priority clients as well.)
>
> This patch only limits excessive request production and does not attempt
> to throttle clients that block waiting for eviction (either global GTT or
> system memory) or any other global resources, see above for the long term
> goal.

Code checks out. Some Testcase: links and some relative perf numbers
would be in order; it's a pretty arbitrarily tuned algorithm, after all :)

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>

Regards, Joonas

>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
> Cc: John Harrison <John.C.Harrison@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 13 -----
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 12 +++++
>  3 files changed, 75 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 8eedf7cac493..84ef3abc567e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -753,6 +753,64 @@ static int eb_select_context(struct i915_execbuffer *eb)
>  	return 0;
>  }
>
> +static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
> +{
> +	struct i915_request *rq;
> +
> +	if (intel_ring_update_space(ring) >= PAGE_SIZE)
> +		return NULL;
> +
> +	/*
> +	 * Find a request such that after waiting upon it, there will be at
> +	 * least half the ring available. The hysteresis allows us to compete
> +	 * for the shared ring and should mean that we sleep less often prior
> +	 * to claiming our resources, but not so long that the ring completely
> +	 * drains before we can submit our next request.
> +	 */
> +	list_for_each_entry(rq, &ring->request_list, ring_link) {
> +		if (__intel_ring_space(rq->postfix,
> +				       ring->emit, ring->size) > ring->size / 2)
> +			break;
> +	}
> +	if (&rq->ring_link == &ring->request_list)
> +		return NULL; /* weird, we will check again later for real */
> +
> +	return i915_request_get(rq);
> +}
> +
> +static int eb_wait_for_ring(const struct i915_execbuffer *eb)
> +{
> +	const struct intel_context *ce;
> +	struct i915_request *rq;
> +	int ret = 0;
> +
> +	/*
> +	 * Apply a light amount of backpressure to prevent excessive hogs
> +	 * from blocking waiting for space whilst holding struct_mutex and
> +	 * keeping all of their resources pinned.
> +	 */
> +
> +	ce = to_intel_context(eb->ctx, eb->engine);
> +	if (!ce->ring) /* first use, assume empty! */
> +		return 0;
> +
> +	rq = __eb_wait_for_ring(ce->ring);
> +	if (rq) {
> +		mutex_unlock(&eb->i915->drm.struct_mutex);
> +
> +		if (i915_request_wait(rq,
> +				      I915_WAIT_INTERRUPTIBLE,
> +				      MAX_SCHEDULE_TIMEOUT) < 0)
> +			ret = -EINTR;
> +
> +		i915_request_put(rq);
> +
> +		mutex_lock(&eb->i915->drm.struct_mutex);
> +	}
> +
> +	return ret;
> +}
> +
>  static int eb_lookup_vmas(struct i915_execbuffer *eb)
>  {
>  	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
> @@ -2291,6 +2349,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  	if (err)
>  		goto err_rpm;
>
> +	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
> +	if (unlikely(err))
> +		goto err_unlock;
> +
>  	err = eb_relocate(&eb);
>  	if (err) {
>  		/*
> @@ -2435,6 +2497,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  err_vma:
>  	if (eb.exec)
>  		eb_release_vmas(&eb);
> +err_unlock:
>  	mutex_unlock(&dev->struct_mutex);
>  err_rpm:
>  	intel_runtime_pm_put(eb.i915, wakeref);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index b889b27f8aeb..7f841dba87b3 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -49,19 +49,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
>  			  I915_GEM_HWS_INDEX_ADDR);
>  }
>
> -static unsigned int __intel_ring_space(unsigned int head,
> -				       unsigned int tail,
> -				       unsigned int size)
> -{
> -	/*
> -	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> -	 * same cacheline, the Head Pointer must not be greater than the Tail
> -	 * Pointer."
> -	 */
> -	GEM_BUG_ON(!is_power_of_2(size));
> -	return (head - tail - CACHELINE_BYTES) & (size - 1);
> -}
> -
>  unsigned int intel_ring_update_space(struct intel_ring *ring)
>  {
>  	unsigned int space;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4d4ea6963a72..710ffb221775 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -832,6 +832,18 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
>  	return tail;
>  }
>
> +static inline unsigned int
> +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
> +{
> +	/*
> +	 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
> +	 * same cacheline, the Head Pointer must not be greater than the Tail
> +	 * Pointer."
> +	 */
> +	GEM_BUG_ON(!is_power_of_2(size));
> +	return (head - tail - CACHELINE_BYTES) & (size - 1);
> +}
> +
>  void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
>
>  int intel_engine_setup_common(struct intel_engine_cs *engine);
> --
> 2.20.1
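
P.S. Since the tuning is admittedly arbitrary, a couple of worked numbers may
help readers follow the mask arithmetic in __intel_ring_space(). Because the
ring size is a power of two, the unsigned subtraction wraps around and the AND
does the modulo for free; one cacheline is reserved so head and tail never
land on the same cacheline. Below is a minimal userspace sketch, not driver
code: the ring_space() name and the sample head/tail values are made up for
illustration, only the formula itself is lifted from the diff, and the
64-byte CACHELINE_BYTES matches what the driver assumes.

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64u

/* Same formula as __intel_ring_space(): free bytes between the consumer
 * (head) and the producer (tail), minus one reserved cacheline. */
static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	assert((size & (size - 1)) == 0); /* power of two, as the GEM_BUG_ON enforces */
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	/* Empty 4 KiB ring (head == tail): all but one cacheline is free. */
	printf("%u\n", ring_space(0, 0, 4096));      /* prints 4032 */

	/* Producer 1024 bytes ahead of the consumer. */
	printf("%u\n", ring_space(512, 1536, 4096)); /* prints 3008 */

	return 0;
}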
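
And to see the half-full hysteresis in action, here is a toy model of the
__eb_wait_for_ring() walk, again a hypothetical sketch with made-up emit and
postfix positions rather than driver code. The real loop walks
ring->request_list oldest-first and waits on the first request whose
retirement would leave more than ring->size / 2 bytes free:

#include <stdio.h>

#define CACHELINE_BYTES 64u

static unsigned int ring_space(unsigned int head, unsigned int tail,
			       unsigned int size)
{
	return (head - tail - CACHELINE_BYTES) & (size - 1);
}

int main(void)
{
	const unsigned int size = 4096;
	const unsigned int emit = 4000; /* producer position (made up) */
	/* completion point (postfix) of each in-flight request, oldest first */
	const unsigned int postfix[] = { 256, 1024, 2048, 3072 };
	unsigned int i;

	for (i = 0; i < sizeof(postfix) / sizeof(postfix[0]); i++) {
		unsigned int space = ring_space(postfix[i], emit, size);

		printf("retire to %4u -> %4u bytes free%s\n",
		       postfix[i], space,
		       space > size / 2 ? "  <- wait on this one" : "");
		if (space > size / 2)
			break; /* mirrors the break in the request_list walk */
	}
	return 0;
}

With these numbers the walk skips the two oldest requests (they would free
only 288 and 1056 bytes) and picks the one that frees 2080 bytes, just over
the 2048-byte threshold, so a hog sleeps once per ring-half rather than once
per request, and the ring never fully drains while it waits.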