Quoting Tvrtko Ursulin (2019-12-04 14:12:42)
> 
> On 04/12/2019 11:26, Chris Wilson wrote:
> > We want the bonded request to have the same scheduler properties as its
> > master so that it is placed at the same depth in the queue. For example,
> > consider we have requests A, B and B', where B & B' are a bonded pair to
> > run in parallel on two engines.
> > 
> > 	A -> B
> > 	 \- B'
> > 
> > B will run after A and so may be scheduled on an idle engine and wait on
> > A using a semaphore. B' sees B being executed and so enters the queue on
> > the same engine as A. As B' did not inherit the semaphore-chain from B,
> > it may have higher precedence than A and so preempts execution. However,
> > B' then sits on a semaphore waiting for B, who is waiting for A, who is
> > blocked by B.
> > 
> > Ergo B' needs to inherit the scheduler properties from B (i.e. the
> > semaphore chain) so that it is scheduled with the same priority as B and
> > will not be executed ahead of Bs dependencies.
> > 
> > Furthermore, to prevent the priorities changing via the expose fence on
> > B', we need to couple in the dependencies for PI. This requires us to
> > relax our sanity-checks that dependencies are strictly in order.
> > 
> > Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> > Testcase: igt/gem_exec_balancer/bonded-chain
> > Testcase: igt/gem_exec_balancer/bonded-semaphore
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> > ---
> > Transfer any semaphore dependencies as well.
> > ---
> >  drivers/gpu/drm/i915/i915_request.c   | 115 ++++++++++++++++++++------
> >  drivers/gpu/drm/i915/i915_scheduler.c |   1 -
> >  2 files changed, 90 insertions(+), 26 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 3109b8a79b9f..e0d24977aa9b 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
> >  }
> >  
> >  static int
> > -__i915_request_await_execution(struct i915_request *rq,
> > -			       struct i915_request *signal,
> > -			       void (*hook)(struct i915_request *rq,
> > -					    struct dma_fence *signal),
> > -			       gfp_t gfp)
> > +__await_execution(struct i915_request *rq,
> > +		  struct i915_request *signal,
> > +		  void (*hook)(struct i915_request *rq,
> > +			       struct dma_fence *signal),
> > +		  gfp_t gfp)
> >  {
> >  	struct execute_cb *cb;
> >  
> > @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
> >  	}
> >  	spin_unlock_irq(&signal->lock);
> >  
> > +	/* Copy across semaphore status as we need the same behaviour */
> > +	rq->sched.flags |= signal->sched.flags;
> >  	return 0;
> >  }
> >  
> > @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
> >  }
> >  
> >  static int
> > -emit_semaphore_wait(struct i915_request *to,
> > -		    struct i915_request *from,
> > -		    gfp_t gfp)
> > +__emit_semaphore_wait(struct i915_request *to,
> > +		      struct i915_request *from,
> > +		      u32 seqno)
> >  {
> >  	const int has_token = INTEL_GEN(to->i915) >= 12;
> >  	u32 hwsp_offset;
> > -	int len;
> > +	int len, err;
> >  	u32 *cs;
> >  
> >  	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
> >  
> > -	/* Just emit the first semaphore we see as request space is limited. */
> > -	if (already_busywaiting(to) & from->engine->mask)
> > -		goto await_fence;
> > -
> > -	if (i915_request_await_start(to, from) < 0)
> > -		goto await_fence;
> > -
> > -	/* Only submit our spinner after the signaler is running! */
> > -	if (__i915_request_await_execution(to, from, NULL, gfp))
> > -		goto await_fence;
> > -
> >  	/* We need to pin the signaler's HWSP until we are finished reading. */
> > -	if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
> > -		goto await_fence;
> > +	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
> > +	if (err)
> > +		return err;
> >  
> >  	len = 4;
> >  	if (has_token)
> > @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
> >  		 MI_SEMAPHORE_POLL |
> >  		 MI_SEMAPHORE_SAD_GTE_SDD) +
> >  		has_token;
> > -	*cs++ = from->fence.seqno;
> > +	*cs++ = seqno;
> >  	*cs++ = hwsp_offset;
> >  	*cs++ = 0;
> >  	if (has_token) {
> > @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
> >  	}
> >  
> >  	intel_ring_advance(to, cs);
> > +	return 0;
> > +}
> > +
> > +static int
> > +emit_semaphore_wait(struct i915_request *to,
> > +		    struct i915_request *from,
> > +		    gfp_t gfp)
> > +{
> > +	/* Just emit the first semaphore we see as request space is limited. */
> > +	if (already_busywaiting(to) & from->engine->mask)
> > +		goto await_fence;
> > +
> > +	if (i915_request_await_start(to, from) < 0)
> > +		goto await_fence;
> > +
> > +	/* Only submit our spinner after the signaler is running! */
> > +	if (__await_execution(to, from, NULL, gfp))
> > +		goto await_fence;
> > +
> > +	if (__emit_semaphore_wait(to, from, from->fence.seqno))
> > +		goto await_fence;
> > +
> >  	to->sched.semaphores |= from->engine->mask;
> >  	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
> >  	return 0;
> > @@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> >  	return 0;
> >  }
> >  
> > +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> > +					  struct dma_fence *fence)
> > +{
> > +	return __intel_timeline_sync_is_later(tl,
> > +					      fence->context,
> > +					      fence->seqno - 1);
> > +}
> > +
> > +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> > +					 const struct dma_fence *fence)
> > +{
> > +	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> > +}
> > +
> > +static int
> > +__i915_request_await_execution(struct i915_request *to,
> > +			       struct i915_request *from,
> > +			       void (*hook)(struct i915_request *rq,
> > +					    struct dma_fence *signal))
> > +{
> > +	bool has_sync;
> > +	int err;
> > +
> > +	/* Submit both requests at the same time */
> > +	err = __await_execution(to, from, hook, I915_FENCE_GFP);
> > +	if (err)
> > +		return err;
> > +
> > +	if (!to->engine->schedule)
> > +		return 0;
> > +
> > +	/* Squash repeated depenendices to the same timelines */
> > +	if (intel_timeline_sync_has_start(i915_request_timeline(to),
> > +					  &from->fence))
> > +		return 0;
> > +
> > +	/* Ensure both start together after all semaphores in signal */
> > +	if (from->sched.semaphores && !has_sync) {
> > +		err =__emit_semaphore_wait(to, from, from->fence.seqno - 1);
> 
> Forgot to git add something? has_sync is uninitialized.

has_sync was intel_timeline_sync_has_start, after rearranging the code it
died.
-Chris
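
For reference, below is a minimal sketch of how the final hunk reads once
has_sync is actually initialised from intel_timeline_sync_has_start(), per
the reply above. This is an illustrative reconstruction, not the posted
patch or any follow-up revision: the quoted hunk is trimmed after the
__emit_semaphore_wait() call, so the closing intel_timeline_sync_set_start()
call and the redundancy note are assumptions.

/*
 * Illustrative sketch only: has_sync takes the value of
 * intel_timeline_sync_has_start(), as described in the reply above.
 */
static int
__i915_request_await_execution(struct i915_request *to,
			       struct i915_request *from,
			       void (*hook)(struct i915_request *rq,
					    struct dma_fence *signal))
{
	bool has_sync;
	int err;

	/* Submit both requests at the same time */
	err = __await_execution(to, from, hook, I915_FENCE_GFP);
	if (err)
		return err;

	/* Without a scheduler there are no priorities/semaphores to couple */
	if (!to->engine->schedule)
		return 0;

	/* Squash repeated dependencies to the same timelines */
	has_sync = intel_timeline_sync_has_start(i915_request_timeline(to),
						 &from->fence);
	if (has_sync)
		return 0;

	/*
	 * Ensure both start together after all semaphores in signal.
	 * (Given the early return above, has_sync is always false here,
	 * so the extra test could equally be dropped.)
	 */
	if (from->sched.semaphores && !has_sync) {
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
		if (err)
			return err;
	}

	/*
	 * The quoted hunk is trimmed at this point; presumably the start
	 * point is then recorded so that later awaits on the same timeline
	 * can be squashed by the check above.
	 */
	return intel_timeline_sync_set_start(i915_request_timeline(to),
					     &from->fence);
}

Note that both helpers and the semaphore wait operate on from->fence.seqno - 1,
i.e. the point at which from starts rather than the point at which it
completes, which is what lets bonded requests be released together while
repeated awaits on the same timeline are squashed.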