Thanks Chris. This is Xiao from the GVT-g team. GVT-g actually needs this patch once GuC submission is enabled on the host/native side. GVT-g requires single submission to the GuC firmware (only one request in the GuC FW at any time) because we need to do additional setup/cleanup around each VM switch. One thing worth mentioning: we need the 'execlists_context_status_change' notification before i915_guc_submit(), just as we have for the ELSP port write; a sketch of what we mean is below. We can submit another patch for that on top of this one. The question is: when will this patch be merged upstream?
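
Roughly the shape we have in mind, as an untested sketch only (note that execlists_context_status_change() is currently static to intel_lrc.c, so it would first need to be exposed to the GuC submission path):

static void i915_guc_submit(struct drm_i915_gem_request *rq)
{
        /* Sketch: let GVT-g observe the context entering the hardware
         * before the work item reaches the GuC firmware, mirroring the
         * notification issued on the execlists path just before the
         * ELSP port write.
         */
        execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);

        __i915_gem_request_submit(rq);
        __i915_guc_submit(rq);
}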

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx] On Behalf Of Chris Wilson
> Sent: Thursday, February 2, 2017 6:36 PM
> To: intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> Subject: [PATCH v8] drm/i915/scheduler: emulate a scheduler for guc
>
> This emulates execlists on top of the GuC in order to defer submission of
> requests to the hardware. This deferral allows time for high priority
> requests to gazump their way to the head of the queue, however it nerfs
> the GuC by converting it back into a simple execlist (where the CPU has
> to wake up after every request to feed new commands into the GuC).
>
> v2: Drop hack status - though iirc there is still a lockdep inversion
> between fence and engine->timeline->lock (which is impossible as the
> nesting only occurs on different fences - hopefully just requires some
> judicious lockdep annotation)
> v3: Apply lockdep nesting to enabling signaling on the request, using
> the pattern we already have in __i915_gem_request_submit();
> v4: Replaying requests after a hang also now needs the timeline
> spinlock, to disable the interrupts at least
> v5: Hold wq lock for completeness, and emit a tracepoint for enabling signal
> v6: Reorder interrupt checking for a happier gcc.
> v7: Only signal the tasklet after a user-interrupt if using guc scheduling
> v8: Restore lost update of rq through the i915_guc_irq_handler (Tvrtko)
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 110 +++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_irq.c            |  13 +++-
>  drivers/gpu/drm/i915/intel_lrc.c           |   5 +-
>  3 files changed, 114 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 8ced9e26f075..d99185aafe58 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -25,6 +25,8 @@
>  #include "i915_drv.h"
>  #include "intel_uc.h"
>
> +#include <trace/events/dma_fence.h>
> +
>  /**
>   * DOC: GuC-based command submission
>   *
> @@ -348,7 +350,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  	u32 freespace;
>  	int ret;
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
>  	freespace -= client->wq_rsvd;
>  	if (likely(freespace >= wqi_size)) {
> @@ -358,7 +360,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
>  		client->no_wq_space++;
>  		ret = -EAGAIN;
>  	}
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>
>  	return ret;
>  }
> @@ -370,9 +372,9 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request)
>
>  	GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size);
>
> -	spin_lock(&client->wq_lock);
> +	spin_lock_irq(&client->wq_lock);
>  	client->wq_rsvd -= wqi_size;
> -	spin_unlock(&client->wq_lock);
> +	spin_unlock_irq(&client->wq_lock);
>  }
>
>  /* Construct a Work Item and append it to the GuC's Work Queue */
> @@ -532,10 +534,97 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
>
>  static void i915_guc_submit(struct drm_i915_gem_request *rq)
>  {
> -	i915_gem_request_submit(rq);
> +	__i915_gem_request_submit(rq);
>  	__i915_guc_submit(rq);
>  }
>
> +static void nested_enable_signaling(struct drm_i915_gem_request *rq)
> +{
> +	/* If we use dma_fence_enable_sw_signaling() directly, lockdep
> +	 * detects an ordering issue between the fence lockclass and the
> +	 * global_timeline. This circular dependency can only occur via 2
> +	 * different fences (but same fence lockclass), so we use the nesting
> +	 * annotation here to prevent the warn, equivalent to the nesting
> +	 * inside i915_gem_request_submit() for when we also enable the
> +	 * signaler.
> +	 */
> +
> +	if (test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> +			     &rq->fence.flags))
> +		return;
> +
> +	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
> +	trace_dma_fence_enable_signal(&rq->fence);
> +
> +	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
> +	intel_engine_enable_signaling(rq);
> +	spin_unlock(&rq->lock);
> +}
> +
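
As an aside for readers unfamiliar with the annotation above: SINGLE_DEPTH_NESTING is the stock lockdep idiom for taking two locks of the same lockclass in a known-safe order. A generic illustration of the pattern (made-up names, not part of this patch):

#include <linux/spinlock.h>

struct foo {
        spinlock_t lock;
};

/* Both locks belong to the same lockclass, so the inner acquisition
 * must be marked as nested, or lockdep reports a (false) recursive
 * locking deadlock even though a and b are distinct objects.
 */
static void foo_transfer(struct foo *a, struct foo *b)
{
        spin_lock(&a->lock);
        spin_lock_nested(&b->lock, SINGLE_DEPTH_NESTING);
        /* ... move state from a to b ... */
        spin_unlock(&b->lock);
        spin_unlock(&a->lock);
}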

> +static bool i915_guc_dequeue(struct intel_engine_cs *engine)
> +{
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *last = port[0].request;
> +	unsigned long flags;
> +	struct rb_node *rb;
> +	bool submit = false;
> +
> +	spin_lock_irqsave(&engine->timeline->lock, flags);
> +	rb = engine->execlist_first;
> +	while (rb) {
> +		struct drm_i915_gem_request *cursor =
> +			rb_entry(rb, typeof(*cursor), priotree.node);
> +
> +		if (last && cursor->ctx != last->ctx) {
> +			if (port != engine->execlist_port)
> +				break;
> +
> +			i915_gem_request_assign(&port->request, last);
> +			nested_enable_signaling(last);
> +			port++;
> +		}
> +
> +		rb = rb_next(rb);
> +		rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> +		RB_CLEAR_NODE(&cursor->priotree.node);
> +		cursor->priotree.priority = INT_MAX;
> +
> +		i915_guc_submit(cursor);
> +		last = cursor;
> +		submit = true;
> +	}
> +	if (submit) {
> +		i915_gem_request_assign(&port->request, last);
> +		nested_enable_signaling(last);
> +		engine->execlist_first = rb;
> +	}
> +	spin_unlock_irqrestore(&engine->timeline->lock, flags);
> +
> +	return submit;
> +}
> +
> +static void i915_guc_irq_handler(unsigned long data)
> +{
> +	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct execlist_port *port = engine->execlist_port;
> +	struct drm_i915_gem_request *rq;
> +	bool submit;
> +
> +	do {
> +		rq = port[0].request;
> +		while (rq && i915_gem_request_completed(rq)) {
> +			i915_gem_request_put(rq);
> +			port[0].request = port[1].request;
> +			port[1].request = NULL;
> +			rq = port[0].request;
> +		}
> +
> +		submit = false;
> +		if (!port[1].request)
> +			submit = i915_guc_dequeue(engine);
> +	} while (submit);
> +}
> +
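
The dequeue above is also where the single-submission requirement mentioned at the top would plug in. A purely hypothetical guard, assuming intel_gvt_active() as the host-side check (we would additionally need to stop the loop after the first request):

        /* Hypothetical GVT-g guard at the top of i915_guc_dequeue():
         * keep at most one request in the GuC firmware at any time, so
         * that the VM-switch setup/cleanup can run between submissions.
         */
        if (intel_gvt_active(engine->i915) && port[0].request)
                return false;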

>  /*
>   * Everything below here is concerned with setup & teardown, and is
>   * therefore not part of the somewhat time-critical batch-submission
> @@ -944,15 +1033,22 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>  	/* Take over from manual control of ELSP (execlists) */
>  	for_each_engine(engine, dev_priv, id) {
>  		struct drm_i915_gem_request *rq;
> +		unsigned long flags;
>
> -		engine->submit_request = i915_guc_submit;
> -		engine->schedule = NULL;
> +		tasklet_init(&engine->irq_tasklet,
> +			     i915_guc_irq_handler,
> +			     (unsigned long)engine);
>
>  		/* Replay the current set of previously submitted requests */
> +		spin_lock_irqsave(&engine->timeline->lock, flags);
>  		list_for_each_entry(rq, &engine->timeline->requests, link) {
> +			spin_lock(&client->wq_lock);
>  			client->wq_rsvd += sizeof(struct guc_wq_item);
> +			spin_unlock(&client->wq_lock);
> +
>  			__i915_guc_submit(rq);
>  		}
> +		spin_unlock_irqrestore(&engine->timeline->lock, flags);
>  	}
>
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 436b5baf38c2..72f9497518bc 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1350,13 +1350,20 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
>  static __always_inline void
>  gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
>  {
> -	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift))
> -		notify_ring(engine);
> +	bool tasklet = false;
>
>  	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
>  		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -		tasklet_hi_schedule(&engine->irq_tasklet);
> +		tasklet = true;
>  	}
> +
> +	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
> +		notify_ring(engine);
> +		tasklet |= i915.enable_guc_submission;
> +	}
> +
> +	if (tasklet)
> +		tasklet_hi_schedule(&engine->irq_tasklet);
>  }
>
>  static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 44a92ea464ba..c81d86afb52f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1278,7 +1278,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>
>  	/* After a GPU reset, we may have requests to replay */
>  	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> -	if (!execlists_elsp_idle(engine)) {
> +	if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
>  		engine->execlist_port[0].count = 0;
>  		engine->execlist_port[1].count = 0;
>  		execlists_submit_ports(engine);
> @@ -1345,9 +1345,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
>  	request->ring->last_retired_head = -1;
>  	intel_ring_update_space(request->ring);
>
> -	if (i915.enable_guc_submission)
> -		return;
> -
>  	/* Catch up with any missed context-switch interrupts */
>  	if (request->ctx != port[0].request->ctx) {
>  		i915_gem_request_put(port[0].request);
> --
> 2.11.0
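
To close the loop on my comment at the top: on the completion side we would presumably also want the complementary SCHEDULE_OUT notification when a request retires from the emulated ports in i915_guc_irq_handler(), pairing with the SCHEDULE_IN sketched earlier. Again an untested sketch of the idea only:

        rq = port[0].request;
        while (rq && i915_gem_request_completed(rq)) {
                /* Hypothetical GVT-g hook: pair the SCHEDULE_IN issued
                 * at submission with a SCHEDULE_OUT as the context
                 * leaves the (emulated) port, before the last reference
                 * may be dropped.
                 */
                execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);

                i915_gem_request_put(rq);
                port[0].request = port[1].request;
                port[1].request = NULL;
                rq = port[0].request;
        }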