Quoting Tvrtko Ursulin (2018-01-22 18:43:54) > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > Keep a per-engine number of runnable (waiting for GPU time) requests. > > v2: > * Move queued increment from insert_request to execlist_submit_request to > avoid bumping when re-ordering for priority. > * Support the counter on the ringbuffer submission path as well, albeit > just notionally. (Chris Wilson) > > v3: > * Rebase. > > v4: > * Rename and move the stats into a container structure. (Chris Wilson) > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++++ > drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++-- > drivers/gpu/drm/i915/intel_lrc.c | 2 ++ > drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +++++++++ > 4 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c > index a0f451b4a4e8..8da350bacff1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_request.c > +++ b/drivers/gpu/drm/i915/i915_gem_request.c > @@ -502,6 +502,9 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) > engine->emit_breadcrumb(request, > request->ring->vaddr + request->postfix); > > + GEM_BUG_ON(engine->request_stats.runnable == 0); > + engine->request_stats.runnable--; > + > spin_lock(&request->timeline->lock); > list_move_tail(&request->link, &timeline->requests); > spin_unlock(&request->timeline->lock); > @@ -517,6 +520,8 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) > /* Will be called from irq-context when using foreign fences. 
*/ > spin_lock_irqsave(&engine->timeline->lock, flags); > > + engine->request_stats.runnable++; > + > __i915_gem_request_submit(request); > > spin_unlock_irqrestore(&engine->timeline->lock, flags); > @@ -548,6 +553,8 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) > timeline = request->timeline; > GEM_BUG_ON(timeline == engine->timeline); > > + engine->request_stats.runnable++; > + > spin_lock(&timeline->lock); > list_move(&request->link, &timeline->requests); > spin_unlock(&timeline->lock); > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 7eebfbb95e89..8377a77cfbe7 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -1731,12 +1731,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, > if (i915_terminally_wedged(&engine->i915->gpu_error)) > drm_printf(m, "*** WEDGED ***\n"); > > - drm_printf(m, " current seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", > + drm_printf(m, " current seqno %x, last %x, hangcheck %x [%d ms], inflight %d, runnable %u\n", > intel_engine_get_seqno(engine), > intel_engine_last_submit(engine), > engine->hangcheck.seqno, > jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), > - engine->timeline->inflight_seqnos); > + engine->timeline->inflight_seqnos, > + engine->request_stats.runnable); > drm_printf(m, " Reset count: %d (global %d)\n", > i915_reset_engine_count(error, engine), > i915_reset_count(error)); > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 51e61b04a555..319937e67a6e 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -965,6 +965,8 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) > /* Will be called from irq-context when using foreign fences. 
*/ > spin_lock_irqsave(&engine->timeline->lock, flags); > > + engine->request_stats.runnable++; > + > insert_request(engine, &request->priotree, request->priotree.priority); > > GEM_BUG_ON(!engine->execlists.first); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 27a0c47db51e..d7ee7831288d 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -303,6 +303,15 @@ struct intel_engine_cs { > struct intel_ring *buffer; > struct intel_timeline *timeline; > > + struct { > + /** > + * @runnable: Number of runnable requests sent to the backend. > + * > + * Count of requests waiting for the GPU to execute them. > + */ > + unsigned int runnable; > + } request_stats; > + > struct drm_i915_gem_object *default_state; Just thinking about easy holes: you probably want to keep the default_state pointer above next to the other pointers (buffer, timeline), so that the new unsigned int counter does not open up a padding hole between them. I'll let you argue cachelines ;) -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx