On Thu, 15 Jun 2017, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote:
> We need to keep track of the last location we ask the hw to read up to
> (RING_TAIL) separately from our last write location into the ring, so
> that in the event of a GPU reset we do not tell the HW to proceed into
> a partially written request (which can happen if that request is waiting
> for an external signal before being executed).
>
> v2: Refactor intel_ring_reset() (Mika)
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100144
> Testcase: igt/gem_exec_fence/await-hang
> Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
> Fixes: d55ac5bf97c6 ("drm/i915: Defer transfer onto execution timeline to actual hw submission")
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> Link: http://patchwork.freedesktop.org/patch/msgid/20170425130049.26147-1-chris@xxxxxxxxxxxxxxxxxx
> Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> (cherry picked from commit e6ba9992de6c63fe86c028b4876338e1cb7dac34)

Thanks, picked up to drm-intel-fixes.

BR,
Jani.

> ---
>  drivers/gpu/drm/i915/i915_gem_request.c    |  2 +-
>  drivers/gpu/drm/i915/i915_guc_submission.c |  4 +--
>  drivers/gpu/drm/i915/intel_lrc.c           |  6 ++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 41 ++++++++++++++++++++----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 19 ++++++++++++--
>  5 files changed, 48 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 5ddbc9499775..a74d0ac737cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -623,7 +623,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
>           * GPU processing the request, we never over-estimate the
>           * position of the head.
>           */
> -        req->head = req->ring->tail;
> +        req->head = req->ring->emit;
>
>          /* Check that we didn't interrupt ourselves with a new request */
>          GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 1642fff9cf13..ab5140ba108d 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
>          GEM_BUG_ON(freespace < wqi_size);
>
>          /* The GuC firmware wants the tail index in QWords, not bytes */
> -        tail = rq->tail;
> -        assert_ring_tail_valid(rq->ring, rq->tail);
> -        tail >>= 3;
> +        tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
>          GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
>
>          /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index c8f7c631fc1f..10c63dbd617c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -326,8 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
>                  rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
>          u32 *reg_state = ce->lrc_reg_state;
>
> -        assert_ring_tail_valid(rq->ring, rq->tail);
> -        reg_state[CTX_RING_TAIL+1] = rq->tail;
> +        reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
>
>          /* True 32b PPGTT with dynamic page allocation: update PDP
>           * registers and point the unallocated PDPs to scratch page.
> @@ -2036,8 +2035,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
>                          ce->state->obj->mm.dirty = true;
>                          i915_gem_object_unpin_map(ce->state->obj);
>
> -                        ce->ring->head = ce->ring->tail = 0;
> -                        intel_ring_update_space(ce->ring);
> +                        intel_ring_reset(ce->ring, 0);
>                  }
>          }
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 66a2b8b83972..513a0f4b469b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -49,7 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
>
>  void intel_ring_update_space(struct intel_ring *ring)
>  {
> -        ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
> +        ring->space = __intel_ring_space(ring->head, ring->emit, ring->size);
>  }
>
>  static int
> @@ -774,8 +774,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
>
>          i915_gem_request_submit(request);
>
> -        assert_ring_tail_valid(request->ring, request->tail);
> -        I915_WRITE_TAIL(request->engine, request->tail);
> +        I915_WRITE_TAIL(request->engine,
> +                        intel_ring_set_tail(request->ring, request->tail));
>  }
>
>  static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
> @@ -1316,11 +1316,23 @@ int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias)
>          return PTR_ERR(addr);
>  }
>
> +void intel_ring_reset(struct intel_ring *ring, u32 tail)
> +{
> +        GEM_BUG_ON(!list_empty(&ring->request_list));
> +        ring->tail = tail;
> +        ring->head = tail;
> +        ring->emit = tail;
> +        intel_ring_update_space(ring);
> +}
> +
>  void intel_ring_unpin(struct intel_ring *ring)
>  {
>          GEM_BUG_ON(!ring->vma);
>          GEM_BUG_ON(!ring->vaddr);
>
> +        /* Discard any unused bytes beyond that submitted to hw. */
> +        intel_ring_reset(ring, ring->tail);
> +
>          if (i915_vma_is_map_and_fenceable(ring->vma))
>                  i915_vma_unpin_iomap(ring->vma);
>          else
> @@ -1562,8 +1574,9 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
>          struct intel_engine_cs *engine;
>          enum intel_engine_id id;
>
> +        /* Restart from the beginning of the rings for convenience */
>          for_each_engine(engine, dev_priv, id)
> -                engine->buffer->head = engine->buffer->tail;
> +                intel_ring_reset(engine->buffer, 0);
>  }
>
>  static int ring_request_alloc(struct drm_i915_gem_request *request)
> @@ -1616,7 +1629,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>                  unsigned space;
>
>                  /* Would completion of this request free enough space? */
> -                space = __intel_ring_space(target->postfix, ring->tail,
> +                space = __intel_ring_space(target->postfix, ring->emit,
>                                             ring->size);
>                  if (space >= bytes)
>                          break;
> @@ -1641,8 +1654,8 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
>  u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  {
>          struct intel_ring *ring = req->ring;
> -        int remain_actual = ring->size - ring->tail;
> -        int remain_usable = ring->effective_size - ring->tail;
> +        int remain_actual = ring->size - ring->emit;
> +        int remain_usable = ring->effective_size - ring->emit;
>          int bytes = num_dwords * sizeof(u32);
>          int total_bytes, wait_bytes;
>          bool need_wrap = false;
> @@ -1678,17 +1691,17 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>
>          if (unlikely(need_wrap)) {
>                  GEM_BUG_ON(remain_actual > ring->space);
> -                GEM_BUG_ON(ring->tail + remain_actual > ring->size);
> +                GEM_BUG_ON(ring->emit + remain_actual > ring->size);
>
>                  /* Fill the tail with MI_NOOP */
> -                memset(ring->vaddr + ring->tail, 0, remain_actual);
> -                ring->tail = 0;
> +                memset(ring->vaddr + ring->emit, 0, remain_actual);
> +                ring->emit = 0;
>                  ring->space -= remain_actual;
>          }
>
> -        GEM_BUG_ON(ring->tail > ring->size - bytes);
> -        cs = ring->vaddr + ring->tail;
> -        ring->tail += bytes;
> +        GEM_BUG_ON(ring->emit > ring->size - bytes);
> +        cs = ring->vaddr + ring->emit;
> +        ring->emit += bytes;
>          ring->space -= bytes;
>          GEM_BUG_ON(ring->space < 0);
>
> @@ -1699,7 +1712,7 @@ u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
>  int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
>  {
>          int num_dwords =
> -                (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
> +                (req->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
>          u32 *cs;
>
>          if (num_dwords == 0)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a82a0807f64d..f7144fe09613 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -145,6 +145,7 @@ struct intel_ring {
>
>          u32 head;
>          u32 tail;
> +        u32 emit;
>
>          int space;
>          int size;
> @@ -488,6 +489,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
>  struct intel_ring *
>  intel_engine_create_ring(struct intel_engine_cs *engine, int size);
>  int intel_ring_pin(struct intel_ring *ring, unsigned int offset_bias);
> +void intel_ring_reset(struct intel_ring *ring, u32 tail);
> +void intel_ring_update_space(struct intel_ring *ring);
>  void intel_ring_unpin(struct intel_ring *ring);
>  void intel_ring_free(struct intel_ring *ring);
>
> @@ -511,7 +514,7 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
>           * reserved for the command packet (i.e. the value passed to
>           * intel_ring_begin()).
>           */
> -        GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs);
> +        GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
>  }
>
>  static inline u32
> @@ -540,7 +543,19 @@ assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
>          GEM_BUG_ON(tail >= ring->size);
>  }
>
> -void intel_ring_update_space(struct intel_ring *ring);
> +static inline unsigned int
> +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
> +{
> +        /* Whilst writes to the tail are strictly order, there is no
> +         * serialisation between readers and the writers. The tail may be
> +         * read by i915_gem_request_retire() just as it is being updated
> +         * by execlists, as although the breadcrumb is complete, the context
> +         * switch hasn't been seen.
> +         */
> +        assert_ring_tail_valid(ring, tail);
> +        ring->tail = tail;
> +        return tail;
> +}
>
>  void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);

--
Jani Nikula, Intel Open Source Technology Center
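To illustrate the tail/emit split described in the commit message outside of the driver, here is a minimal, self-contained C sketch. It is not the i915 code: struct demo_ring, demo_emit(), demo_submit() and demo_reset() are hypothetical names, the ring is byte-addressed, and wrapping is not handled. It only demonstrates the invariant the patch relies on: writing commands advances a private emit cursor, submission is the single step that copies emit into the tail handed to the hardware (the role of intel_ring_set_tail() in the patch), and a reset rewinds to that tail so the hardware is never pointed into a partially written request.

/*
 * Standalone sketch (not i915 code): track the software write cursor
 * (emit) separately from the last position the hardware was told to
 * read up to (tail).
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DEMO_RING_SIZE 4096u

struct demo_ring {
        uint8_t buf[DEMO_RING_SIZE];
        uint32_t head;  /* hardware read position (only touched on reset here) */
        uint32_t tail;  /* last position handed to the hardware */
        uint32_t emit;  /* software write cursor */
};

/* Copy a command into the ring at the emit cursor; the hardware is not
 * told about it yet. */
static uint32_t demo_emit(struct demo_ring *ring, const void *data, uint32_t len)
{
        assert(ring->emit + len <= DEMO_RING_SIZE); /* no wrap handling in this sketch */
        memcpy(ring->buf + ring->emit, data, len);
        ring->emit += len;
        return ring->emit;
}

/* Publish everything emitted so far: tail catches up to emit.  This is
 * the value that would be written to RING_TAIL (or the context image). */
static uint32_t demo_submit(struct demo_ring *ring)
{
        ring->tail = ring->emit;
        return ring->tail;
}

/* After a reset, restart from the last submitted tail, discarding bytes
 * that were emitted but never handed to the hardware. */
static void demo_reset(struct demo_ring *ring)
{
        ring->emit = ring->tail;
        ring->head = ring->tail;
}

int main(void)
{
        struct demo_ring ring = {0};
        const uint8_t cmd[16] = {0};

        demo_emit(&ring, cmd, sizeof(cmd));
        demo_submit(&ring);                 /* tail = emit = 16 */

        demo_emit(&ring, cmd, sizeof(cmd)); /* written, but never submitted */
        demo_reset(&ring);                  /* emit rewinds to 16, not 32 */

        printf("tail=%u emit=%u\n", (unsigned)ring.tail, (unsigned)ring.emit);
        return 0;
}

In the patch itself this corresponds to intel_ring_begin() advancing only ring->emit, intel_ring_set_tail() being the one place ring->tail is updated (called from i9xx_submit_request(), execlists_update_context() and guc_wq_item_append()), and intel_ring_reset() rewinding head, tail and emit together.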