On Mon, Oct 06, 2014 at 03:15:12PM +0100, John.C.Harrison@xxxxxxxxx wrote: > From: John Harrison <John.C.Harrison@xxxxxxxxx> > > For: VIZ-4377 > Signed-off-by: John.C.Harrison@xxxxxxxxx given the scary commit message for patch 3 I'm confused that we can just do this. Or maybe we should move patch 3 right before this one here and instead of mentioning fun in combination with the scheduler as the justification use this patch here. Since obviously if we dig out the ols from the plr then they obviously need to be allocated together. In any case this patch here should at least mention why this is suddenly possible without fuzz. A few comments below. -Daniel > --- > drivers/gpu/drm/i915/i915_gem.c | 13 +++----- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/intel_display.c | 2 +- > drivers/gpu/drm/i915/intel_lrc.c | 20 ++++-------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 48 +++++++++++++++------------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++------ > 6 files changed, 41 insertions(+), 57 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 53b48ad..0d0eb26 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1108,7 +1108,7 @@ i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno) > BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); > > ret = 0; > - if (seqno == ring->outstanding_lazy_seqno) > + if (seqno == i915_gem_request_get_seqno(ring->outstanding_lazy_request)) > ret = i915_add_request(ring, NULL); > > return ret; > @@ -2344,7 +2344,7 @@ int __i915_add_request(struct intel_engine_cs *ring, > u32 request_ring_position, request_start; > int ret; > > - request = ring->preallocated_lazy_request; > + request = ring->outstanding_lazy_request; > if (WARN_ON(request == NULL)) > return -ENOMEM; > > @@ -2391,7 +2391,6 @@ int __i915_add_request(struct intel_engine_cs *ring, > > i915_gem_request_reference(request); > > - request->seqno = intel_ring_get_seqno(ring); > request->ring = ring; > request->head = request_start; > request->tail = request_ring_position; > @@ -2428,8 +2427,7 @@ int __i915_add_request(struct intel_engine_cs *ring, > } > > trace_i915_gem_request_add(ring, request->seqno); > - ring->outstanding_lazy_seqno = 0; > - ring->preallocated_lazy_request = NULL; > + ring->outstanding_lazy_request = NULL; > > if (!dev_priv->ums.mm_suspended) { > i915_queue_hangcheck(ring->dev); > @@ -2605,9 +2603,8 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, > } > > /* These may not have been flush before the reset, do so now */ > - kfree(ring->preallocated_lazy_request); > - ring->preallocated_lazy_request = NULL; > - ring->outstanding_lazy_seqno = 0; > + kfree(ring->outstanding_lazy_request); > + ring->outstanding_lazy_request = NULL; Somewhat unrelated, but shouldn't we move to refcount even the ring->plr? If we allocate it then the ring->plr pointer would hold the ref, then it gets transferred to the request list. This isn't too terribly important yet, but sometimes (when we have per-engine reset) we need to fix up all the request access the hangcheck and error state capture code does. Which probably means we really need to refcount requests super-carefully everywhere (because the hangcheck can happen almost anywhere at all). > } > > void i915_gem_restore_fences(struct drm_device *dev) > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 4250211..e0fdfae 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -1170,7 +1170,7 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, > return ret; > } > > - trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); > + trace_i915_gem_ring_dispatch(ring, i915_gem_request_get_seqno(intel_ring_get_request(ring)), flags); > > i915_gem_execbuffer_move_to_active(vmas, ring); > i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 5aae3d1e..6da18c5 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -10053,7 +10053,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > if (ret) > goto cleanup_unpin; > > - work->flip_queued_seqno = intel_ring_get_seqno(ring); > + work->flip_queued_seqno = i915_gem_request_get_seqno(intel_ring_get_request(ring)); > work->flip_queued_ring = ring; > } > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index b5b6430..7005745 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -799,16 +799,8 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > struct drm_i915_gem_request *request; > int ret; > > - /* The aim is to replace seqno values with request structures. A step > - * along the way is to switch to using the PLR in preference to the > - * OLS. That requires the PLR to only be valid when the OLS is also > - * valid. I.e., the two must be kept in step. */ If you add a comment only to remove it later on in the series I usually smash an "XXX: This will get cleaned up further" or similar on top of it. it aids review if you can gauge this whitout peeking ahead. > - > - if (ring->outstanding_lazy_seqno) { > - BUG_ON(ring->preallocated_lazy_request == NULL); > + if (ring->outstanding_lazy_request) > return 0; > - } > - BUG_ON(ring->preallocated_lazy_request != NULL); > > request = kmalloc(sizeof(*request), GFP_KERNEL); > if (request == NULL) > @@ -816,7 +808,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > > kref_init(&request->ref); > > - ret = i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); > + ret = i915_gem_get_seqno(ring->dev, &request->seqno); > if (ret) { > kfree(request); > return ret; > @@ -829,7 +821,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > request->ctx = ctx; > i915_gem_context_reference(request->ctx); > > - ring->preallocated_lazy_request = request; > + ring->outstanding_lazy_request = request; > return 0; > } > > @@ -1212,7 +1204,8 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) > (ring->status_page.gfx_addr + > (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); > intel_logical_ring_emit(ringbuf, 0); > - intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno); > + intel_logical_ring_emit(ringbuf, > + i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); > intel_logical_ring_emit(ringbuf, MI_NOOP); > intel_logical_ring_advance_and_submit(ringbuf); > @@ -1235,8 +1228,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) > > intel_logical_ring_stop(ring); > WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); > - ring->preallocated_lazy_request = NULL; > - ring->outstanding_lazy_seqno = 0; > + ring->outstanding_lazy_request = NULL; > > if (ring->cleanup) > ring->cleanup(ring); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 7350f78..dd59691 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -895,17 +895,20 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller, > return ret; > > for_each_ring(waiter, dev_priv, i) { > + u32 seqno; > u64 gtt_offset = signaller->semaphore.signal_ggtt[i]; > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > + seqno = i915_gem_request_get_seqno( > + signaller->outstanding_lazy_request); > intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6)); > intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB | > PIPE_CONTROL_QW_WRITE | > PIPE_CONTROL_FLUSH_ENABLE); > intel_ring_emit(signaller, lower_32_bits(gtt_offset)); > intel_ring_emit(signaller, upper_32_bits(gtt_offset)); > - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); > + intel_ring_emit(signaller, seqno); > intel_ring_emit(signaller, 0); > intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | > MI_SEMAPHORE_TARGET(waiter->id)); > @@ -933,16 +936,19 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller, > return ret; > > for_each_ring(waiter, dev_priv, i) { > + u32 seqno; > u64 gtt_offset = signaller->semaphore.signal_ggtt[i]; > if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) > continue; > > + seqno = i915_gem_request_get_seqno( > + signaller->outstanding_lazy_request); > intel_ring_emit(signaller, (MI_FLUSH_DW + 1) | > MI_FLUSH_DW_OP_STOREDW); > intel_ring_emit(signaller, lower_32_bits(gtt_offset) | > MI_FLUSH_DW_USE_GTT); > intel_ring_emit(signaller, upper_32_bits(gtt_offset)); > - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); > + intel_ring_emit(signaller, seqno); > intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL | > MI_SEMAPHORE_TARGET(waiter->id)); > intel_ring_emit(signaller, 0); > @@ -971,9 +977,11 @@ static int gen6_signal(struct intel_engine_cs *signaller, > for_each_ring(useless, dev_priv, i) { > u32 mbox_reg = signaller->semaphore.mbox.signal[i]; > if (mbox_reg != GEN6_NOSYNC) { > + u32 seqno = i915_gem_request_get_seqno( > + signaller->outstanding_lazy_request); > intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); > intel_ring_emit(signaller, mbox_reg); > - intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); > + intel_ring_emit(signaller, seqno); > } > } > > @@ -1008,7 +1016,8 @@ gen6_add_request(struct intel_engine_cs *ring) > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, ring->outstanding_lazy_seqno); > + intel_ring_emit(ring, > + i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1126,7 +1135,8 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_WRITE_FLUSH | > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, ring->outstanding_lazy_seqno); > + intel_ring_emit(ring, > + i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > intel_ring_emit(ring, 0); > PIPE_CONTROL_FLUSH(ring, scratch_addr); > scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ > @@ -1145,7 +1155,8 @@ pc_render_add_request(struct intel_engine_cs *ring) > PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | > PIPE_CONTROL_NOTIFY); > intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); > - intel_ring_emit(ring, ring->outstanding_lazy_seqno); > + intel_ring_emit(ring, > + i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > intel_ring_emit(ring, 0); > __intel_ring_advance(ring); > > @@ -1385,7 +1396,8 @@ i9xx_add_request(struct intel_engine_cs *ring) > > intel_ring_emit(ring, MI_STORE_DWORD_INDEX); > intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); > - intel_ring_emit(ring, ring->outstanding_lazy_seqno); > + intel_ring_emit(ring, > + i915_gem_request_get_seqno(ring->outstanding_lazy_request)); > intel_ring_emit(ring, MI_USER_INTERRUPT); > __intel_ring_advance(ring); > > @@ -1839,8 +1851,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring) > WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); > > intel_destroy_ringbuffer_obj(ringbuf); > - ring->preallocated_lazy_request = NULL; > - ring->outstanding_lazy_seqno = 0; > + ring->outstanding_lazy_request = NULL; > > if (ring->cleanup) > ring->cleanup(ring); > @@ -1980,7 +1991,7 @@ int intel_ring_idle(struct intel_engine_cs *ring) > int ret; > > /* We need to add any requests required to flush the objects and ring */ > - if (ring->outstanding_lazy_seqno) { > + if (ring->outstanding_lazy_request) { > ret = i915_add_request(ring, NULL); > if (ret) > return ret; > @@ -2003,17 +2014,8 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring) > int ret; > struct drm_i915_gem_request *request; > > - /* The aim is to replace seqno values with request structures. A step > - * along the way is to switch to using the PLR in preference to the > - * OLS. That requires the PLR to only be valid when the OLS is also > - * valid. I.e., the two must be kept in step. */ > - > - if (ring->outstanding_lazy_seqno) { > - BUG_ON(ring->preallocated_lazy_request == NULL); > + if (ring->outstanding_lazy_request) > return 0; > - } > - > - BUG_ON(ring->preallocated_lazy_request != NULL); > > request = kmalloc(sizeof(*request), GFP_KERNEL); > if (request == NULL) > @@ -2021,13 +2023,13 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring) > > kref_init(&request->ref); > > - ret = i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); > + ret = i915_gem_get_seqno(ring->dev, &request->seqno); > if (ret) { > kfree(request); > return ret; > } > > - ring->preallocated_lazy_request = request; > + ring->outstanding_lazy_request = request; > return 0; > } > > @@ -2103,7 +2105,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > - BUG_ON(ring->outstanding_lazy_seqno); > + BUG_ON(ring->outstanding_lazy_request); > > if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) { > I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index d98b964..98b219d 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -265,8 +265,7 @@ struct intel_engine_cs { > /** > * Do we have some not yet emitted requests outstanding? > */ > - struct drm_i915_gem_request *preallocated_lazy_request; > - u32 outstanding_lazy_seqno; > + struct drm_i915_gem_request *outstanding_lazy_request; > bool gpu_caches_dirty; > bool fbc_dirty; > > @@ -429,17 +428,11 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf) > return ringbuf->tail; > } > > -static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring) > -{ > - BUG_ON(ring->outstanding_lazy_seqno == 0); > - return ring->outstanding_lazy_seqno; > -} > - > static inline struct drm_i915_gem_request * > intel_ring_get_request(struct intel_engine_cs *ring) > { > - BUG_ON(ring->preallocated_lazy_request == NULL); > - return ring->preallocated_lazy_request; > + BUG_ON(ring->outstanding_lazy_request == NULL); > + return ring->outstanding_lazy_request; > } > > static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno) > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx