Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > The writing is on the wall for the existence of a single execution queue > along each engine, and as a consequence we will not be able to track > dependencies along the HW queue itself, i.e. we will not be able to use > HW semaphores on gen7 as they use a global set of registers (and unlike > gen8+ we can not effectively target memory to keep per-context seqno and > dependencies). > > On the positive side, when we implement request reordering for gen7 we > could also not presume a simple execution queue and would also require > removing the current semaphore generation code. So this bring us another > step closer to request reordering! > > The negative side is that using interrupts to drive inter-engine > synchronisation is much slower (4us -> 15us to do a nop on each of the 3 > engines on ivb). This is much better than it at the time of introducing > the HW semaphores and equally importantly userspace weaned itself of s/of/off ? > intermixing dependent BLT/RENDER operations (the prime culprit was glyph > rendering in UXA). So while we regress the microbenchmarks it should not > impact the user. 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 19 +-- > drivers/gpu/drm/i915/i915_drv.c | 2 +- > drivers/gpu/drm/i915/i915_drv.h | 3 - > drivers/gpu/drm/i915/i915_gem.c | 4 +- > drivers/gpu/drm/i915/i915_request.c | 126 +--------------- > drivers/gpu/drm/i915/i915_timeline.h | 8 - > drivers/gpu/drm/i915/intel_engine_cs.c | 29 +--- > drivers/gpu/drm/i915/intel_hangcheck.c | 155 -------------------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 187 +----------------------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 56 +------ > 10 files changed, 15 insertions(+), 574 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 77486a614614..b0bdf3c0d776 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -1041,21 +1041,7 @@ static const struct file_operations i915_error_state_fops = { > static int > i915_next_seqno_set(void *data, u64 val) > { > - struct drm_i915_private *dev_priv = data; > - struct drm_device *dev = &dev_priv->drm; > - int ret; > - > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - > - intel_runtime_pm_get(dev_priv); > - ret = i915_gem_set_global_seqno(dev, val); > - intel_runtime_pm_put(dev_priv); > - > - mutex_unlock(&dev->struct_mutex); > - > - return ret; > + return val ? 0 : -EINVAL; This begs to meet its maker soon. I didn't find anything else in this patch, so looks ok and what a clump of complicated code we can get rid of! Code that we have dragged along with no small amount of suffering. I am in favour. 
Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > } > > DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, > @@ -4219,9 +4205,6 @@ i915_drop_caches_set(void *data, u64 val) > I915_WAIT_LOCKED, > MAX_SCHEDULE_TIMEOUT); > > - if (ret == 0 && val & DROP_RESET_SEQNO) > - ret = i915_gem_set_global_seqno(&i915->drm, 1); > - > if (val & DROP_RETIRE) > i915_retire_requests(i915); > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index caa055ac9472..dcb935338c63 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -349,7 +349,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, > value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL); > break; > case I915_PARAM_HAS_SEMAPHORES: > - value = HAS_LEGACY_SEMAPHORES(dev_priv); > + value = 0; > break; > case I915_PARAM_HAS_SECURE_BATCHES: > value = capable(CAP_SYS_ADMIN); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 815db160b966..7da653ece4dd 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1948,7 +1948,6 @@ struct drm_i915_private { > struct list_head active_rings; > struct list_head closed_vma; > u32 active_requests; > - u32 request_serial; > > /** > * Is the GPU currently considered idle, or busy executing > @@ -2394,8 +2393,6 @@ intel_info(const struct drm_i915_private *dev_priv) > #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) > #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) > > -#define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN(dev_priv, 7) > - > #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) > #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) > #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d92147ab4489..f4254e012620 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ 
b/drivers/gpu/drm/i915/i915_gem.c > @@ -3318,7 +3318,7 @@ static void nop_submit_request(struct i915_request *request) > > spin_lock_irqsave(&request->engine->timeline.lock, flags); > __i915_request_submit(request); > - intel_engine_init_global_seqno(request->engine, request->global_seqno); > + intel_engine_write_global_seqno(request->engine, request->global_seqno); > spin_unlock_irqrestore(&request->engine->timeline.lock, flags); > } > > @@ -3359,7 +3359,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) > > /* > * Make sure no request can slip through without getting completed by > - * either this call here to intel_engine_init_global_seqno, or the one > + * either this call here to intel_engine_write_global_seqno, or the one > * in nop_submit_request. > */ > synchronize_rcu(); > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 637909c59f1f..6cedcfea33b5 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -111,99 +111,10 @@ i915_request_remove_from_client(struct i915_request *request) > spin_unlock(&file_priv->mm.lock); > } > > -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) > +static void reserve_gt(struct drm_i915_private *i915) > { > - struct intel_engine_cs *engine; > - struct i915_timeline *timeline; > - enum intel_engine_id id; > - int ret; > - > - /* Carefully retire all requests without writing to the rings */ > - ret = i915_gem_wait_for_idle(i915, > - I915_WAIT_INTERRUPTIBLE | > - I915_WAIT_LOCKED, > - MAX_SCHEDULE_TIMEOUT); > - if (ret) > - return ret; > - > - GEM_BUG_ON(i915->gt.active_requests); > - > - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ > - for_each_engine(engine, i915, id) { > - GEM_TRACE("%s seqno %d (current %d) -> %d\n", > - engine->name, > - engine->timeline.seqno, > - intel_engine_get_seqno(engine), > - seqno); > - > - if (seqno == engine->timeline.seqno) > - continue; > - > - 
kthread_park(engine->breadcrumbs.signaler); > - > - if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { > - /* Flush any waiters before we reuse the seqno */ > - intel_engine_disarm_breadcrumbs(engine); > - intel_engine_init_hangcheck(engine); > - GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); > - } > - > - /* Check we are idle before we fiddle with hw state! */ > - GEM_BUG_ON(!intel_engine_is_idle(engine)); > - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); > - > - /* Finally reset hw state */ > - intel_engine_init_global_seqno(engine, seqno); > - engine->timeline.seqno = seqno; > - > - kthread_unpark(engine->breadcrumbs.signaler); > - } > - > - list_for_each_entry(timeline, &i915->gt.timelines, link) > - memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); > - > - i915->gt.request_serial = seqno; > - > - return 0; > -} > - > -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) > -{ > - struct drm_i915_private *i915 = to_i915(dev); > - > - lockdep_assert_held(&i915->drm.struct_mutex); > - > - if (seqno == 0) > - return -EINVAL; > - > - /* HWS page needs to be set less than what we will inject to ring */ > - return reset_all_global_seqno(i915, seqno - 1); > -} > - > -static int reserve_gt(struct drm_i915_private *i915) > -{ > - int ret; > - > - /* > - * Reservation is fine until we may need to wrap around > - * > - * By incrementing the serial for every request, we know that no > - * individual engine may exceed that serial (as each is reset to 0 > - * on any wrap). This protects even the most pessimistic of migrations > - * of every request from all engines onto just one. 
> - */ > - while (unlikely(++i915->gt.request_serial == 0)) { > - ret = reset_all_global_seqno(i915, 0); > - if (ret) { > - i915->gt.request_serial--; > - return ret; > - } > - } > - > if (!i915->gt.active_requests++) > i915_gem_unpark(i915); > - > - return 0; > } > > static void unreserve_gt(struct drm_i915_private *i915) > @@ -608,9 +519,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > if (IS_ERR(ce)) > return ERR_CAST(ce); > > - ret = reserve_gt(i915); > - if (ret) > - goto err_unpin; > + reserve_gt(i915); > > ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); > if (ret) > @@ -743,7 +652,6 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) > kmem_cache_free(i915->requests, rq); > err_unreserve: > unreserve_gt(i915); > -err_unpin: > intel_context_unpin(ce); > return ERR_PTR(ret); > } > @@ -771,34 +679,12 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) > ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, > &from->submit, > I915_FENCE_GFP); > - return ret < 0 ? ret : 0; > - } > - > - if (to->engine->semaphore.sync_to) { > - u32 seqno; > - > - GEM_BUG_ON(!from->engine->semaphore.signal); > - > - seqno = i915_request_global_seqno(from); > - if (!seqno) > - goto await_dma_fence; > - > - if (seqno <= to->timeline->global_sync[from->engine->id]) > - return 0; > - > - trace_i915_gem_ring_sync_to(to, from); > - ret = to->engine->semaphore.sync_to(to, from); > - if (ret) > - return ret; > - > - to->timeline->global_sync[from->engine->id] = seqno; > - return 0; > + } else { > + ret = i915_sw_fence_await_dma_fence(&to->submit, > + &from->fence, 0, > + I915_FENCE_GFP); > } > > -await_dma_fence: > - ret = i915_sw_fence_await_dma_fence(&to->submit, > - &from->fence, 0, > - I915_FENCE_GFP); > return ret < 0 ? 
ret : 0; > } > > diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h > index ebd71b487220..38c1e15e927a 100644 > --- a/drivers/gpu/drm/i915/i915_timeline.h > +++ b/drivers/gpu/drm/i915/i915_timeline.h > @@ -63,14 +63,6 @@ struct i915_timeline { > * redundant and we can discard it without loss of generality. > */ > struct i915_syncmap *sync; > - /** > - * Separately to the inter-context seqno map above, we track the last > - * barrier (e.g. semaphore wait) to the global engine timelines. Note > - * that this tracks global_seqno rather than the context.seqno, and > - * so it is subject to the limitations of hw wraparound and that we > - * may need to revoke global_seqno (on pre-emption). > - */ > - u32 global_sync[I915_NUM_ENGINES]; > > struct list_head link; > const char *name; > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index d3dec31df123..1462bb49f420 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -454,25 +454,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) > return err; > } > > -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) > +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno) > { > - struct drm_i915_private *dev_priv = engine->i915; > - > - /* Our semaphore implementation is strictly monotonic (i.e. we proceed > - * so long as the semaphore value in the register/page is greater > - * than the sync value), so whenever we reset the seqno, > - * so long as we reset the tracking semaphore value to 0, it will > - * always be before the next request's seqno. If we don't reset > - * the semaphore value, then when the seqno moves backwards all > - * future waits will complete instantly (causing rendering corruption). 
> - */ > - if (IS_GEN_RANGE(dev_priv, 6, 7)) { > - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); > - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); > - if (HAS_VEBOX(dev_priv)) > - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); > - } > - > intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); > clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); > > @@ -1300,16 +1283,6 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, > drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); > } > > - if (HAS_LEGACY_SEMAPHORES(dev_priv)) { > - drm_printf(m, "\tSYNC_0: 0x%08x\n", > - I915_READ(RING_SYNC_0(engine->mmio_base))); > - drm_printf(m, "\tSYNC_1: 0x%08x\n", > - I915_READ(RING_SYNC_1(engine->mmio_base))); > - if (HAS_VEBOX(dev_priv)) > - drm_printf(m, "\tSYNC_2: 0x%08x\n", > - I915_READ(RING_SYNC_2(engine->mmio_base))); > - } > - > addr = intel_engine_get_active_head(engine); > drm_printf(m, "\tACTHD: 0x%08x_%08x\n", > upper_32_bits(addr), lower_32_bits(addr)); > diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c > index 495fa145f37f..c3f929f59424 100644 > --- a/drivers/gpu/drm/i915/intel_hangcheck.c > +++ b/drivers/gpu/drm/i915/intel_hangcheck.c > @@ -24,144 +24,6 @@ > > #include "i915_drv.h" > > -static bool > -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) > -{ > - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; > - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | > - MI_SEMAPHORE_REGISTER); > -} > - > -static struct intel_engine_cs * > -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, > - u64 offset) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; > - struct intel_engine_cs *signaller; > - enum intel_engine_id id; > - > - for_each_engine(signaller, dev_priv, id) { > - if (engine == signaller) > - continue; > - > - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) > 
- return signaller; > - } > - > - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x\n", > - engine->name, ipehr); > - > - return ERR_PTR(-ENODEV); > -} > - > -static struct intel_engine_cs * > -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - void __iomem *vaddr; > - u32 cmd, ipehr, head; > - u64 offset = 0; > - int i, backwards; > - > - /* > - * This function does not support execlist mode - any attempt to > - * proceed further into this function will result in a kernel panic > - * when dereferencing ring->buffer, which is not set up in execlist > - * mode. > - * > - * The correct way of doing it would be to derive the currently > - * executing ring buffer from the current context, which is derived > - * from the currently running request. Unfortunately, to get the > - * current request we would have to grab the struct_mutex before doing > - * anything else, which would be ill-advised since some other thread > - * might have grabbed it already and managed to hang itself, causing > - * the hang checker to deadlock. > - * > - * Therefore, this function does not support execlist mode in its > - * current form. Just return NULL and move on. > - */ > - if (engine->buffer == NULL) > - return NULL; > - > - ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); > - if (!ipehr_is_semaphore_wait(engine, ipehr)) > - return NULL; > - > - /* > - * HEAD is likely pointing to the dword after the actual command, > - * so scan backwards until we find the MBOX. But limit it to just 3 > - * or 4 dwords depending on the semaphore wait command size. > - * Note that we don't care about ACTHD here since that might > - * point at at batch, and semaphores are always emitted into the > - * ringbuffer itself. > - */ > - head = I915_READ_HEAD(engine) & HEAD_ADDR; > - backwards = (INTEL_GEN(dev_priv) >= 8) ? 
5 : 4; > - vaddr = (void __iomem *)engine->buffer->vaddr; > - > - for (i = backwards; i; --i) { > - /* > - * Be paranoid and presume the hw has gone off into the wild - > - * our ring is smaller than what the hardware (and hence > - * HEAD_ADDR) allows. Also handles wrap-around. > - */ > - head &= engine->buffer->size - 1; > - > - /* This here seems to blow up */ > - cmd = ioread32(vaddr + head); > - if (cmd == ipehr) > - break; > - > - head -= 4; > - } > - > - if (!i) > - return NULL; > - > - *seqno = ioread32(vaddr + head + 4) + 1; > - return semaphore_wait_to_signaller_ring(engine, ipehr, offset); > -} > - > -static int semaphore_passed(struct intel_engine_cs *engine) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - struct intel_engine_cs *signaller; > - u32 seqno; > - > - engine->hangcheck.deadlock++; > - > - signaller = semaphore_waits_for(engine, &seqno); > - if (signaller == NULL) > - return -1; > - > - if (IS_ERR(signaller)) > - return 0; > - > - /* Prevent pathological recursion due to driver bugs */ > - if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) > - return -1; > - > - if (intel_engine_signaled(signaller, seqno)) > - return 1; > - > - /* cursory check for an unkickable deadlock */ > - if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && > - semaphore_passed(signaller) < 0) > - return -1; > - > - return 0; > -} > - > -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) > -{ > - struct intel_engine_cs *engine; > - enum intel_engine_id id; > - > - for_each_engine(engine, dev_priv, id) > - engine->hangcheck.deadlock = 0; > -} > - > static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) > { > u32 tmp = current_instdone | *old_instdone; > @@ -252,21 +114,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) > return ENGINE_WAIT_KICK; > } > > - if (IS_GEN_RANGE(dev_priv, 6, 7) && tmp & RING_WAIT_SEMAPHORE) { > - switch (semaphore_passed(engine)) { > - default: > - return ENGINE_DEAD; > - case 
1: > - i915_handle_error(dev_priv, ALL_ENGINES, 0, > - "stuck semaphore on %s", > - engine->name); > - I915_WRITE_CTL(engine, tmp); > - return ENGINE_WAIT_KICK; > - case 0: > - return ENGINE_WAIT; > - } > - } > - > return ENGINE_DEAD; > } > > @@ -433,8 +280,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > for_each_engine(engine, dev_priv, id) { > struct intel_engine_hangcheck hc; > > - semaphore_clear_deadlocks(dev_priv); > - > hangcheck_load_sample(engine, &hc); > hangcheck_accumulate_sample(engine, &hc); > hangcheck_store_sample(engine, &hc); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 65fd92eb071d..d5a9edbcf1be 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -556,13 +556,6 @@ static int init_ring_common(struct intel_engine_cs *engine) > > intel_engine_reset_breadcrumbs(engine); > > - if (HAS_LEGACY_SEMAPHORES(engine->i915)) { > - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); > - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); > - if (HAS_VEBOX(dev_priv)) > - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); > - } > - > /* Enforce ordering by reading HEAD register back */ > I915_READ_HEAD(engine); > > @@ -745,33 +738,6 @@ static int init_render_ring(struct intel_engine_cs *engine) > return 0; > } > > -static u32 *gen6_signal(struct i915_request *rq, u32 *cs) > -{ > - struct drm_i915_private *dev_priv = rq->i915; > - struct intel_engine_cs *engine; > - enum intel_engine_id id; > - int num_rings = 0; > - > - for_each_engine(engine, dev_priv, id) { > - i915_reg_t mbox_reg; > - > - if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) > - continue; > - > - mbox_reg = rq->engine->semaphore.mbox.signal[engine->hw_id]; > - if (i915_mmio_reg_valid(mbox_reg)) { > - *cs++ = MI_LOAD_REGISTER_IMM(1); > - *cs++ = i915_mmio_reg_offset(mbox_reg); > - *cs++ = rq->global_seqno; > - num_rings++; > - } > - } > - if (num_rings & 1) > - *cs++ = MI_NOOP; 
> - > - return cs; > -} > - > static void cancel_requests(struct intel_engine_cs *engine) > { > struct i915_request *request; > @@ -822,39 +788,6 @@ static void i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs) > > static const int i9xx_emit_breadcrumb_sz = 4; > > -static void gen6_sema_emit_breadcrumb(struct i915_request *rq, u32 *cs) > -{ > - return i9xx_emit_breadcrumb(rq, rq->engine->semaphore.signal(rq, cs)); > -} > - > -static int > -gen6_ring_sync_to(struct i915_request *rq, struct i915_request *signal) > -{ > - u32 dw1 = MI_SEMAPHORE_MBOX | > - MI_SEMAPHORE_COMPARE | > - MI_SEMAPHORE_REGISTER; > - u32 wait_mbox = signal->engine->semaphore.mbox.wait[rq->engine->hw_id]; > - u32 *cs; > - > - WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); > - > - cs = intel_ring_begin(rq, 4); > - if (IS_ERR(cs)) > - return PTR_ERR(cs); > - > - *cs++ = dw1 | wait_mbox; > - /* Throughout all of the GEM code, seqno passed implies our current > - * seqno is >= the last seqno executed. However for hardware the > - * comparison is strictly greater than. > - */ > - *cs++ = signal->global_seqno - 1; > - *cs++ = 0; > - *cs++ = MI_NOOP; > - intel_ring_advance(rq, cs); > - > - return 0; > -} > - > static void > gen5_seqno_barrier(struct intel_engine_cs *engine) > { > @@ -1602,12 +1535,6 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > { > struct drm_i915_private *i915 = rq->i915; > struct intel_engine_cs *engine = rq->engine; > - enum intel_engine_id id; > - const int num_rings = > - /* Use an extended w/a on gen7 if signalling from other rings */ > - (HAS_LEGACY_SEMAPHORES(i915) && IS_GEN(i915, 7)) ? > - INTEL_INFO(i915)->num_rings - 1 : > - 0; > bool force_restore = false; > int len; > u32 *cs; > @@ -1621,7 +1548,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > > len = 4; > if (IS_GEN(i915, 7)) > - len += 2 + (num_rings ? 
4*num_rings + 6 : 0); > + len += 2; > if (flags & MI_FORCE_RESTORE) { > GEM_BUG_ON(flags & MI_RESTORE_INHIBIT); > flags &= ~MI_FORCE_RESTORE; > @@ -1634,23 +1561,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > return PTR_ERR(cs); > > /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ > - if (IS_GEN(i915, 7)) { > + if (IS_GEN(i915, 7)) > *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; > - if (num_rings) { > - struct intel_engine_cs *signaller; > - > - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); > - for_each_engine(signaller, i915, id) { > - if (signaller == engine) > - continue; > - > - *cs++ = i915_mmio_reg_offset( > - RING_PSMI_CTL(signaller->mmio_base)); > - *cs++ = _MASKED_BIT_ENABLE( > - GEN6_PSMI_SLEEP_MSG_DISABLE); > - } > - } > - } > > if (force_restore) { > /* > @@ -1681,30 +1593,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) > */ > *cs++ = MI_NOOP; > > - if (IS_GEN(i915, 7)) { > - if (num_rings) { > - struct intel_engine_cs *signaller; > - i915_reg_t last_reg = {}; /* keep gcc quiet */ > - > - *cs++ = MI_LOAD_REGISTER_IMM(num_rings); > - for_each_engine(signaller, i915, id) { > - if (signaller == engine) > - continue; > - > - last_reg = RING_PSMI_CTL(signaller->mmio_base); > - *cs++ = i915_mmio_reg_offset(last_reg); > - *cs++ = _MASKED_BIT_DISABLE( > - GEN6_PSMI_SLEEP_MSG_DISABLE); > - } > - > - /* Insert a delay before the next switch! 
*/ > - *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; > - *cs++ = i915_mmio_reg_offset(last_reg); > - *cs++ = i915_scratch_offset(rq->i915); > - *cs++ = MI_NOOP; > - } > + if (IS_GEN(i915, 7)) > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > - } > > intel_ring_advance(rq, cs); > > @@ -2154,66 +2044,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode) > return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB); > } > > -static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, > - struct intel_engine_cs *engine) > -{ > - int i; > - > - if (!HAS_LEGACY_SEMAPHORES(dev_priv)) > - return; > - > - GEM_BUG_ON(INTEL_GEN(dev_priv) < 6); > - engine->semaphore.sync_to = gen6_ring_sync_to; > - engine->semaphore.signal = gen6_signal; > - > - /* > - * The current semaphore is only applied on pre-gen8 > - * platform. And there is no VCS2 ring on the pre-gen8 > - * platform. So the semaphore between RCS and VCS2 is > - * initialized as INVALID. > - */ > - for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { > - static const struct { > - u32 wait_mbox; > - i915_reg_t mbox_reg; > - } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { > - [RCS_HW] = { > - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, > - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, > - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, > - }, > - [VCS_HW] = { > - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, > - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, > - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, > - }, > - [BCS_HW] = { > - [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, > - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, > - [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, > - }, > - [VECS_HW] = { > - [RCS_HW] = { .wait_mbox = 
MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, > - [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, > - [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, > - }, > - }; > - u32 wait_mbox; > - i915_reg_t mbox_reg; > - > - if (i == engine->hw_id) { > - wait_mbox = MI_SEMAPHORE_SYNC_INVALID; > - mbox_reg = GEN6_NOSYNC; > - } else { > - wait_mbox = sem_data[engine->hw_id][i].wait_mbox; > - mbox_reg = sem_data[engine->hw_id][i].mbox_reg; > - } > - > - engine->semaphore.mbox.wait[i] = wait_mbox; > - engine->semaphore.mbox.signal[i] = mbox_reg; > - } > -} > - > static void intel_ring_init_irq(struct drm_i915_private *dev_priv, > struct intel_engine_cs *engine) > { > @@ -2256,7 +2086,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, > GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8); > > intel_ring_init_irq(dev_priv, engine); > - intel_ring_init_semaphores(dev_priv, engine); > > engine->init_hw = init_ring_common; > engine->reset.prepare = reset_prepare; > @@ -2268,16 +2097,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, > > engine->emit_breadcrumb = i9xx_emit_breadcrumb; > engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; > - if (HAS_LEGACY_SEMAPHORES(dev_priv)) { > - int num_rings; > - > - engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; > - > - num_rings = INTEL_INFO(dev_priv)->num_rings - 1; > - engine->emit_breadcrumb_sz += num_rings * 3; > - if (num_rings & 1) > - engine->emit_breadcrumb_sz++; > - } > > engine->set_default_submission = i9xx_set_default_submission; > > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 6b41b9ce5f5b..c927bdfb1ed0 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -510,60 +510,6 @@ struct intel_engine_cs { > void (*irq_seqno_barrier)(struct intel_engine_cs *engine); > void (*cleanup)(struct intel_engine_cs *engine); > > 
- /* GEN8 signal/wait table - never trust comments! > - * signal to signal to signal to signal to signal to > - * RCS VCS BCS VECS VCS2 > - * -------------------------------------------------------------------- > - * RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) | > - * |------------------------------------------------------------------- > - * VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) | > - * |------------------------------------------------------------------- > - * BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) | > - * |------------------------------------------------------------------- > - * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) | > - * |------------------------------------------------------------------- > - * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) | > - * |------------------------------------------------------------------- > - * > - * Generalization: > - * f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id) > - * ie. 
transpose of g(x, y) > - * > - * sync from sync from sync from sync from sync from > - * RCS VCS BCS VECS VCS2 > - * -------------------------------------------------------------------- > - * RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) | > - * |------------------------------------------------------------------- > - * VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) | > - * |------------------------------------------------------------------- > - * BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) | > - * |------------------------------------------------------------------- > - * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) | > - * |------------------------------------------------------------------- > - * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) | > - * |------------------------------------------------------------------- > - * > - * Generalization: > - * g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id) > - * ie. transpose of f(x, y) > - */ > - struct { > -#define GEN6_SEMAPHORE_LAST VECS_HW > -#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) > -#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) > - struct { > - /* our mbox written by others */ > - u32 wait[GEN6_NUM_SEMAPHORES]; > - /* mboxes this ring signals to */ > - i915_reg_t signal[GEN6_NUM_SEMAPHORES]; > - } mbox; > - > - /* AKA wait() */ > - int (*sync_to)(struct i915_request *rq, > - struct i915_request *signal); > - u32 *(*signal)(struct i915_request *rq, u32 *cs); > - } semaphore; > - > struct intel_engine_execlists execlists; > > /* Contexts are pinned whilst they are active on the GPU. 
The last > @@ -889,7 +835,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) > return tail; > } > > -void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); > +void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno); > > void intel_engine_setup_common(struct intel_engine_cs *engine); > int intel_engine_init_common(struct intel_engine_cs *engine); > -- > 2.20.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx