Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Split the legacy submission backend from the common ring buffer. Aye. Didn't spot anything out of the ordinary. Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/Makefile | 5 +- > drivers/gpu/drm/i915/display/intel_overlay.c | 1 + > drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 +- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 + > .../gpu/drm/i915/gem/i915_gem_object_blt.c | 1 + > .../i915/gem/selftests/i915_gem_coherency.c | 1 + > drivers/gpu/drm/i915/gt/intel_context.c | 1 + > drivers/gpu/drm/i915/gt/intel_context.h | 1 + > drivers/gpu/drm/i915/gt/intel_engine.h | 114 ------- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 + > drivers/gpu/drm/i915/gt/intel_engine_pm.c | 1 + > drivers/gpu/drm/i915/gt/intel_engine_types.h | 27 +- > drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + > drivers/gpu/drm/i915/gt/intel_mocs.c | 1 + > drivers/gpu/drm/i915/gt/intel_renderstate.c | 1 + > drivers/gpu/drm/i915/gt/intel_ring.c | 314 ++++++++++++++++++ > drivers/gpu/drm/i915/gt/intel_ring.h | 131 ++++++++ > drivers/gpu/drm/i915/gt/intel_timeline.c | 6 +- > drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + > drivers/gpu/drm/i915/gt/mock_engine.c | 1 + > drivers/gpu/drm/i915/gt/selftest_timeline.c | 1 + > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 3 +- > drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 + > drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + > drivers/gpu/drm/i915/gvt/scheduler.c | 1 + > drivers/gpu/drm/i915/i915_active.c | 1 + > drivers/gpu/drm/i915/i915_perf.c | 1 + > drivers/gpu/drm/i915/i915_request.c | 1 + > 28 files changed, 477 insertions(+), 147 deletions(-) > create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.c > create mode 100644 drivers/gpu/drm/i915/gt/intel_ring.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index a91e0a487a79..5021aa7fa187 100644 > --- a/drivers/gpu/drm/i915/Makefile 
> +++ b/drivers/gpu/drm/i915/Makefile > @@ -90,11 +90,12 @@ gt-y += \ > gt/intel_gt_requests.o \ > gt/intel_llc.o \ > gt/intel_lrc.o \ > + gt/intel_mocs.o \ > gt/intel_rc6.o \ > gt/intel_renderstate.o \ > gt/intel_reset.o \ > - gt/intel_ringbuffer.o \ > - gt/intel_mocs.o \ > + gt/intel_ring.o \ > + gt/intel_ring_submission.o \ > gt/intel_sseu.o \ > gt/intel_timeline.o \ > gt/intel_workarounds.o > diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c > index 2360f19f9694..848ce07a8ec2 100644 > --- a/drivers/gpu/drm/i915/display/intel_overlay.c > +++ b/drivers/gpu/drm/i915/display/intel_overlay.c > @@ -30,6 +30,7 @@ > #include <drm/i915_drm.h> > > #include "gem/i915_gem_pm.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "i915_reg.h" > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index df528e48e566..a03bee30fac1 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -69,9 +69,10 @@ > > #include <drm/i915_drm.h> > > -#include "gt/intel_lrc_reg.h" > #include "gt/intel_engine_heartbeat.h" > #include "gt/intel_engine_user.h" > +#include "gt/intel_lrc_reg.h" > +#include "gt/intel_ring.h" > > #include "i915_gem_context.h" > #include "i915_globals.h" > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 99d79f94e641..c88948e4094b 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -19,6 +19,7 @@ > #include "gt/intel_engine_pool.h" > #include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "i915_gem_clflush.h" > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > index 5bd8de124d74..516e61e99212 100644 > --- 
a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c > @@ -8,6 +8,7 @@ > #include "gt/intel_engine_pm.h" > #include "gt/intel_engine_pool.h" > #include "gt/intel_gt.h" > +#include "gt/intel_ring.h" > #include "i915_gem_clflush.h" > #include "i915_gem_object_blt.h" > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > index 549810f70aeb..0877ef4dff63 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c > @@ -8,6 +8,7 @@ > > #include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > +#include "gt/intel_ring.h" > > #include "i915_selftest.h" > #include "selftests/i915_random.h" > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index 59c3083c1ec1..ee9d2bcd2c13 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -13,6 +13,7 @@ > #include "intel_context.h" > #include "intel_engine.h" > #include "intel_engine_pm.h" > +#include "intel_ring.h" > > static struct i915_global_context { > struct i915_global base; > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h > index dd742ac2fbdb..68b3d317d959 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.h > +++ b/drivers/gpu/drm/i915/gt/intel_context.h > @@ -12,6 +12,7 @@ > #include "i915_active.h" > #include "intel_context_types.h" > #include "intel_engine_types.h" > +#include "intel_ring_types.h" > #include "intel_timeline_types.h" > > void intel_context_init(struct intel_context *ce, > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h > index c7f93d05c8e0..d77b9f9f096c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h > @@ -19,7 +19,6 @@ > #include "intel_workarounds.h" > > struct 
drm_printer; > - > struct intel_gt; > > /* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, > @@ -176,122 +175,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) > #define I915_HWS_CSB_WRITE_INDEX 0x1f > #define CNL_HWS_CSB_WRITE_INDEX 0x2f > > -struct intel_ring * > -intel_engine_create_ring(struct intel_engine_cs *engine, int size); > -int intel_ring_pin(struct intel_ring *ring); > -void intel_ring_reset(struct intel_ring *ring, u32 tail); > -unsigned int intel_ring_update_space(struct intel_ring *ring); > -void intel_ring_unpin(struct intel_ring *ring); > -void intel_ring_free(struct kref *ref); > - > -static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) > -{ > - kref_get(&ring->ref); > - return ring; > -} > - > -static inline void intel_ring_put(struct intel_ring *ring) > -{ > - kref_put(&ring->ref, intel_ring_free); > -} > - > void intel_engine_stop(struct intel_engine_cs *engine); > void intel_engine_cleanup(struct intel_engine_cs *engine); > > -int __must_check intel_ring_cacheline_align(struct i915_request *rq); > - > -u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n); > - > -static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) > -{ > - /* Dummy function. > - * > - * This serves as a placeholder in the code so that the reader > - * can compare against the preceding intel_ring_begin() and > - * check that the number of dwords emitted matches the space > - * reserved for the command packet (i.e. the value passed to > - * intel_ring_begin()). 
> - */ > - GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); > -} > - > -static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) > -{ > - return pos & (ring->size - 1); > -} > - > -static inline bool > -intel_ring_offset_valid(const struct intel_ring *ring, > - unsigned int pos) > -{ > - if (pos & -ring->size) /* must be strictly within the ring */ > - return false; > - > - if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ > - return false; > - > - return true; > -} > - > -static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) > -{ > - /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ > - u32 offset = addr - rq->ring->vaddr; > - GEM_BUG_ON(offset > rq->ring->size); > - return intel_ring_wrap(rq->ring, offset); > -} > - > -static inline void > -assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) > -{ > - GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); > - > - /* > - * "Ring Buffer Use" > - * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 > - * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 > - * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 > - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the > - * same cacheline, the Head Pointer must not be greater than the Tail > - * Pointer." > - * > - * We use ring->head as the last known location of the actual RING_HEAD, > - * it may have advanced but in the worst case it is equally the same > - * as ring->head and so we should never program RING_TAIL to advance > - * into the same cacheline as ring->head. 
> - */ > -#define cacheline(a) round_down(a, CACHELINE_BYTES) > - GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && > - tail < ring->head); > -#undef cacheline > -} > - > -static inline unsigned int > -intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) > -{ > - /* Whilst writes to the tail are strictly order, there is no > - * serialisation between readers and the writers. The tail may be > - * read by i915_request_retire() just as it is being updated > - * by execlists, as although the breadcrumb is complete, the context > - * switch hasn't been seen. > - */ > - assert_ring_tail_valid(ring, tail); > - ring->tail = tail; > - return tail; > -} > - > -static inline unsigned int > -__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) > -{ > - /* > - * "If the Ring Buffer Head Pointer and the Tail Pointer are on the > - * same cacheline, the Head Pointer must not be greater than the Tail > - * Pointer." > - */ > - GEM_BUG_ON(!is_power_of_2(size)); > - return (head - tail - CACHELINE_BYTES) & (size - 1); > -} > - > int intel_engines_init_mmio(struct intel_gt *gt); > int intel_engines_setup(struct intel_gt *gt); > int intel_engines_init(struct intel_gt *gt); > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index cd4caf54c59c..2afa2ef90482 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -37,6 +37,7 @@ > #include "intel_context.h" > #include "intel_lrc.h" > #include "intel_reset.h" > +#include "intel_ring.h" > > /* Haswell does have the CXT_SIZE register however it does not appear to be > * valid. Now, docs explain in dwords what is in the context object. 
The full > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c > index 6fbfa2162e54..3c0f490ff2c7 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c > @@ -13,6 +13,7 @@ > #include "intel_gt.h" > #include "intel_gt_pm.h" > #include "intel_rc6.h" > +#include "intel_ring.h" > > static int __engine_unpark(struct intel_wakeref *wf) > { > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index fbe89bfd3d4f..c5d1047a4bc5 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -59,6 +59,7 @@ struct i915_gem_context; > struct i915_request; > struct i915_sched_attr; > struct intel_gt; > +struct intel_ring; > struct intel_uncore; > > typedef u8 intel_engine_mask_t; > @@ -77,32 +78,6 @@ struct intel_instdone { > u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; > }; > > -struct intel_ring { > - struct kref ref; > - struct i915_vma *vma; > - void *vaddr; > - > - /* > - * As we have two types of rings, one global to the engine used > - * by ringbuffer submission and those that are exclusive to a > - * context used by execlists, we have to play safe and allow > - * atomic updates to the pin_count. However, the actual pinning > - * of the context is either done during initialisation for > - * ringbuffer submission or serialised as part of the context > - * pinning for execlists, and so we do not need a mutex ourselves > - * to serialise intel_ring_pin/intel_ring_unpin. 
> - */ > - atomic_t pin_count; > - > - u32 head; > - u32 tail; > - u32 emit; > - > - u32 space; > - u32 size; > - u32 effective_size; > -}; > - > /* > * we use a single page to load ctx workarounds so all of these > * values are referred in terms of dwords > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 8d8fecc69809..9d59debfd168 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -145,6 +145,7 @@ > #include "intel_lrc_reg.h" > #include "intel_mocs.h" > #include "intel_reset.h" > +#include "intel_ring.h" > #include "intel_workarounds.h" > > #define RING_EXECLIST_QFULL (1 << 0x2) > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > index 06dba7ff294e..6d4c665a997d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > @@ -26,6 +26,7 @@ > #include "intel_gt.h" > #include "intel_mocs.h" > #include "intel_lrc.h" > +#include "intel_ring.h" > > /* structures required */ > struct drm_i915_mocs_entry { > diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c > index 6d05f9c64178..c4edc35e7d89 100644 > --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c > +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c > @@ -27,6 +27,7 @@ > > #include "i915_drv.h" > #include "intel_renderstate.h" > +#include "intel_ring.h" > > struct intel_renderstate { > const struct intel_renderstate_rodata *rodata; > diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c > new file mode 100644 > index 000000000000..98876b55f851 > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_ring.c > @@ -0,0 +1,314 @@ > +/* > + * SPDX-License-Identifier: MIT > + * > + * Copyright © 2019 Intel Corporation > + */ > + > +#include "gem/i915_gem_object.h" > +#include "i915_drv.h" > +#include "i915_vma.h" > +#include "intel_engine.h" > +#include "intel_ring.h" > 
+#include "intel_timeline.h" > + > +unsigned int intel_ring_update_space(struct intel_ring *ring) > +{ > + unsigned int space; > + > + space = __intel_ring_space(ring->head, ring->emit, ring->size); > + > + ring->space = space; > + return space; > +} > + > +int intel_ring_pin(struct intel_ring *ring) > +{ > + struct i915_vma *vma = ring->vma; > + unsigned int flags; > + void *addr; > + int ret; > + > + if (atomic_fetch_inc(&ring->pin_count)) > + return 0; > + > + flags = PIN_GLOBAL; > + > + /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ > + flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); > + > + if (vma->obj->stolen) > + flags |= PIN_MAPPABLE; > + else > + flags |= PIN_HIGH; > + > + ret = i915_vma_pin(vma, 0, 0, flags); > + if (unlikely(ret)) > + goto err_unpin; > + > + if (i915_vma_is_map_and_fenceable(vma)) > + addr = (void __force *)i915_vma_pin_iomap(vma); > + else > + addr = i915_gem_object_pin_map(vma->obj, > + i915_coherent_map_type(vma->vm->i915)); > + if (IS_ERR(addr)) { > + ret = PTR_ERR(addr); > + goto err_ring; > + } > + > + i915_vma_make_unshrinkable(vma); > + > + GEM_BUG_ON(ring->vaddr); > + ring->vaddr = addr; > + > + return 0; > + > +err_ring: > + i915_vma_unpin(vma); > +err_unpin: > + atomic_dec(&ring->pin_count); > + return ret; > +} > + > +void intel_ring_reset(struct intel_ring *ring, u32 tail) > +{ > + tail = intel_ring_wrap(ring, tail); > + ring->tail = tail; > + ring->head = tail; > + ring->emit = tail; > + intel_ring_update_space(ring); > +} > + > +void intel_ring_unpin(struct intel_ring *ring) > +{ > + if (!atomic_dec_and_test(&ring->pin_count)) > + return; > + > + /* Discard any unused bytes beyond that submitted to hw. 
*/ > + intel_ring_reset(ring, ring->emit); > + > + GEM_BUG_ON(!ring->vma); > + i915_vma_unset_ggtt_write(ring->vma); > + if (i915_vma_is_map_and_fenceable(ring->vma)) > + i915_vma_unpin_iomap(ring->vma); > + else > + i915_gem_object_unpin_map(ring->vma->obj); > + > + GEM_BUG_ON(!ring->vaddr); > + ring->vaddr = NULL; > + > + i915_vma_unpin(ring->vma); > + i915_vma_make_purgeable(ring->vma); > +} > + > +static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) > +{ > + struct i915_address_space *vm = &ggtt->vm; > + struct drm_i915_private *i915 = vm->i915; > + struct drm_i915_gem_object *obj; > + struct i915_vma *vma; > + > + obj = i915_gem_object_create_stolen(i915, size); > + if (!obj) > + obj = i915_gem_object_create_internal(i915, size); > + if (IS_ERR(obj)) > + return ERR_CAST(obj); > + > + /* > + * Mark ring buffers as read-only from GPU side (so no stray overwrites) > + * if supported by the platform's GGTT. > + */ > + if (vm->has_read_only) > + i915_gem_object_set_readonly(obj); > + > + vma = i915_vma_instance(obj, vm, NULL); > + i915_gem_object_put(obj); > + > + return vma; > +} > + > +struct intel_ring * > +intel_engine_create_ring(struct intel_engine_cs *engine, int size) > +{ > + struct drm_i915_private *i915 = engine->i915; > + struct intel_ring *ring; > + struct i915_vma *vma; > + > + GEM_BUG_ON(!is_power_of_2(size)); > + GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); > + > + ring = kzalloc(sizeof(*ring), GFP_KERNEL); > + if (!ring) > + return ERR_PTR(-ENOMEM); > + > + kref_init(&ring->ref); > + > + ring->size = size; > + /* Workaround an erratum on the i830 which causes a hang if > + * the TAIL pointer points to within the last 2 cachelines > + * of the buffer. 
> + */ > + ring->effective_size = size; > + if (IS_I830(i915) || IS_I845G(i915)) > + ring->effective_size -= 2 * CACHELINE_BYTES; > + > + intel_ring_update_space(ring); > + > + vma = create_ring_vma(engine->gt->ggtt, size); > + if (IS_ERR(vma)) { > + kfree(ring); > + return ERR_CAST(vma); > + } > + ring->vma = vma; > + > + return ring; > +} > + > +void intel_ring_free(struct kref *ref) > +{ > + struct intel_ring *ring = container_of(ref, typeof(*ring), ref); > + > + i915_vma_put(ring->vma); > + > + kfree(ring); > +} > + > +static noinline int > +wait_for_space(struct intel_ring *ring, > + struct intel_timeline *tl, > + unsigned int bytes) > +{ > + struct i915_request *target; > + long timeout; > + > + if (intel_ring_update_space(ring) >= bytes) > + return 0; > + > + GEM_BUG_ON(list_empty(&tl->requests)); > + list_for_each_entry(target, &tl->requests, link) { > + if (target->ring != ring) > + continue; > + > + /* Would completion of this request free enough space? */ > + if (bytes <= __intel_ring_space(target->postfix, > + ring->emit, ring->size)) > + break; > + } > + > + if (GEM_WARN_ON(&target->link == &tl->requests)) > + return -ENOSPC; > + > + timeout = i915_request_wait(target, > + I915_WAIT_INTERRUPTIBLE, > + MAX_SCHEDULE_TIMEOUT); > + if (timeout < 0) > + return timeout; > + > + i915_request_retire_upto(target); > + > + intel_ring_update_space(ring); > + GEM_BUG_ON(ring->space < bytes); > + return 0; > +} > + > +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) > +{ > + struct intel_ring *ring = rq->ring; > + const unsigned int remain_usable = ring->effective_size - ring->emit; > + const unsigned int bytes = num_dwords * sizeof(u32); > + unsigned int need_wrap = 0; > + unsigned int total_bytes; > + u32 *cs; > + > + /* Packets must be qword aligned. 
*/ > + GEM_BUG_ON(num_dwords & 1); > + > + total_bytes = bytes + rq->reserved_space; > + GEM_BUG_ON(total_bytes > ring->effective_size); > + > + if (unlikely(total_bytes > remain_usable)) { > + const int remain_actual = ring->size - ring->emit; > + > + if (bytes > remain_usable) { > + /* > + * Not enough space for the basic request. So need to > + * flush out the remainder and then wait for > + * base + reserved. > + */ > + total_bytes += remain_actual; > + need_wrap = remain_actual | 1; > + } else { > + /* > + * The base request will fit but the reserved space > + * falls off the end. So we don't need an immediate > + * wrap and only need to effectively wait for the > + * reserved size from the start of ringbuffer. > + */ > + total_bytes = rq->reserved_space + remain_actual; > + } > + } > + > + if (unlikely(total_bytes > ring->space)) { > + int ret; > + > + /* > + * Space is reserved in the ringbuffer for finalising the > + * request, as that cannot be allowed to fail. During request > + * finalisation, reserved_space is set to 0 to stop the > + * overallocation and the assumption is that then we never need > + * to wait (which has the risk of failing with EINTR). > + * > + * See also i915_request_alloc() and i915_request_add(). 
> + */ > + GEM_BUG_ON(!rq->reserved_space); > + > + ret = wait_for_space(ring, rq->timeline, total_bytes); > + if (unlikely(ret)) > + return ERR_PTR(ret); > + } > + > + if (unlikely(need_wrap)) { > + need_wrap &= ~1; > + GEM_BUG_ON(need_wrap > ring->space); > + GEM_BUG_ON(ring->emit + need_wrap > ring->size); > + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); > + > + /* Fill the tail with MI_NOOP */ > + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); > + ring->space -= need_wrap; > + ring->emit = 0; > + } > + > + GEM_BUG_ON(ring->emit > ring->size - bytes); > + GEM_BUG_ON(ring->space < bytes); > + cs = ring->vaddr + ring->emit; > + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); > + ring->emit += bytes; > + ring->space -= bytes; > + > + return cs; > +} > + > +/* Align the ring tail to a cacheline boundary */ > +int intel_ring_cacheline_align(struct i915_request *rq) > +{ > + int num_dwords; > + void *cs; > + > + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); > + if (num_dwords == 0) > + return 0; > + > + num_dwords = CACHELINE_DWORDS - num_dwords; > + GEM_BUG_ON(num_dwords & 1); > + > + cs = intel_ring_begin(rq, num_dwords); > + if (IS_ERR(cs)) > + return PTR_ERR(cs); > + > + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); > + intel_ring_advance(rq, cs + num_dwords); > + > + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); > + return 0; > +} > + > diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h > new file mode 100644 > index 000000000000..ea2839d9e044 > --- /dev/null > +++ b/drivers/gpu/drm/i915/gt/intel_ring.h > @@ -0,0 +1,131 @@ > +/* > + * SPDX-License-Identifier: MIT > + * > + * Copyright © 2019 Intel Corporation > + */ > + > +#ifndef INTEL_RING_H > +#define INTEL_RING_H > + > +#include "i915_gem.h" /* GEM_BUG_ON */ > +#include "i915_request.h" > +#include "intel_ring_types.h" > + > +struct intel_engine_cs; > + > +struct intel_ring * > 
+intel_engine_create_ring(struct intel_engine_cs *engine, int size); > + > +u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords); > +int intel_ring_cacheline_align(struct i915_request *rq); > + > +unsigned int intel_ring_update_space(struct intel_ring *ring); > + > +int intel_ring_pin(struct intel_ring *ring); > +void intel_ring_unpin(struct intel_ring *ring); > +void intel_ring_reset(struct intel_ring *ring, u32 tail); > + > +void intel_ring_free(struct kref *ref); > + > +static inline struct intel_ring *intel_ring_get(struct intel_ring *ring) > +{ > + kref_get(&ring->ref); > + return ring; > +} > + > +static inline void intel_ring_put(struct intel_ring *ring) > +{ > + kref_put(&ring->ref, intel_ring_free); > +} > + > +static inline void intel_ring_advance(struct i915_request *rq, u32 *cs) > +{ > + /* Dummy function. > + * > + * This serves as a placeholder in the code so that the reader > + * can compare against the preceding intel_ring_begin() and > + * check that the number of dwords emitted matches the space > + * reserved for the command packet (i.e. the value passed to > + * intel_ring_begin()). > + */ > + GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs); > +} > + > +static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) > +{ > + return pos & (ring->size - 1); > +} > + > +static inline bool > +intel_ring_offset_valid(const struct intel_ring *ring, > + unsigned int pos) > +{ > + if (pos & -ring->size) /* must be strictly within the ring */ > + return false; > + > + if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */ > + return false; > + > + return true; > +} > + > +static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr) > +{ > + /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. 
*/ > + u32 offset = addr - rq->ring->vaddr; > + GEM_BUG_ON(offset > rq->ring->size); > + return intel_ring_wrap(rq->ring, offset); > +} > + > +static inline void > +assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail) > +{ > + GEM_BUG_ON(!intel_ring_offset_valid(ring, tail)); > + > + /* > + * "Ring Buffer Use" > + * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 > + * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5 > + * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5 > + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the > + * same cacheline, the Head Pointer must not be greater than the Tail > + * Pointer." > + * > + * We use ring->head as the last known location of the actual RING_HEAD, > + * it may have advanced but in the worst case it is equally the same > + * as ring->head and so we should never program RING_TAIL to advance > + * into the same cacheline as ring->head. > + */ > +#define cacheline(a) round_down(a, CACHELINE_BYTES) > + GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) && > + tail < ring->head); > +#undef cacheline > +} > + > +static inline unsigned int > +intel_ring_set_tail(struct intel_ring *ring, unsigned int tail) > +{ > + /* Whilst writes to the tail are strictly order, there is no > + * serialisation between readers and the writers. The tail may be > + * read by i915_request_retire() just as it is being updated > + * by execlists, as although the breadcrumb is complete, the context > + * switch hasn't been seen. > + */ > + assert_ring_tail_valid(ring, tail); > + ring->tail = tail; > + return tail; > +} > + > +static inline unsigned int > +__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size) > +{ > + /* > + * "If the Ring Buffer Head Pointer and the Tail Pointer are on the > + * same cacheline, the Head Pointer must not be greater than the Tail > + * Pointer." 
> + */ > + GEM_BUG_ON(!is_power_of_2(size)); > + return (head - tail - CACHELINE_BYTES) & (size - 1); > +} > + > +#endif /* INTEL_RING_H */ > diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c > index 0f959694303c..14ad10acd548 100644 > --- a/drivers/gpu/drm/i915/gt/intel_timeline.c > +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c > @@ -4,13 +4,13 @@ > * Copyright © 2016-2018 Intel Corporation > */ > > -#include "gt/intel_gt_types.h" > - > #include "i915_drv.h" > > #include "i915_active.h" > #include "i915_syncmap.h" > -#include "gt/intel_timeline.h" > +#include "intel_gt.h" > +#include "intel_ring.h" > +#include "intel_timeline.h" > > #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) > #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c > index af8a8183154a..7cb6dab4399d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c > @@ -7,6 +7,7 @@ > #include "i915_drv.h" > #include "intel_context.h" > #include "intel_gt.h" > +#include "intel_ring.h" > #include "intel_workarounds.h" > > /** > diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c > index 123db2c3f956..83f549d203a0 100644 > --- a/drivers/gpu/drm/i915/gt/mock_engine.c > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c > @@ -23,6 +23,7 @@ > */ > > #include "gem/i915_gem_context.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "intel_context.h" > diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c > index dac86f699a4c..f04a59fe5d2c 100644 > --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c > +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c > @@ -9,6 +9,7 @@ > #include "intel_engine_pm.h" > #include "intel_gt.h" > #include "intel_gt_requests.h" > +#include 
"intel_ring.h" > > #include "../selftests/i915_random.h" > #include "../i915_selftest.h" > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 009e54a3764f..1b1691aaed28 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -6,12 +6,13 @@ > #include <linux/circ_buf.h> > > #include "gem/i915_gem_context.h" > - > #include "gt/intel_context.h" > #include "gt/intel_engine_pm.h" > #include "gt/intel_gt.h" > #include "gt/intel_gt_pm.h" > #include "gt/intel_lrc_reg.h" > +#include "gt/intel_ring.h" > + > #include "intel_guc_submission.h" > > #include "i915_drv.h" > diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c > index e753b1e706e2..6a3ac8cde95d 100644 > --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c > +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c > @@ -35,7 +35,9 @@ > */ > > #include <linux/slab.h> > + > #include "i915_drv.h" > +#include "gt/intel_ring.h" > #include "gvt.h" > #include "i915_pvinfo.h" > #include "trace.h" > diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c > index 4208e40445b1..aaf15916d29a 100644 > --- a/drivers/gpu/drm/i915/gvt/mmio_context.c > +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c > @@ -35,6 +35,7 @@ > > #include "i915_drv.h" > #include "gt/intel_context.h" > +#include "gt/intel_ring.h" > #include "gvt.h" > #include "trace.h" > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index a5b942ee3ceb..377811f8853f 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -38,6 +38,7 @@ > #include "gem/i915_gem_context.h" > #include "gem/i915_gem_pm.h" > #include "gt/intel_context.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "gvt.h" > diff --git a/drivers/gpu/drm/i915/i915_active.c 
b/drivers/gpu/drm/i915/i915_active.c > index 4c190a548ba7..3e3495838a93 100644 > --- a/drivers/gpu/drm/i915/i915_active.c > +++ b/drivers/gpu/drm/i915/i915_active.c > @@ -8,6 +8,7 @@ > > #include "gt/intel_engine_heartbeat.h" > #include "gt/intel_engine_pm.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "i915_active.h" > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 3130b0c7ed83..38d3de2dfaa6 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -200,6 +200,7 @@ > #include "gt/intel_engine_user.h" > #include "gt/intel_gt.h" > #include "gt/intel_lrc_reg.h" > +#include "gt/intel_ring.h" > > #include "i915_drv.h" > #include "i915_perf.h" > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 4575f368455d..932c5cf190b5 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -31,6 +31,7 @@ > > #include "gem/i915_gem_context.h" > #include "gt/intel_context.h" > +#include "gt/intel_ring.h" > > #include "i915_active.h" > #include "i915_drv.h" > -- > 2.24.0.rc0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx