From: Alex Dai <yu.dai@xxxxxxxxx> Add functions to submit a work queue item and ring the doorbell. The GuC TLB needs to be invalidated if the LRC context changes. Issue: VIZ-4884 Signed-off-by: Alex Dai <yu.dai@xxxxxxxxx> --- drivers/gpu/drm/i915/intel_guc.h | 3 + drivers/gpu/drm/i915/intel_guc_client.c | 182 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_guc_scheduler.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 16 ++- 4 files changed, 200 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 4d9fb34..c44b3c0 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -31,6 +31,7 @@ #define GUC_WQ_SIZE (PAGE_SIZE * 2) struct i915_guc_client { + spinlock_t wq_lock; struct drm_i915_gem_object *client_obj; u32 priority; off_t doorbell_offset; @@ -39,6 +40,8 @@ struct i915_guc_client { uint16_t doorbell_id; uint32_t ctx_index; uint32_t wq_size; + uint32_t wq_tail; + uint32_t cookie; }; #define I915_MAX_DOORBELLS 256 diff --git a/drivers/gpu/drm/i915/intel_guc_client.c b/drivers/gpu/drm/i915/intel_guc_client.c index ae6323a..d64cad6 100644 --- a/drivers/gpu/drm/i915/intel_guc_client.c +++ b/drivers/gpu/drm/i915/intel_guc_client.c @@ -22,6 +22,7 @@ * */ #include <linux/firmware.h> +#include <linux/circ_buf.h> #include "i915_drv.h" #include "intel_guc.h" @@ -52,6 +53,14 @@ * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) * mapped into process space. * + * Work Items: + * There are several types of work items that the host may place into a + * workqueue, each with its own requirements and limitations. Currently only + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which + * represents in-order queue. The kernel driver packs ring tail pointer and an + * ELSP context descriptor dword into Work Item. 
+ * See add_workqueue_item() + * */ /* @@ -411,6 +420,8 @@ i915_guc_client_alloc(struct drm_device *dev, u32 priority) /* XXX: evict a doorbell instead */ goto err; + spin_lock_init(&client->wq_lock); + init_ctx_desc(guc, client); init_proc_desc(guc, client); init_doorbell(guc, client); @@ -430,6 +441,167 @@ err: return NULL; } +/* Get valid workqueue item and return it back to offset */ +static int get_workqueue_space(struct i915_guc_client *gc, u32 *offset) +{ + struct guc_process_desc *desc; + void *base; + u32 size = sizeof(struct guc_wq_item); + int ret = 0, timeout_counter = 200; + unsigned long flags; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + while (timeout_counter-- > 0) { + spin_lock_irqsave(&gc->wq_lock, flags); + + ret = wait_for(CIRC_SPACE(gc->wq_tail, desc->head, + gc->wq_size) >= size, 1); + + if (!ret) { + *offset = gc->wq_tail; + + /* advance the tail for next workqueue item */ + gc->wq_tail += size; + gc->wq_tail &= gc->wq_size - 1; + + /* this will break the loop */ + timeout_counter = 0; + } + + spin_unlock_irqrestore(&gc->wq_lock, flags); + }; + + kunmap_atomic(base); + + return ret; +} + + +static int add_workqueue_item(struct i915_guc_client *gc, + struct intel_context *ctx, + struct intel_engine_cs *ring) +{ + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + struct drm_i915_gem_object *ctx_obj; + struct guc_wq_item *wqi; + void *base; + struct page *page; + u32 wq_off = 0, tail = ringbuf->tail, wq_len; + int ret; + + ctx_obj = ctx->engine[ring->id].state; + + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj)); + WARN_ON(!i915_gem_obj_is_pinned(ringbuf->obj)); + + /* Need this because of the deferred pin ctx and ring */ + /* Shall we move this right after ring is pinned? 
*/ + page = i915_gem_object_get_page(ctx_obj, 1); + base = kmap_atomic(page); + + *((u32 *)base + CTX_RING_BUFFER_START + 1) = + i915_gem_obj_ggtt_offset(ringbuf->obj); + + kunmap_atomic(base); + + ret = get_workqueue_space(gc, &wq_off); + if (ret) + return ret; + + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we + * should not have the case where structure wqi is across page, neither + * wrapped to the beginning. This simplifies the implementation below. + * + * XXX: if not the case, we need save data to a temp wqi and copy it to + * workqueue buffer dw by dw. + */ + WARN_ON(sizeof(struct guc_wq_item) != 16); + WARN_ON(wq_off & 3); + + /* wq starts from the page after doorbell / process_desc */ + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, + (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); + wq_off &= PAGE_SIZE - 1; + wqi = (struct guc_wq_item *)((char *)base + wq_off); + + /* len does not include the header */ + wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; + wqi->header = WQ_TYPE_INORDER | + (wq_len << WQ_LEN_SHIFT) | + (ring->id << WQ_TARGET_SHIFT) | + WQ_NO_WCFLUSH_WAIT; + + wqi->context_desc = (u32)execlists_ctx_descriptor(ring, ctx_obj); + /* tail index is in qw */ + tail >>= 3; + wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; + wqi->fence_id = 0; /*XXX: what fence to be here */ + + kunmap_atomic(base); + + return 0; +} + +static int ring_doorbell(struct i915_guc_client *gc) +{ + struct guc_process_desc *desc; + union guc_doorbell_qw db_cmp, db_exc, db_ret; + union guc_doorbell_qw *db; + void *base; + int attempt = 2, ret = -EAGAIN; + + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); + desc = base + gc->proc_desc_offset; + + /* Update the tail so it is visible to GuC */ + desc->tail = gc->wq_tail; + + /* current cookie */ + db_cmp.db_status = GUC_DOORBELL_ENABLED; + db_cmp.cookie = gc->cookie; + + /* cookie to be updated */ + db_exc.db_status = GUC_DOORBELL_ENABLED; + db_exc.cookie = gc->cookie + 1; + if 
(db_exc.cookie == 0) + db_exc.cookie = 1; + + /* pointer of current doorbell cacheline */ + db = base + gc->doorbell_offset; + + while (attempt--) { + /* lets ring the doorbell */ + db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, + db_cmp.value_qw, db_exc.value_qw); + + /* if the exchange was successfully executed */ + if (db_ret.value_qw == db_cmp.value_qw) { + /* db was successfully rung */ + gc->cookie = db_exc.cookie; + ret = 0; + break; + } + + /* XXX: doorbell was lost and need to acquire it again */ + if (db_ret.db_status == GUC_DOORBELL_DISABLED) + break; + + DRM_ERROR("Cookie mismatch. Expected %d, returned %d\n", + db_cmp.cookie, db_ret.cookie); + + /* update the cookie to newly read cookie from GuC */ + db_cmp.cookie = db_ret.cookie; + db_exc.cookie = db_ret.cookie + 1; + if (db_exc.cookie == 0) + db_exc.cookie = 1; + } + + kunmap_atomic(base); + return ret; +} + /** * i915_guc_client_submit() - Submit commands through GuC * @client: the guc client where commands will go through @@ -442,5 +614,13 @@ int i915_guc_client_submit(struct i915_guc_client *client, struct intel_context *ctx, struct intel_engine_cs *ring) { - return 0; + int ret; + + ret = add_workqueue_item(client, ctx, ring); + if (ret) + return ret; + + ret = ring_doorbell(client); + + return ret; } diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c index 008f74c..c0b7231 100644 --- a/drivers/gpu/drm/i915/intel_guc_scheduler.c +++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c @@ -160,6 +160,6 @@ bool sanitize_enable_guc_scheduling(struct drm_device *dev) if (!HAS_GUC_UCODE(dev) || !HAS_GUC_SCHED(dev)) return false; - return i915.enable_guc_scheduling; + return i915.enable_execlists && i915.enable_guc_scheduling; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d4011b4..cb5a617 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -618,13 +618,17 @@ 
intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf, struct drm_i915_gem_request *request) { struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; intel_logical_ring_advance(ringbuf); if (intel_ring_stopped(ring)) return; - execlists_context_queue(ring, ctx, ringbuf->tail, request); + if (dev_priv->guc.execbuf_client) + i915_guc_client_submit(dev_priv->guc.execbuf_client, ctx, ring); + else + execlists_context_queue(ring, ctx, ringbuf->tail, request); } static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, @@ -941,6 +945,7 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, { struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret = 0; WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); @@ -953,6 +958,10 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring, ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf); if (ret) goto unpin_ctx_obj; + + /* Invalidate GuC TLB. */ + if (i915.enable_guc_scheduling) + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); } return ret; @@ -1284,8 +1293,13 @@ out: static int gen8_init_rcs_context(struct intel_engine_cs *ring, struct intel_context *ctx) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; int ret; + /* Invalidate GuC TLB. */ + if (i915.enable_guc_scheduling) + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + ret = intel_logical_ring_workarounds_emit(ring, ctx); if (ret) return ret; -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx