On Thu, Jul 09, 2015 at 07:29:10PM +0100, Dave Gordon wrote: > A GuC client has its own doorbell and workqueue. It maintains the > doorbell cache line, process description object and work queue item. > > A default guc_client is created for the i915 driver to use for > normal-priority in-order submission. > > Note that the created client is not yet ready for use; doorbell > allocation will fail as we haven't yet linked the GuC's context > descriptor to the default contexts for each ring (see later patch). > > v2: > Defer adding structure members until needed [Chris Wilson] > Rationalise type declarations [Chris Wilson] > > v4: > Rebased > > Issue: VIZ-4884 > Signed-off-by: Alex Dai <yu.dai@xxxxxxxxx> > Signed-off-by: Dave Gordon <david.s.gordon@xxxxxxxxx> [TOR:] I had some non-critical questions below. Reviewed-by: Tom O'Rourke <Tom.O'Rourke@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_guc_submission.c | 649 +++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_guc.h | 42 ++ > drivers/gpu/drm/i915/intel_guc_loader.c | 12 + > 3 files changed, 703 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index e9d46d6..25d8807 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -27,6 +27,512 @@ > #include "intel_guc.h" > > /** > + * DOC: GuC Client > + * > + * i915_guc_client: > + * We use the term client to avoid confusion with contexts. A i915_guc_client is > + * equivalent to GuC object guc_context_desc. This context descriptor is > + * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell > + * and workqueue for it. Also the process descriptor (guc_process_desc), which > + * is mapped to client space. So the client can write Work Item then ring the > + * doorbell. > + * > + * To simplify the implementation, we allocate one gem object that contains all > + * pages for doorbell, process descriptor and workqueue. 
> + * > + * The Scratch registers: > + * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes > + * a value to the action register (SOFT_SCRATCH_0) along with any data. It then > + * triggers an interrupt on the GuC via another register write (0xC4C8). > + * Firmware writes a success/fail code back to the action register after > + * processing the request. The kernel driver polls waiting for this update and > + * then proceeds. > + * See host2guc_action() > + * > + * Doorbells: > + * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW) > + * mapped into process space. > + * > + * Work Items: > + * There are several types of work items that the host may place into a > + * workqueue, each with its own requirements and limitations. Currently only > + * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which > + * represents an in-order queue. The kernel driver packs ring tail pointer and an > + * ELSP context descriptor dword into Work Item. > + * See guc_add_workqueue_item() > + * > + */ > + > +/* > + * Read GuC command/status register (SOFT_SCRATCH_0) > + * Return true if it contains a response rather than a command > + */ > +static inline bool host2guc_action_response(struct drm_i915_private *dev_priv, > + u32 *status) > +{ > + u32 val = I915_READ(SOFT_SCRATCH(0)); > + *status = val; > + return GUC2HOST_IS_RESPONSE(val); > +} > + > +static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) > +{ > + struct drm_i915_private *dev_priv = guc_to_i915(guc); > + u32 status; > + int i; > + int ret; > + > + if (WARN_ON(len < 1 || len > 15)) > + return -EINVAL; > + [TOR:] Would it be good for host2guc_action to take a forcewake? There are several writes and polling reads for completion. Taking a forcewake could avoid surplus forcewakes for each register access.
> + spin_lock(&dev_priv->guc.host2guc_lock); > + > + dev_priv->guc.action_count += 1; > + dev_priv->guc.action_cmd = data[0]; > + > + for (i = 0; i < len; i++) > + I915_WRITE(SOFT_SCRATCH(i), data[i]); > + > + POSTING_READ(SOFT_SCRATCH(i - 1)); > + > + I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER); > + > + ret = wait_for_atomic(host2guc_action_response(dev_priv, &status), 10); [TOR:] Why 10? > + if (status != GUC2HOST_STATUS_SUCCESS) { > + /* either GuC doesn't respond, which is a TIMEOUT, > + * or a failure code is returned. */ > + if (ret != -ETIMEDOUT) > + ret = -EIO; > + > + DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d " > + "status=0x%08X response=0x%08X\n", > + data[0], ret, status, > + I915_READ(SOFT_SCRATCH(15))); > + > + dev_priv->guc.action_fail += 1; > + dev_priv->guc.action_err = ret; > + } > + dev_priv->guc.action_status = status; > + > + spin_unlock(&dev_priv->guc.host2guc_lock); > + > + return ret; > +} > + > +/* > + * Tell the GuC to allocate or deallocate a specific doorbell > + */ > + > +static int host2guc_allocate_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + u32 data[2]; > + > + data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL; > + data[1] = client->ctx_index; > + > + return host2guc_action(guc, data, 2); > +} > + > +static int host2guc_release_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + u32 data[2]; > + > + data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL; > + data[1] = client->ctx_index; > + > + return host2guc_action(guc, data, 2); > +} > + > +/* > + * Initialise, update, or clear doorbell data shared with the GuC > + * > + * These functions modify shared data and so need access to the mapped > + * client object which contains the page being used for the doorbell > + */ > + > +static void guc_init_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_doorbell_info *doorbell; > + void *base; > + > + base = 
kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + doorbell = base + client->doorbell_offset; > + > + doorbell->db_status = 1; > + doorbell->cookie = 0; > + > + kunmap_atomic(base); > +} > + > +static int guc_ring_doorbell(struct i915_guc_client *gc) > +{ > + struct guc_process_desc *desc; > + union guc_doorbell_qw db_cmp, db_exc, db_ret; > + union guc_doorbell_qw *db; > + void *base; > + int attempt = 2, ret = -EAGAIN; > + > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); > + desc = base + gc->proc_desc_offset; > + > + /* Update the tail so it is visible to GuC */ > + desc->tail = gc->wq_tail; > + > + /* current cookie */ > + db_cmp.db_status = GUC_DOORBELL_ENABLED; > + db_cmp.cookie = gc->cookie; > + > + /* cookie to be updated */ > + db_exc.db_status = GUC_DOORBELL_ENABLED; > + db_exc.cookie = gc->cookie + 1; > + if (db_exc.cookie == 0) > + db_exc.cookie = 1; > + > + /* pointer of current doorbell cacheline */ > + db = base + gc->doorbell_offset; > + > + while (attempt--) { > + /* lets ring the doorbell */ > + db_ret.value_qw = atomic64_cmpxchg((atomic64_t *)db, > + db_cmp.value_qw, db_exc.value_qw); > + > + /* if the exchange was successfully executed */ > + if (db_ret.value_qw == db_cmp.value_qw) { > + /* db was successfully rung */ > + gc->cookie = db_exc.cookie; > + ret = 0; > + break; > + } > + > + /* XXX: doorbell was lost and need to acquire it again */ > + if (db_ret.db_status == GUC_DOORBELL_DISABLED) > + break; > + > + DRM_ERROR("Cookie mismatch. 
Expected %d, returned %d\n", > + db_cmp.cookie, db_ret.cookie); > + > + /* update the cookie to newly read cookie from GuC */ > + db_cmp.cookie = db_ret.cookie; > + db_exc.cookie = db_ret.cookie + 1; > + if (db_exc.cookie == 0) > + db_exc.cookie = 1; > + } > + > + kunmap_atomic(base); > + return ret; > +} > + > +static void guc_disable_doorbell(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct drm_i915_private *dev_priv = guc_to_i915(guc); > + struct guc_doorbell_info *doorbell; > + void *base; > + int drbreg = GEN8_DRBREGL(client->doorbell_id); > + int value; > + > + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + doorbell = base + client->doorbell_offset; > + > + doorbell->db_status = 0; > + > + kunmap_atomic(base); > + > + I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID); > + > + value = I915_READ(drbreg); > + WARN_ON((value & GEN8_DRB_VALID) != 0); > + > + I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0); > + I915_WRITE(drbreg, 0); > + > + /* XXX: wait for any interrupts */ > + /* XXX: wait for workqueue to drain */ > +} > + > +/* > + * Select, assign and release doorbell cachelines > + * > + * These functions track which doorbell cachelines are in use. > + * The data they manipulate is protected by the host2guc lock. > + */ > + > +static uint32_t select_doorbell_cacheline(struct intel_guc *guc) > +{ > + const uint32_t cacheline_size = boot_cpu_data.x86_clflush_size; > + uint32_t offset; > + > + spin_lock(&guc->host2guc_lock); > + > + /* Doorbell uses a single cache line within a page */ > + offset = guc->db_cacheline & PAGE_MASK; > + > + /* Moving to next cache line to reduce contention */ > + guc->db_cacheline += cacheline_size; > + > + spin_unlock(&guc->host2guc_lock); > + > + return offset; > +} > + > +static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) > +{ > + /* The bitmap is split into two halves - high and normal priority.
*/ > + const uint16_t half = GUC_MAX_DOORBELLS / 2; > + const uint16_t start = (priority <= GUC_CTX_PRIORITY_HIGH) ? half : 0; > + const uint16_t end = start + half; > + uint16_t id; > + > + spin_lock(&guc->host2guc_lock); > + id = find_next_zero_bit(guc->doorbell_bitmap, end, start); > + if (id == end) > + id = GUC_INVALID_DOORBELL_ID; > + else > + bitmap_set(guc->doorbell_bitmap, id, 1); > + spin_unlock(&guc->host2guc_lock); > + > + return id; > +} > + > +static void release_doorbell(struct intel_guc *guc, uint16_t id) > +{ > + spin_lock(&guc->host2guc_lock); > + bitmap_clear(guc->doorbell_bitmap, id, 1); > + spin_unlock(&guc->host2guc_lock); > +} > + > +/* > + * Initialise the process descriptor shared with the GuC firmware. > + */ > +static void guc_init_proc_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_process_desc *desc; > + void *base; > + > + base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0)); > + desc = base + client->proc_desc_offset; > + > + memset(desc, 0, sizeof(*desc)); > + > + /* > + * XXX: pDoorbell and WQVBaseAddress are pointers in process address > + * space for ring3 clients (set them as in mmap_ioctl) or kernel > + * space for kernel clients (map on demand instead? May make debug > + * easier to have it mapped). > + */ > + desc->wq_base_addr = 0; > + desc->db_base_addr = 0; > + > + desc->context_id = client->ctx_index; > + desc->wq_size_bytes = client->wq_size; > + desc->wq_status = WQ_STATUS_ACTIVE; > + desc->priority = client->priority; > + > + kunmap_atomic(base); > +} > + > +/* > + * Initialise/clear the context descriptor shared with the GuC firmware. > + * > + * This descriptor tells the GuC where (in GGTT space) to find the important > + * data structures relating to this client (doorbell, process descriptor, > + * write queue, etc). 
> + */ > + > +static void guc_init_ctx_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_context_desc desc; > + struct sg_table *sg; > + > + memset(&desc, 0, sizeof(desc)); > + > + desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL; > + desc.context_id = client->ctx_index; > + desc.priority = client->priority; > + desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) | > + (1 << VECS) | (1 << VCS2); /* all engines */ > + desc.db_id = client->doorbell_id; > + > + /* > + * The CPU address is only needed at certain points, so kmap_atomic on > + * demand instead of storing it in the ctx descriptor. > + * XXX: May make debug easier to have it mapped > + */ > + desc.db_trigger_cpu = 0; > + desc.db_trigger_uk = client->doorbell_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + desc.db_trigger_phy = client->doorbell_offset + > + sg_dma_address(client->client_obj->pages->sgl); > + > + desc.process_desc = client->proc_desc_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + > + desc.wq_addr = client->wq_offset + > + i915_gem_obj_ggtt_offset(client->client_obj); > + > + desc.wq_size = client->wq_size; > + > + /* > + * XXX: Take LRCs from an existing intel_context if this is not an > + * IsKMDCreatedContext client > + */ > + desc.desc_private = (uintptr_t)client; > + > + /* Pool context is pinned already */ > + sg = guc->ctx_pool_obj->pages; > + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), > + sizeof(desc) * client->ctx_index); > +} > + > +static void guc_fini_ctx_desc(struct intel_guc *guc, > + struct i915_guc_client *client) > +{ > + struct guc_context_desc desc; > + struct sg_table *sg; > + > + memset(&desc, 0, sizeof(desc)); > + > + sg = guc->ctx_pool_obj->pages; > + sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), > + sizeof(desc) * client->ctx_index); > +} > + > +/* Get valid workqueue item and return it back to offset */ > +static int 
guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) > +{ > + struct guc_process_desc *desc; > + void *base; > + u32 size = sizeof(struct guc_wq_item); > + int ret = 0, timeout_counter = 200; > + unsigned long flags; > + > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); > + desc = base + gc->proc_desc_offset; > + > + while (timeout_counter-- > 0) { > + spin_lock_irqsave(&gc->wq_lock, flags); > + > + ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head, > + gc->wq_size) >= size, 1); > + > + if (!ret) { > + *offset = gc->wq_tail; > + > + /* advance the tail for next workqueue item */ > + gc->wq_tail += size; > + gc->wq_tail &= gc->wq_size - 1; > + > + /* this will break the loop */ > + timeout_counter = 0; > + } > + > + spin_unlock_irqrestore(&gc->wq_lock, flags); > + }; > + > + kunmap_atomic(base); > + > + return ret; > +} > + > +static int guc_add_workqueue_item(struct i915_guc_client *gc, > + struct drm_i915_gem_request *rq) > +{ > + enum intel_ring_id ring_id = rq->ring->id; > + struct guc_wq_item *wqi; > + void *base; > + u32 tail, wq_len, wq_off = 0; > + int ret; > + > + ret = guc_get_workqueue_space(gc, &wq_off); > + if (ret) > + return ret; > + > + /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we > + * should not have the case where structure wqi crosses a page boundary, nor > + * wraps to the beginning. This simplifies the implementation below. > + * > + * XXX: if not the case, we need to save data to a temp wqi and copy it to > + * workqueue buffer dw by dw.
> + */ > + WARN_ON(sizeof(struct guc_wq_item) != 16); > + WARN_ON(wq_off & 3); > + > + /* wq starts from the page after doorbell / process_desc */ > + base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, > + (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT)); > + wq_off &= PAGE_SIZE - 1; > + wqi = (struct guc_wq_item *)((char *)base + wq_off); > + > + /* len does not include the header */ > + wq_len = sizeof(struct guc_wq_item) / sizeof(u32) - 1; > + wqi->header = WQ_TYPE_INORDER | > + (wq_len << WQ_LEN_SHIFT) | > + (ring_id << WQ_TARGET_SHIFT) | > + WQ_NO_WCFLUSH_WAIT; > + > + /* The GuC wants only the low-order word of the context descriptor */ > + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, rq->ring); > + > + /* The GuC firmware wants the tail index in QWords, not bytes */ > + tail = rq->ringbuf->tail >> 3; > + wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; > + wqi->fence_id = 0; /*XXX: what fence to be here */ > + > + kunmap_atomic(base); > + > + return 0; > +} > + > +/** > + * i915_guc_submit() - Submit commands through GuC > + * @client: the guc client where commands will go through > + * @ctx: LRC where commands come from > + * @ring: HW engine that will execute the commands > + * > + * Return: 0 on success > + */ > +int i915_guc_submit(struct i915_guc_client *client, > + struct drm_i915_gem_request *rq) > +{ > + unsigned long flags; > + int q_ret, b_ret; > + > + /* Need this because of the deferred pin ctx and ring */ > + /* Shall we move this right after ring is pinned?
*/ > + intel_lr_context_update(rq); > + > + q_ret = guc_add_workqueue_item(client, rq); > + if (q_ret == 0) > + b_ret = guc_ring_doorbell(client); > + > + spin_lock_irqsave(&client->wq_lock, flags); > + client->submissions += 1; > + if (q_ret) { > + client->q_fail += 1; > + client->retcode = q_ret; > + } else if (b_ret) { > + client->b_fail += 1; > + client->retcode = q_ret = b_ret; > + } else { > + client->retcode = 0; > + } > + spin_unlock_irqrestore(&client->wq_lock, flags); > + > + return q_ret; > +} > + > +/* > + * Everything below here is concerned with setup & teardown, and is > + * therefore not part of the somewhat time-critical batch-submission > + * path of i915_guc_submit() above. > + */ > + > +/** > * gem_allocate_guc_obj() - Allocate gem object for GuC usage > * @dev: drm device > * @size: size of object > @@ -75,6 +581,121 @@ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) > drm_gem_object_unreference(&obj->base); > } > > +static void guc_client_free(struct drm_device *dev, > + struct i915_guc_client *client) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + > + if (!client) > + return; > + > + if (client->doorbell_id != GUC_INVALID_DOORBELL_ID) { > + /* > + * First disable the doorbell, then tell the GuC we've > + * finished with it, finally deallocate it in our bitmap > + */ > + guc_disable_doorbell(guc, client); > + host2guc_release_doorbell(guc, client); > + release_doorbell(guc, client->doorbell_id); > + } > + > + /* > + * XXX: wait for any outstanding submissions before freeing memory. 
> + * Be sure to drop any locks > + */ > + > + gem_release_guc_obj(client->client_obj); > + > + if (client->ctx_index != GUC_INVALID_CTX_ID) { > + guc_fini_ctx_desc(guc, client); > + ida_simple_remove(&guc->ctx_ids, client->ctx_index); > + } > + > + kfree(client); > +} > + > +/** > + * guc_client_alloc() - Allocate an i915_guc_client > + * @dev: drm device > + * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW > + * The kernel client to replace ExecList submission is created with > + * NORMAL priority. Priority of a client for scheduler can be HIGH, > + * while a preemption context can use CRITICAL. > + * > + * Return: An i915_guc_client object if success. > + */ > +static struct i915_guc_client *guc_client_alloc(struct drm_device *dev, > + uint32_t priority) > +{ > + struct i915_guc_client *client; > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + struct drm_i915_gem_object *obj; > + > + client = kzalloc(sizeof(*client), GFP_KERNEL); > + if (!client) > + return NULL; > + > + client->doorbell_id = GUC_INVALID_DOORBELL_ID; > + client->priority = priority; > + > + client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, > + GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); > + if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) { > + client->ctx_index = GUC_INVALID_CTX_ID; > + goto err; > + } > + > + /* The first page is doorbell/proc_desc. Two followed pages are wq. */ > + obj = gem_allocate_guc_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE); > + if (!obj) > + goto err; > + > + client->client_obj = obj; > + client->wq_offset = GUC_DB_SIZE; > + client->wq_size = GUC_WQ_SIZE; > + spin_lock_init(&client->wq_lock); > + > + client->doorbell_offset = select_doorbell_cacheline(guc); > + > + /* > + * Since the doorbell only requires a single cacheline, we can save > + * space by putting the application process descriptor in the same > + * page. Use the half of the page that doesn't include the doorbell. 
> + */ > + if (client->doorbell_offset >= (GUC_DB_SIZE / 2)) > + client->proc_desc_offset = 0; > + else > + client->proc_desc_offset = (GUC_DB_SIZE / 2); > + > + client->doorbell_id = assign_doorbell(guc, client->priority); > + if (client->doorbell_id == GUC_INVALID_DOORBELL_ID) > + /* XXX: evict a doorbell instead */ > + goto err; > + > + guc_init_proc_desc(guc, client); > + guc_init_ctx_desc(guc, client); > + guc_init_doorbell(guc, client); > + > + /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ > + I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); > + > + /* XXX: Any cache flushes needed? General domain mgmt calls? */ > + > + if (host2guc_allocate_doorbell(guc, client)) > + goto err; > + > + DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u db_id %u\n", > + priority, client, client->ctx_index, client->doorbell_id); > + > + return client; > + > +err: > + guc_client_free(dev, client); > + return NULL; > +} > + > static void guc_create_log(struct intel_guc *guc) > { > struct drm_i915_private *dev_priv = guc_to_i915(guc); > @@ -138,6 +759,8 @@ int i915_guc_submission_init(struct drm_device *dev) > if (!guc->ctx_pool_obj) > return -ENOMEM; > > + spin_lock_init(&dev_priv->guc.host2guc_lock); > + > ida_init(&guc->ctx_ids); > > guc_create_log(guc); > @@ -145,6 +768,32 @@ int i915_guc_submission_init(struct drm_device *dev) > return 0; > } > > +int i915_guc_submission_enable(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = &dev_priv->guc; > + struct i915_guc_client *client; > + > + /* client for execbuf submission */ > + client = guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL); > + if (!client) { > + DRM_ERROR("Failed to create execbuf guc_client\n"); > + return -ENOMEM; > + } > + > + guc->execbuf_client = client; > + return 0; > +} > + > +void i915_guc_submission_disable(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + struct intel_guc *guc = 
&dev_priv->guc; > + > + guc_client_free(dev, guc->execbuf_client); > + guc->execbuf_client = NULL; > +} > + > void i915_guc_submission_fini(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h > index 5b51b05..d249326 100644 > --- a/drivers/gpu/drm/i915/intel_guc.h > +++ b/drivers/gpu/drm/i915/intel_guc.h > @@ -27,6 +27,30 @@ > #include "intel_guc_fwif.h" > #include "i915_guc_reg.h" > > +struct i915_guc_client { > + struct drm_i915_gem_object *client_obj; > + uint32_t priority; > + uint32_t ctx_index; > + > + uint32_t proc_desc_offset; > + uint32_t doorbell_offset; > + uint32_t cookie; > + uint16_t doorbell_id; > + uint16_t padding; /* Maintain alignment */ > + > + uint32_t wq_offset; > + uint32_t wq_size; > + > + spinlock_t wq_lock; /* Protects all data below */ > + uint32_t wq_tail; > + > + /* GuC submission statistics & status */ > + uint64_t submissions; > + uint32_t q_fail; > + uint32_t b_fail; > + int retcode; > +}; > + > enum intel_guc_fw_status { > GUC_FIRMWARE_FAIL = -1, > GUC_FIRMWARE_NONE = 0, > @@ -60,6 +84,20 @@ struct intel_guc { > > struct drm_i915_gem_object *ctx_pool_obj; > struct ida ctx_ids; > + > + struct i915_guc_client *execbuf_client; > + > + spinlock_t host2guc_lock; /* Protects all data below */ > + > + DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); > + int db_cacheline; > + > + /* Action status & statistics */ > + uint64_t action_count; /* Total commands issued */ > + uint32_t action_cmd; /* Last command word */ > + uint32_t action_status; /* Last return status */ > + uint32_t action_fail; /* Total number of failures */ > + int32_t action_err; /* Last error code */ > }; > > /* intel_guc_loader.c */ > @@ -70,6 +108,10 @@ extern const char *intel_guc_fw_status_repr(enum intel_guc_fw_status status); > > /* i915_guc_submission.c */ > int i915_guc_submission_init(struct drm_device *dev); > +int i915_guc_submission_enable(struct 
drm_device *dev); > +int i915_guc_submit(struct i915_guc_client *client, > + struct drm_i915_gem_request *rq); > +void i915_guc_submission_disable(struct drm_device *dev); > void i915_guc_submission_fini(struct drm_device *dev); > > #endif > diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c > index e5d7136..25ba29f 100644 > --- a/drivers/gpu/drm/i915/intel_guc_loader.c > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c > @@ -427,6 +427,8 @@ int intel_guc_ucode_load(struct drm_device *dev) > intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), > intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); > > + i915_guc_submission_disable(dev); > + > if (guc_fw->guc_fw_fetch_status == GUC_FIRMWARE_NONE) > return 0; > > @@ -479,12 +481,20 @@ int intel_guc_ucode_load(struct drm_device *dev) > intel_guc_fw_status_repr(guc_fw->guc_fw_fetch_status), > intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); > > + if (i915.enable_guc_submission) { > + err = i915_guc_submission_enable(dev); > + if (err) > + goto fail; > + } > + > return 0; > > fail: > if (guc_fw->guc_fw_load_status == GUC_FIRMWARE_PENDING) > guc_fw->guc_fw_load_status = GUC_FIRMWARE_FAIL; > > + i915_guc_submission_disable(dev); > + > DRM_ERROR("Failed to initialize GuC, error %d\n", err); > > return err; > @@ -547,6 +557,8 @@ void intel_guc_ucode_fini(struct drm_device *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; > > + i915_guc_submission_fini(dev); > + > if (guc_fw->guc_fw_obj) > drm_gem_object_unreference(&guc_fw->guc_fw_obj->base); > guc_fw->guc_fw_obj = NULL; > -- > 1.9.1 > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx