From: Alex Dai <yu.dai@xxxxxxxxx> Implementation of GuC client. A GuC client has its own doorbell and workqueue. It maintains the doorbell cache line, process description object and work queue item. A default guc_client is created to do the in-order legacy execlist submission. Issue: VIZ-4884 Signed-off-by: Alex Dai <yu.dai@xxxxxxxxx> --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/intel_guc.h | 52 ++++ drivers/gpu/drm/i915/intel_guc_client.c | 430 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc_loader.c | 7 + drivers/gpu/drm/i915/intel_guc_scheduler.c | 29 +- 6 files changed, 523 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc_client.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 50b2057..0407720 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -42,7 +42,8 @@ i915-y += i915_cmd_parser.o \ # ancilliary microcontroller support i915-y += intel_uc_loader.o \ - intel_guc_loader.o + intel_guc_loader.o \ + intel_guc_client.o # autogenerated null render state i915-y += intel_renderstate_gen6.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4134db9..6760b9a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1850,6 +1850,11 @@ static inline struct drm_i915_private *dev_to_i915(struct device *dev) return to_i915(dev_get_drvdata(dev)); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return container_of(guc, struct drm_i915_private, guc); +} + /* Iterate over initialised rings */ #define for_each_ring(ring__, dev_priv__, i__) \ for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \ diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index d49549c..72c4ce2 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -27,11 +27,38 @@ #include 
"intel_guc_api.h" #include "intel_uc_loader.h" +#define GUC_DB_SIZE PAGE_SIZE +#define GUC_WQ_SIZE (PAGE_SIZE * 2) + +struct i915_guc_client { + struct drm_i915_gem_object *client_obj; + u32 priority; + off_t doorbell_offset; + off_t proc_desc_offset; + off_t wq_offset; + uint16_t doorbell_id; + uint32_t ctx_index; + uint32_t wq_size; +}; + +#define I915_MAX_DOORBELLS 256 +#define INVALID_DOORBELL_ID I915_MAX_DOORBELLS + +#define INVALID_CTX_ID (MAX_GUC_GPU_CONTEXTS+1) + struct intel_guc { struct intel_uc_fw guc_fw; /* GuC-specific additions */ + spinlock_t host2guc_lock; + struct drm_i915_gem_object *ctx_pool_obj; + + struct i915_guc_client *execbuf_client; + + struct ida ctx_ids; + int db_cacheline; + DECLARE_BITMAP(doorbell_bitmap, I915_MAX_DOORBELLS); }; #define GUC_STATUS 0xc000 @@ -98,6 +125,22 @@ struct intel_guc { GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA | \ GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA) +#define HOST2GUC_INTERRUPT 0xc4c8 +#define HOST2GUC_TRIGGER (1<<0) + +#define DRBMISC1 0x1984 +#define DOORBELL_ENABLE (1<<0) + +#define GEN8_DRBREGL(x) (0x1000 + (x) * 8) +#define GEN8_DRB_VALID (1<<0) +#define GEN8_DRBREGU(x) (0x1000 + (x) * 8 + 4) + +#define GEN8_GT_PM_CONFIG 0x138140 +#define GEN8_GT_DOORBELL_ENABLE (1<<0) + +#define GEN8_GTCR 0x4274 +#define GEN8_GTCR_INVALIDATE (1<<0) + /* intel_guc_loader.c */ extern int intel_guc_load_ucode(struct drm_device *dev, bool wait); extern void intel_guc_ucode_fini(struct drm_device *dev); @@ -112,4 +155,13 @@ void guc_scheduler_fini(struct drm_device *dev); int guc_scheduler_enable(struct drm_device *dev); void guc_scheduler_disable(struct drm_device *dev); +/* intel_guc_client.c */ +struct i915_guc_client* +i915_guc_client_alloc(struct drm_device *dev, u32 priority); +void i915_guc_client_free(struct drm_device *dev, + struct i915_guc_client *client); +int i915_guc_client_submit(struct i915_guc_client *client, + struct intel_context *ctx, + struct intel_engine_cs *ring); + #endif diff --git 
a/drivers/gpu/drm/i915/intel_guc_client.c b/drivers/gpu/drm/i915/intel_guc_client.c
new file mode 100644
index 0000000..094c553
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_client.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include "i915_drv.h"
+#include "intel_guc.h"
+
+/**
+ * DOC: GuC Client
+ *
+ * i915_guc_client:
+ * We use the term client to avoid confusion with contexts. An i915_guc_client
+ * is equivalent to the GuC object guc_context_desc. This context descriptor is
+ * allocated from a pool of 1024 entries. The kernel driver allocates a doorbell
+ * and a workqueue for it, as well as the process descriptor (guc_process_desc),
+ * which is mapped to client space, so that the client can write a Work Item
+ * and then ring the doorbell.
+ *
+ * To simplify the implementation, we allocate one gem object that contains all
+ * pages for doorbell, process descriptor and workqueue.
+ *
+ * The Scratch registers:
+ * There are 16 MMIO-based registers starting at 0xC180. The kernel driver
+ * writes a value to the action register (SOFT_SCRATCH_0) along with any data.
+ * It then triggers an interrupt on the GuC via another register write (0xC4C8).
+ * The firmware writes a success/fail code back to the action register after
+ * processing the request. The kernel driver polls waiting for this update and
+ * then proceeds.
+ * See intel_guc_action()
+ *
+ * Doorbells:
+ * Doorbells are interrupts to uKernel. A doorbell is a single cache line (QW)
+ * mapped into process space.
+ *
+ */
+
+/*
+ * Read the GuC command/status register (SOFT_SCRATCH_0) into *status.
+ * Return true if it contains a response rather than a command.
+ */
+static inline bool i915_guc_get_response(struct drm_i915_private *dev_priv,
+					 u32 *status)
+{
+	u32 val = I915_READ(SOFT_SCRATCH(0));
+	*status = val;
+	return GUC2HOST_IS_RESPONSE(val);
+}
+
+static int intel_guc_action(struct intel_guc *guc, u32 *data, u32 len)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 status;
+	int i;
+	int ret;
+
+	if (WARN_ON(len < 1 || len > 15))
+		return -EINVAL;
+
+	spin_lock(&dev_priv->guc.host2guc_lock);
+
+	for (i = 0; i < len; i++)
+		I915_WRITE(SOFT_SCRATCH(i), data[i]);
+
+	POSTING_READ(SOFT_SCRATCH(i - 1));
+
+	I915_WRITE(HOST2GUC_INTERRUPT, HOST2GUC_TRIGGER);
+
+	ret = wait_for_atomic(i915_guc_get_response(dev_priv, &status), 10);
+	if (status != GUC2HOST_STATUS_SUCCESS) {
+		/* Either the GuC did not respond, which is a TIMEOUT,
+		 * or a failure code was returned. */
+		if (ret != -ETIMEDOUT)
+			ret = -EIO;
+
+		DRM_ERROR("GUC: host2guc action 0x%X failed. ret=%d "
+				"status=0x%08X response=0x%08X\n",
+				data[0], ret, status,
+				I915_READ(SOFT_SCRATCH(15)));
+	}
+
+	spin_unlock(&dev_priv->guc.host2guc_lock);
+
+	return ret;
+}
+
+static void guc_release_doorbell(struct intel_guc *guc, uint16_t id)
+{
+	spin_lock(&guc->host2guc_lock);
+	bitmap_clear(guc->doorbell_bitmap, id, 1);
+	spin_unlock(&guc->host2guc_lock);
+}
+
+static uint16_t guc_assign_doorbell(struct intel_guc *guc, u32 priority)
+{
+	const uint16_t size = I915_MAX_DOORBELLS;
+	uint16_t id;
+
+	spin_lock(&guc->host2guc_lock);
+
+	/* The bitmap is split into two halves - high and normal priority. */
+	if (priority <= GUC_CTX_PRIORITY_HIGH) {
+		id = find_next_zero_bit(guc->doorbell_bitmap, size, size / 2);
+		if (id == size)
+			id = INVALID_DOORBELL_ID;
+	} else {
+		id = find_next_zero_bit(guc->doorbell_bitmap, size / 2, 0);
+		if (id == size / 2)
+			id = INVALID_DOORBELL_ID;
+	}
+
+	if (id != INVALID_DOORBELL_ID)
+		bitmap_set(guc->doorbell_bitmap, id, 1);
+
+	spin_unlock(&guc->host2guc_lock);
+
+	return id;
+}
+
+static off_t guc_select_doorbell_cacheline(struct intel_guc *guc)
+{
+	const int cacheline_size = boot_cpu_data.x86_clflush_size;
+	const int cacheline_per_page = PAGE_SIZE / cacheline_size;
+	off_t offset;
+
+	spin_lock(&guc->host2guc_lock);
+
+	/* A doorbell occupies a single cache line */
+	offset = cacheline_size * guc->db_cacheline;
+
+	/* Advance to the next cache line to reduce contention */
+	guc->db_cacheline = (guc->db_cacheline + 1) % cacheline_per_page;
+
+	spin_unlock(&guc->host2guc_lock);
+
+	return offset;
+}
+
+static void init_ctx_desc(struct intel_guc *guc,
+			  struct i915_guc_client *client)
+{
+	struct guc_context_desc desc;
+	struct sg_table *sg;
+
+	memset(&desc, 0, sizeof(desc));
+
+	desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
+	desc.context_id = client->ctx_index;
+	desc.priority = client->priority;
+	desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) |
+			    (1 << VECS) | (1 << VCS2); /* all engines */
+	desc.db_id = client->doorbell_id;
+
+	/*
+	 * The CPU address is only needed at certain points, so kmap_atomic on
+	 * demand instead of storing it in the ctx descriptor.
+	 * XXX: May make debug easier to have it mapped
+	 */
+	desc.db_trigger_cpu = 0;
+	desc.db_trigger_uk = client->doorbell_offset +
+		i915_gem_obj_ggtt_offset(client->client_obj);
+	desc.db_trigger_phy = client->doorbell_offset +
+		sg_dma_address(client->client_obj->pages->sgl);
+
+	desc.process_desc = client->proc_desc_offset +
+		i915_gem_obj_ggtt_offset(client->client_obj);
+
+	desc.wq_addr = client->wq_offset +
+		i915_gem_obj_ggtt_offset(client->client_obj);
+
+	desc.wq_size = client->wq_size;
+
+	/*
+	 * XXX: Take LRCs from an existing intel_context if this is not an
+	 * IsKMDCreatedContext client
+	 */
+	desc.desc_private = (uintptr_t)client;
+
+	/* Pool context is pinned already */
+	sg = guc->ctx_pool_obj->pages;
+	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+			     sizeof(desc) * client->ctx_index);
+}
+
+static void fini_ctx_desc(struct intel_guc *guc,
+			  struct i915_guc_client *client)
+{
+	struct guc_context_desc desc;
+	struct sg_table *sg;
+
+	memset(&desc, 0, sizeof(desc));
+
+	sg = guc->ctx_pool_obj->pages;
+	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
+			     sizeof(desc) * client->ctx_index);
+}
+
+static void init_proc_desc(struct intel_guc *guc,
+			   struct i915_guc_client *client)
+{
+	struct guc_process_desc *desc;
+	void *base;
+
+	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+	desc = base + client->proc_desc_offset;
+
+	memset(desc, 0, sizeof(*desc));
+
+	/*
+	 * XXX: pDoorbell and WQVBaseAddress are pointers in process address
+	 * space for ring3 clients (set them as in mmap_ioctl) or kernel
+	 * space for kernel clients (map on demand instead? May make debug
+	 * easier to have it mapped).
+	 */
+	desc->wq_base_addr = 0;
+	desc->db_base_addr = 0;
+
+	desc->context_id = client->ctx_index;
+	desc->wq_size_bytes = client->wq_size;
+	desc->wq_status = WQ_STATUS_ACTIVE;
+	desc->priority = client->priority;
+
+	kunmap_atomic(base);
+}
+
+static int host2guc_allocate_doorbell(struct intel_guc *guc,
+				      struct i915_guc_client *client)
+{
+	u32 data[2];
+
+	data[0] = HOST2GUC_ACTION_ALLOCATE_DOORBELL;
+	data[1] = client->ctx_index;
+
+	return intel_guc_action(guc, data, 2);
+}
+
+static int host2guc_release_doorbell(struct intel_guc *guc,
+				     struct i915_guc_client *client)
+{
+	u32 data[2];
+
+	data[0] = HOST2GUC_ACTION_DEALLOCATE_DOORBELL;
+	data[1] = client->ctx_index;
+
+	return intel_guc_action(guc, data, 2);
+}
+
+static void init_doorbell(struct intel_guc *guc,
+			  struct i915_guc_client *client)
+{
+	struct guc_doorbell_info *doorbell;
+	void *base;
+
+	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+	doorbell = base + client->doorbell_offset;
+
+	doorbell->db_status = 1;
+	doorbell->cookie = 0;
+
+	kunmap_atomic(base);
+}
+
+static void disable_doorbell(struct intel_guc *guc,
+			     struct i915_guc_client *client)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct guc_doorbell_info *doorbell;
+	void *base;
+	int drbreg = GEN8_DRBREGL(client->doorbell_id);
+	int value;
+
+	base = kmap_atomic(i915_gem_object_get_page(client->client_obj, 0));
+	doorbell = base + client->doorbell_offset;
+
+	doorbell->db_status = 0;
+
+	kunmap_atomic(base);
+
+	I915_WRITE(drbreg, I915_READ(drbreg) & ~GEN8_DRB_VALID);
+
+	value = I915_READ(drbreg);
+	WARN_ON((value & GEN8_DRB_VALID) != 0);
+
+	I915_WRITE(GEN8_DRBREGU(client->doorbell_id), 0);
+	I915_WRITE(drbreg, 0);
+
+	/* XXX: wait for any interrupts */
+	/* XXX: wait for workqueue to drain */
+}
+
+void i915_guc_client_free(struct drm_device *dev,
+			  struct i915_guc_client *client)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc *guc = &dev_priv->guc;
+
+	if (!client)
+		return;
+
+	if (client->doorbell_id != INVALID_DOORBELL_ID) {
+		disable_doorbell(guc, client);
+		host2guc_release_doorbell(guc, client);
+		guc_release_doorbell(guc, client->doorbell_id);
+	}
+
+	/*
+	 * XXX: wait for any outstanding submissions before freeing memory.
+	 * Be sure to drop any locks
+	 */
+
+	intel_guc_release_gem_obj(client->client_obj);
+
+	if (client->ctx_index != INVALID_CTX_ID) {
+		fini_ctx_desc(guc, client);
+		ida_simple_remove(&guc->ctx_ids, client->ctx_index);
+	}
+
+	kfree(client);
+}
+
+/**
+ * i915_guc_client_alloc() - Allocate an i915_guc_client
+ * @dev:	drm device
+ * @priority:	one of four priority levels: _CRITICAL, _HIGH, _NORMAL, _LOW
+ *		The kernel client that replaces ExecList submission is created
+ *		with NORMAL priority. A client for the scheduler can use HIGH
+ *		priority, while a preemption context can use CRITICAL.
+ *
+ * Return:	An i915_guc_client object on success, NULL on failure.
+ */
+struct i915_guc_client*
+i915_guc_client_alloc(struct drm_device *dev, u32 priority)
+{
+	struct i915_guc_client *client;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc *guc = &dev_priv->guc;
+	struct drm_i915_gem_object *obj;
+
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
+	if (!client)
+		return NULL;
+
+	client->doorbell_id = INVALID_DOORBELL_ID;
+	client->priority = priority;
+
+	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
+			MAX_GUC_GPU_CONTEXTS, GFP_KERNEL);
+	if (client->ctx_index >= MAX_GUC_GPU_CONTEXTS) {
+		client->ctx_index = INVALID_CTX_ID;
+		goto err;
+	}
+
+	/* First page is doorbell/proc_desc. The next two pages are the wq. */
+	obj = intel_guc_allocate_gem_obj(dev, GUC_DB_SIZE + GUC_WQ_SIZE);
+	if (!obj)
+		goto err;
+
+	client->client_obj = obj;
+	client->wq_offset = GUC_DB_SIZE;
+	client->wq_size = GUC_WQ_SIZE;
+
+	client->doorbell_offset = guc_select_doorbell_cacheline(guc);
+
+	/*
+	 * Since the doorbell only requires a single cacheline, we can save
+	 * space by putting the application process descriptor in the same
+	 * page. Use the half of the page that doesn't include the doorbell.
+	 */
+	if (client->doorbell_offset >= (GUC_DB_SIZE / 2))
+		client->proc_desc_offset = 0;
+	else
+		client->proc_desc_offset = (GUC_DB_SIZE / 2);
+
+	client->doorbell_id = guc_assign_doorbell(guc, client->priority);
+	if (client->doorbell_id == INVALID_DOORBELL_ID)
+		/* XXX: evict a doorbell instead */
+		goto err;
+
+	init_ctx_desc(guc, client);
+	init_proc_desc(guc, client);
+	init_doorbell(guc, client);
+
+	/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
+	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
+	/* XXX: Any cache flushes needed? General domain mgmt calls? */
+
+	if (host2guc_allocate_doorbell(guc, client))
+		goto err;
+
+	return client;
+
+err:
+	i915_guc_client_free(dev, client);
+	return NULL;
+}
+
+/**
+ * i915_guc_client_submit() - Submit commands through GuC
+ * @client:	the guc client through which the commands will be submitted
+ * @ctx:	LRC where commands come from
+ * @ring:	HW engine that will execute the commands
+ *
+ * Return:	0 on success (currently a stub that always returns 0)
+ */
+int i915_guc_client_submit(struct i915_guc_client *client,
+			   struct intel_context *ctx,
+			   struct intel_engine_cs *ring)
+{
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 15c055a..c784d64 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -294,12 +294,19 @@ static int guc_load_ucode(struct drm_device *dev) /* Set MMIO/WA for GuC init */ + I915_WRITE(DRBMISC1, DOORBELL_ENABLE); + /* Enable MIA caching. 
GuC clock gating is disabled. */ I915_WRITE(GUC_SHIM_CONTROL, GUC_SHIM_CONTROL_VALUE); /* WaC6DisallowByGfxPause*/ I915_WRITE(GEN6_GFXPAUSE, 0x30FFF); + if (IS_SKYLAKE(dev)) + I915_WRITE(GEN9_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE); + else + I915_WRITE(GEN8_GT_PM_CONFIG, GEN8_GT_DOORBELL_ENABLE); + if (IS_GEN9(dev)) { /* DOP Clock Gating Enable for GuC clocks */ I915_WRITE(GEN7_MISCCPCTL, (GEN8_DOP_CLOCK_GATE_GUC_ENABLE | diff --git a/drivers/gpu/drm/i915/intel_guc_scheduler.c b/drivers/gpu/drm/i915/intel_guc_scheduler.c index 1047192..962443c 100644 --- a/drivers/gpu/drm/i915/intel_guc_scheduler.c +++ b/drivers/gpu/drm/i915/intel_guc_scheduler.c @@ -36,6 +36,8 @@ void guc_scheduler_fini(struct drm_device *dev) if (ctx_pool) { intel_guc_release_gem_obj(ctx_pool); guc->ctx_pool_obj = NULL; + + ida_destroy(&guc->ctx_ids); } } @@ -60,19 +62,42 @@ int guc_scheduler_init(struct drm_device *dev) if (!guc->ctx_pool_obj) return -ENOMEM; + spin_lock_init(&dev_priv->guc.host2guc_lock); + + ida_init(&guc->ctx_ids); + + memset(guc->doorbell_bitmap, 0, sizeof(guc->doorbell_bitmap)); + guc->db_cacheline = 0; + return 0; } int guc_scheduler_enable(struct drm_device *dev) { + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + if (!i915.enable_guc_scheduling) return 0; - /* TODO: placeholder for guc scheduler enabling */ + /* client for execbuf submission */ + guc->execbuf_client = + i915_guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL); + if (!guc->execbuf_client) { + DRM_ERROR("Failed to create execbuf guc_client\n"); + return -ENOMEM; + } + return 0; } void guc_scheduler_disable(struct drm_device *dev) { - /* TODO: placeholder for guc scheduler disabling */ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_guc *guc = &dev_priv->guc; + + if (guc->execbuf_client) { + i915_guc_client_free(dev, guc->execbuf_client); + guc->execbuf_client = NULL; + } } -- 1.9.1 _______________________________________________ Intel-gfx 
mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx