On 29/08/18 12:18, Michal Wajdeczko wrote:
New GuC stage descriptor stores information about all possible HW contexts
that use it. The idea is that every direct-submission GuC client gets one
SW Context ID and every HW context created by that client gets one SW
Counter (up to 64 entries). The correct SW Context ID and SW Counter now
get passed on every work queue item.
Signed-off-by: Michel Thierry <michel.thierry@xxxxxxxxx>
Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@xxxxxxxxx>
Cc: Michel Thierry <michel.thierry@xxxxxxxxx>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx>
Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx>
Cc: Tomasz Lis <tomasz.lis@xxxxxxxxx>
Cc: Michal Winiarski <michal.winiarski@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_debugfs.c | 9 +-
drivers/gpu/drm/i915/i915_utils.h | 12 ++
drivers/gpu/drm/i915/intel_guc_fwif.h | 67 ++++-----
drivers/gpu/drm/i915/intel_guc_submission.c | 205 +++++++++++++++++++---------
4 files changed, 190 insertions(+), 103 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a5265c2..ad09970 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2455,11 +2455,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
seq_printf(m, "GuC stage descriptor %u:\n", index);
seq_printf(m, "\tIndex: %u\n", desc->stage_id);
+ seq_printf(m, "\tProxy Index: %u\n", desc->proxy_id);
seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
seq_printf(m, "\tPriority: %d\n", desc->priority);
seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
- seq_printf(m, "\tEngines used: 0x%x\n",
- desc->engines_used);
seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
desc->db_trigger_phy,
desc->db_trigger_cpu,
@@ -2471,10 +2470,10 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data)
seq_putc(m, '\n');
for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
- u32 guc_engine_id = engine->guc_id;
+ u32 class = GUC_ID_TO_ENGINE_CLASS(engine->guc_id);
+ u32 inst = GUC_ID_TO_ENGINE_INSTANCE(engine->guc_id);
struct guc_execlist_context *lrc =
- &desc->lrc[guc_engine_id];
-
+ &desc->lrc[class][inst];
seq_printf(m, "\t%s LRC:\n", engine->name);
seq_printf(m, "\t\tContext desc: 0x%x\n",
lrc->context_desc);
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 395dd25..7ec1fe4 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -118,6 +118,18 @@ static inline u64 ptr_to_u64(const void *ptr)
__idx; \
})
+#define bitmap32_test_bit(bitmap, bit) ({ \
+ (bitmap)[(bit) / 32] & (1 << ((bit) % 32)); \
+})
+
+#define bitmap32_set_bit(bitmap, bit) ({ \
+ (bitmap)[(bit) / 32] |= (1 << ((bit) % 32)); \
+})
+
+#define bitmap32_clear_bit(bitmap, bit) ({ \
+ (bitmap)[(bit) / 32] &= ~(1 << ((bit) % 32)); \
+})
+
#include <linux/list.h>
static inline int list_is_first(const struct list_head *list,
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 227ab32..0a897cd 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -29,9 +29,11 @@
#define GUC_CLIENT_PRIORITY_NORMAL 3
#define GUC_CLIENT_PRIORITY_NUM 4
-#define GUC_MAX_STAGE_DESCRIPTORS 1024
+#define GUC_MAX_STAGE_DESCRIPTORS 2032
#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS
+#define GUC_MAX_LRC_PER_CLASS 64
+
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
#define GUC_BLITTER_ENGINE 2
@@ -71,9 +73,12 @@
#define GUC_DOORBELL_DISABLED 0
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
-#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
-#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2)
-#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3)
+#define GUC_STAGE_DESC_ATTR_TYPE_SHIFT 1
+#define GUC_STAGE_DESC_ATTR_PRINCIPAL (0x0 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_PROXY (0x1 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_REAL (0x2 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_TYPE_MASK (0x3 << GUC_STAGE_DESC_ATTR_TYPE_SHIFT)
+#define GUC_STAGE_DESC_ATTR_KERNEL (1 << 3)
#define GUC_STAGE_DESC_ATTR_RESET BIT(4)
#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5)
#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
@@ -282,9 +287,10 @@ struct guc_process_desc {
u64 wq_base_addr;
u32 wq_size_bytes;
u32 wq_status;
- u32 engine_presence;
u32 priority;
- u32 reserved[30];
+ u32 token;
+ u32 queue_engine_error;
+ u32 reserved[23];
} __packed;
/* engine id and context id is packed into guc_execlist_context.context_id*/
@@ -295,16 +301,19 @@ struct guc_process_desc {
struct guc_execlist_context {
u32 context_desc;
u32 context_id;
- u32 ring_status;
+ u32 reserved0;
u32 ring_lrca;
u32 ring_begin;
u32 ring_end;
u32 ring_next_free_location;
u32 ring_current_tail_pointer_value;
- u8 engine_state_submit_value;
- u8 engine_state_wait_value;
- u16 pagefault_count;
- u16 engine_submit_queue_count;
+ u32 engine_state_wait_value;
+ u32 state_reserved;
+ u32 is_present_in_sq;
+ u32 sync_value;
+ u32 sync_addr;
+ u32 slpc_hints;
+ u32 reserved1[4];
} __packed;
/*
@@ -317,36 +326,30 @@ struct guc_execlist_context {
* with the GuC, being allocated before the GuC is loaded with its firmware.
*/
struct guc_stage_desc {
- u32 sched_common_area;
+ u64 desc_private;
u32 stage_id;
- u32 pas_id;
- u8 engines_used;
+ u32 proxy_id;
u64 db_trigger_cpu;
u32 db_trigger_uk;
u64 db_trigger_phy;
- u16 db_id;
-
- struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
-
- u8 attribute;
-
- u32 priority;
-
+ u32 db_id;
+ struct guc_execlist_context lrc[GUC_MAX_ENGINE_CLASSES][GUC_MAX_LRC_PER_CLASS];
+ u32 lrc_bitmap[GUC_MAX_ENGINE_CLASSES][3];
+ u32 lrc_count;
+ u32 max_lrc_per_class;
+ u32 attribute; /* GUC_STAGE_DESC_ATTR_xxx */
+ u32 priority; /* GUC_CLIENT_PRIORITY_xxx */
u32 wq_sampled_tail_offset;
u32 wq_total_submit_enqueues;
-
u32 process_desc;
u32 wq_addr;
u32 wq_size;
-
- u32 engine_presence;
-
- u8 engine_suspended;
-
- u8 reserved0[3];
- u64 reserved1[1];
-
- u64 desc_private;
+ u32 feature0;
+ u32 feature1;
+ u32 feature2;
+ u32 queue_engine_error;
+ u32 reserved[2];
+ u64 reserved3[12];
} __packed;
/**
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 07b9d31..54655dc 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -46,17 +46,22 @@
* that contains all required pages for these elements).
*
* GuC stage descriptor:
- * During initialization, the driver allocates a static pool of 1024 such
- * descriptors, and shares them with the GuC.
- * Currently, there exists a 1:1 mapping between a intel_guc_client and a
- * guc_stage_desc (via the client's stage_id), so effectively only one
- * gets used. This stage descriptor lets the GuC know about the doorbell,
- * workqueue and process descriptor. Theoretically, it also lets the GuC
- * know about our HW contexts (context ID, etc...), but we actually
- * employ a kind of submission where the GuC uses the LRCA sent via the work
- * item instead (the single guc_stage_desc associated to execbuf client
- * contains information about the default kernel context only, but this is
- * essentially unused). This is called a "proxy" submission.
+ * During initialization, the driver allocates a static pool of descriptors
+ * and shares them with the GuC. This stage descriptor lets the GuC know about
+ * the doorbell, workqueue and process descriptor, additionally it stores
+ * information about all possible HW contexts that use it (64 x number of
+ * engine classes of guc_execlist_context structs).
+ *
+ * The idea is that every direct-submission GuC client gets one SW Context ID
+ * and every HW context created by that client gets one SW Counter. The "SW
+ * Context ID" and "SW Counter" to use now get passed on every work queue item.
+ *
+ * But we don't have direct submission yet: does that mean we are limited to 64
+ * contexts in total (one client)? Not really: we can use extra GuC context
+ * descriptors to store more HW contexts. They are special in that they don't
+ * have their own work queue, doorbell or process descriptor. Instead, these
+ * "principal" GuC context descriptors use the one that belongs to the client
+ * as a "proxy" for submission (a generalization of the old proxy submission).
AFAICS, with the implementation in this patch, a stage_desc can be both
"proxy" and "principal". This doesn't match the expectation of the FW; I
have confirmed with GuC FW engineers that they expect each stage
descriptor used by the kernel to be either one or the other and the
interface has different values for the same field to distinguish. The
only case where proxy stuff (db, wq) and principal stuff (lrcs) are
together should be for descriptors marked with GUC_STAGE_DESC_ATTR_REAL
which AFAIU are the ones used for direct submission.
Things still kinda work when we do this with only 1 client since the
only proxy we have is at the same time the principal of the
kernel_context, so it is somehow a special and more stable case, but the
abstraction doesn't really scale to more clients. I think a cleaner
solution would be to reserve a handful of stage descriptors (or even
just one) at the end of the array to work as proxies and use the
remaining descriptors as principals. This way we can have a cleaner
in-code separation of the 2 usages with very little cost. The only
impact I can see is in the selftests as those won't be able to create
256 clients anymore, but I'm sure we can find a way to override the
reservation limit for selftest only as those would only need proxy
entries anyway.
Also, each principal is tied to a single proxy via desc->proxy_id, so
that also doesn't work with the current implementation.
Thoughts?
I'm going to skip detailed code review while we discuss the above.
Thanks,
Daniele
*
* The Scratch registers:
* There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -171,6 +176,16 @@ static struct guc_stage_desc *__get_stage_desc(struct intel_guc_client *client)
return &base[client->stage_id];
}
+static struct guc_stage_desc *__get_ppal_stage_desc(struct intel_guc *guc,
+ u32 index)
+{
+ struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
+
+ GEM_BUG_ON(index >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ return &base[index];
+}
+
/*
* Initialise, update, or clear doorbell data shared with the GuC
*
@@ -344,70 +359,20 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
static void guc_stage_desc_init(struct intel_guc *guc,
struct intel_guc_client *client)
{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct intel_engine_cs *engine;
- struct i915_gem_context *ctx = client->owner;
struct guc_stage_desc *desc;
- unsigned int tmp;
u32 gfx_addr;
desc = __get_stage_desc(client);
memset(desc, 0, sizeof(*desc));
desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+ GUC_STAGE_DESC_ATTR_PROXY |
GUC_STAGE_DESC_ATTR_KERNEL;
- if (is_high_priority(client))
- desc->attribute |= GUC_STAGE_DESC_ATTR_PREEMPT;
desc->stage_id = client->stage_id;
+ desc->proxy_id = client->stage_id;
desc->priority = client->priority;
desc->db_id = client->doorbell_id;
- for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
- struct intel_context *ce = to_intel_context(ctx, engine);
- u32 guc_engine_id = engine->guc_id;
- struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
-
- /* TODO: We have a design issue to be solved here. Only when we
- * receive the first batch, we know which engine is used by the
- * user. But here GuC expects the lrc and ring to be pinned. It
- * is not an issue for default context, which is the only one
- * for now who owns a GuC client. But for future owner of GuC
- * client, need to make sure lrc is pinned prior to enter here.
- */
- if (!ce->state)
- break; /* XXX: continue? */
-
- /*
- * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy
- * submission or, in other words, not using a direct submission
- * model) the KMD's LRCA is not used for any work submission.
- * Instead, the GuC uses the LRCA of the user mode context (see
- * guc_add_request below).
- */
- lrc->context_desc = lower_32_bits(ce->lrc_desc);
-
- /* The state page is after PPHWSP */
- lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) +
- LRC_STATE_PN * PAGE_SIZE;
-
- /* XXX: In direct submission, the GuC wants the HW context id
- * here. In proxy submission, it wants the stage id
- */
- lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
- (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
-
- lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
- lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
- lrc->ring_next_free_location = lrc->ring_begin;
- lrc->ring_current_tail_pointer_value = 0;
-
- desc->engines_used |= (1 << guc_engine_id);
- }
-
- DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
- client->engines, desc->engines_used);
- WARN_ON(desc->engines_used == 0);
-
/*
* The doorbell, process descriptor, and workqueue are all parts
* of the client object, which the GuC will reference via the GGTT
@@ -430,7 +395,15 @@ static void guc_stage_desc_fini(struct intel_guc *guc,
struct guc_stage_desc *desc;
desc = __get_stage_desc(client);
- memset(desc, 0, sizeof(*desc));
+ /* No memset: the stage desc might still be used as a principal */
+ desc->attribute &= ~GUC_STAGE_DESC_ATTR_TYPE_MASK;
+ desc->db_id = 0;
+ desc->db_trigger_phy = 0;
+ desc->db_trigger_cpu = 0;
+ desc->db_trigger_uk = 0;
+ desc->process_desc = 0;
+ desc->wq_addr = 0;
+ desc->wq_size = 0;
}
/* Construct a Work Item and append it to the GuC's Work Queue */
@@ -1299,6 +1272,87 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
}
+static void guc_ppal_stage_attach(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ if (desc->lrc_count == 0) {
+ desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+ GUC_STAGE_DESC_ATTR_PRINCIPAL |
+ GUC_STAGE_DESC_ATTR_KERNEL;
+ desc->stage_id = ce->sw_context_id;
+ }
+
+ GEM_BUG_ON(bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+ bitmap32_set_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+ desc->lrc_count++;
+
+ /* GuC optimizations */
+ if (ce->sw_counter >= desc->max_lrc_per_class)
+ desc->max_lrc_per_class = ce->sw_counter + 1;
+}
+
+static void guc_ppal_stage_detach(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+ struct guc_execlist_context *lrc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+ bitmap32_clear_bit(desc->lrc_bitmap[engine->guc_class], ce->sw_counter);
+ desc->lrc_count--;
+
+ if (desc->lrc_count == 0) {
+ desc->attribute = 0;
+ desc->stage_id = 0;
+ desc->max_lrc_per_class = 0;
+ } else {
+ /* TODO: GuC optimizations */
+ }
+
+ lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+ memset(lrc, 0, sizeof(*lrc));
+}
+
+static void guc_ppal_stage_update(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_guc *guc = &ctx->i915->guc;
+ struct intel_context *ce = to_intel_context(ctx, engine);
+ struct guc_stage_desc *desc;
+ struct guc_execlist_context *lrc;
+
+ GEM_BUG_ON(ce->sw_context_id >= GUC_MAX_STAGE_DESCRIPTORS);
+
+ desc = __get_ppal_stage_desc(guc, ce->sw_context_id);
+
+ GEM_BUG_ON(!bitmap32_test_bit(desc->lrc_bitmap[engine->guc_class],
+ ce->sw_counter));
+
+ lrc = &desc->lrc[engine->guc_class][ce->sw_counter];
+
+ lrc->context_desc = lower_32_bits(ce->lrc_desc);
+ lrc->context_id = upper_32_bits(ce->lrc_desc);
+ lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + LRC_STATE_PN * PAGE_SIZE;
+ lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
+ lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
+}
+
int intel_guc_submission_enable(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -1321,17 +1375,26 @@ int intel_guc_submission_enable(struct intel_guc *guc)
GEM_BUG_ON(!guc->execbuf_client);
+ dev_priv->contexts.alloc_hook = guc_ppal_stage_attach;
+ dev_priv->contexts.update_hook = guc_ppal_stage_update;
+ dev_priv->contexts.free_hook = guc_ppal_stage_detach;
+
+ for_each_engine(engine, dev_priv, id) {
+ guc_ppal_stage_attach(dev_priv->kernel_context, engine);
+ guc_ppal_stage_update(dev_priv->kernel_context, engine);
+ }
+
guc_reset_wq(guc->execbuf_client);
if (guc->preempt_client)
guc_reset_wq(guc->preempt_client);
err = intel_guc_sample_forcewake(guc);
if (err)
- return err;
+ goto err_clear_ctx_hooks;
err = guc_clients_doorbell_init(guc);
if (err)
- return err;
+ goto err_clear_ctx_hooks;
/* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(dev_priv);
@@ -1342,6 +1405,12 @@ int intel_guc_submission_enable(struct intel_guc *guc)
}
return 0;
+
+err_clear_ctx_hooks:
+ dev_priv->contexts.alloc_hook = NULL;
+ dev_priv->contexts.update_hook = NULL;
+ dev_priv->contexts.free_hook = NULL;
+ return err;
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -1352,6 +1421,10 @@ void intel_guc_submission_disable(struct intel_guc *guc)
guc_interrupts_release(dev_priv);
guc_clients_doorbell_fini(guc);
+
+ dev_priv->contexts.alloc_hook = NULL;
+ dev_priv->contexts.update_hook = NULL;
+ dev_priv->contexts.free_hook = NULL;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx