On 26/04/2017 13:20, Joonas Lahtinen wrote:
Pre-calculate engine context size based on engine class and device generation and store it in the engine instance. v2: - Squash and get rid of hw_context_size (Chris) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Paulo Zanoni <paulo.r.zanoni@xxxxxxxxx> Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Oscar Mateo <oscar.mateo@xxxxxxxxx> Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx> Cc: intel-gvt-dev@xxxxxxxxxxxxxxxxxxxxx --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 +-- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_context.c | 59 +++------------------- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 10 ---- drivers/gpu/drm/i915/intel_engine_cs.c | 78 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 54 +-------------------- drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-- 9 files changed, 93 insertions(+), 127 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a77db23..ac538dc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 357b6c6..4b54b92 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2359,7 +2359,6 @@ struct drm_i915_private { */ struct mutex av_mutex; - uint32_t hw_context_size; struct list_head context_list; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c49..b69a026 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,33 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static int get_context_size(struct drm_i915_private *dev_priv) -{ - int ret; - u32 reg; - - switch (INTEL_GEN(dev_priv)) { - case 6: - reg = I915_READ(CXT_SIZE); - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; - break; - case 7: - reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev_priv)) - ret = HSW_CXT_TOTAL_SIZE; - else - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; - break; - case 8: - ret = GEN8_CXT_TOTAL_SIZE; - break; - default: - BUG(); - } - - return ret; -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -266,11 +239,12 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - if (dev_priv->hw_context_size) { + if (dev_priv->engine[RCS]->context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); + obj = alloc_context_obj(dev_priv, + dev_priv->engine[RCS]->context_size); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_out; @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->context_hw_ida); - if (i915.enable_execlists) { - /* NB: intentionally left blank. We will allocate our own - * backing objects as we need them, thank you very much */ - dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev_priv)) { - dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), - I915_GTT_PAGE_SIZE);
Is this rounding up lost when used from __create_hw_context?
- if (dev_priv->hw_context_size > (1<<20)) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", - dev_priv->hw_context_size); - dev_priv->hw_context_size = 0; - } - } - ctx = i915_gem_create_context(dev_priv, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", @@ -478,7 +437,7 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("%s context support initialized\n", i915.enable_execlists ? "LR" : - dev_priv->hw_context_size ? "HW" : "fake"); + dev_priv->engine[RCS]->context_size ? "HW" : "fake"); return 0; } @@ -941,11 +900,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) return 0; } -static bool contexts_enabled(struct drm_device *dev) -{ - return i915.enable_execlists || to_i915(dev)->hw_context_size; -} - static bool client_is_banned(struct drm_i915_file_private *file_priv) { return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; @@ -954,12 +908,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int ret; - if (!contexts_enabled(dev)) + if (!dev_priv->engine[RCS]->context_size) return -ENODEV; if (args->pad != 0) @@ -977,7 +932,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(to_i915(dev), file_priv); + ctx = i915_gem_create_context(dev_priv, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ab5140b..6c78637 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4c72ada..ee8170c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3370,16 +3370,6 @@ enum skl_disp_power_wells { #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. - */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) enum { INTEL_ADVANCED_CONTEXT = 0, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 82a274b..24bcd9a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,6 +26,22 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. */ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); @@ -107,6 +123,64 @@ static const struct engine_info intel_engines[] = { }, }; +/** + * ___intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
Very minor, but Chris has been trying to establish i915 instead of dev_priv in new code.
+{ + u32 reg;
Also very minor, I think we normally use val for register values and reg for the register itself. Quick grep suggests we are not 100% consistent already so feel free to ignore. :)
+ + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return i915.enable_execlists ? + GEN8_LR_CONTEXT_RENDER_SIZE : + GEN8_CXT_TOTAL_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + reg = I915_READ(GEN7_CXT_SIZE); + return GEN7_CXT_TOTAL_SIZE(reg) * 64; + case 6: + reg = I915_READ(CXT_SIZE); + return GEN6_CXT_TOTAL_SIZE(reg) * 64; + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + WARN_ON(INTEL_GEN(dev_priv) < 8); + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) @@ -134,6 +208,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0;
Don't know the history to tell whether upgrade of DRM_DEBUG_DRIVER to a WARN_ON is ok.
/* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5ec064a..0909549 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. - * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - WARN_ON(INTEL_GEN(dev_priv) < 8); - - switch (engine->class) { - case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { - default: - MISSING_CASE(INTEL_GEN(dev_priv)); - case 9: - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - break; - case 8: - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - } - break; - - default: - MISSING_CASE(engine->class); - case VIDEO_DECODE_CLASS: - case VIDEO_ENHANCEMENT_CLASS: - case COPY_ENGINE_CLASS: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7..52b3a1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96710b6..a64d18c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -196,13 +196,14 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int uabi_id; unsigned int hw_id; + unsigned int guc_id; u8 class; u8 instance; - - unsigned int guc_id; - u32 mmio_base; + u32 context_size; + u32 mmio_base; unsigned int irq_shift; + struct intel_ring *buffer; struct intel_timeline *timeline;
Looks good to me in principle, just unsure about those two items form above. Acked-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx