This patch will select optimum eu/slice/sub-slice configuration based on type of load (low, medium, high) as input. Based on our readings and experiments we have predefined set of optimum configuration for each platform(CHT, KBL). i915_gem_context_set_load_type will select optimum configuration from pre-defined optimum configuration table(opt_config). It also introduce flag update_render_config which can set by any governor. v2: * Move static optimum_config to device init time. * Rename function to appropriate name, fix data types and patch ordering. * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin) v3: * Add safe guard check in i915_gem_context_set_load_type. * Rename struct from optimum_config to i915_sseu_optimum_config to avoid namespace clashes. * Reduces memcpy for space efficient. * Rebase. * Improved commit message. (Tvrtko Ursulin) v4: * Move optimum config table to file scope. (Tvrtko Ursulin) v5: * Adds optimal table of slice/sub-slice/EU for Gen 9 GT1. * Rebase. v6: * Rebase. * Fix warnings. v7: * Fix return conditions. * Remove i915_gem_context_set_load_type and move logic to __execlists_update_reg_state. (Tvrtko Ursulin) Cc: Vipin Anand <vipin.anand@xxxxxxxxx> Signed-off-by: Ankit Navik <ankit.p.navik@xxxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 3 + drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 32 +++++++++++ drivers/gpu/drm/i915/gt/intel_context_sseu.c | 2 + drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + drivers/gpu/drm/i915/gt/intel_lrc.c | 70 ++++++++++++++++++++++- drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/intel_device_info.c | 55 +++++++++++++++++- 7 files changed, 165 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index d0ff999429ff..3aad45b0ba5a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -880,6 +880,9 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) trace_i915_context_create(ctx); atomic_set(&ctx->req_cnt, 0); + ctx->slice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.slice_mask); + ctx->subslice_cnt = hweight8(RUNTIME_INFO(i915)->sseu.subslice_mask[0]); + ctx->eu_cnt = RUNTIME_INFO(i915)->sseu.eu_per_subslice; return ctx; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index a9ba13f8865e..1af1acd73794 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -46,6 +46,19 @@ struct i915_gem_engines_iter { const struct i915_gem_engines *engines; }; +enum gem_load_type { + LOAD_TYPE_LOW, + LOAD_TYPE_MEDIUM, + LOAD_TYPE_HIGH, + LOAD_TYPE_LAST +}; + +struct i915_sseu_optimum_config { + u8 slice; + u8 subslice; + u8 eu; +}; + /** * struct i915_gem_context - client state * @@ -155,6 +168,25 @@ struct i915_gem_context { */ atomic_t active_count; + /** slice_cnt: used to set the # of slices to be enabled. */ + u8 slice_cnt; + + /** subslice_cnt: used to set the # of subslices to be enabled. */ + u8 subslice_cnt; + + /** eu_cnt: used to set the # of eu to be enabled. */ + u8 eu_cnt; + + /** load_type: The designated load_type (high/medium/low) for a given + * number of pending commands in the command queue. + */ + enum gem_load_type load_type; + + /** pending_load_type: The earlier load type that the GPU was configured + * for (high/medium/low). + */ + enum gem_load_type pending_load_type; + /** * @hang_timestamp: The last time(s) this context caused a GPU hang */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index 57a30956c922..4f51bfb9690c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -84,6 +84,8 @@ intel_context_reconfigure_sseu(struct intel_context *ce, if (ret) return ret; + ce->user_sseu = true; + /* Nothing to do if unmodified. */ if (!memcmp(&ce->sseu, &sseu, sizeof(sseu))) goto unlock; diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0f3b68b95c56..fd5811110026 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -93,6 +93,8 @@ struct intel_context { const struct intel_context_ops *ops; + bool user_sseu; + /** sseu: Control eu/slice partitioning */ struct intel_sseu sseu; }; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ccfebebb0071..7c5f05886278 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -177,6 +177,14 @@ /* Typical size of the average request (2 pipecontrols and a MI_BB) */ #define EXECLISTS_REQUEST_SIZE 64 /* bytes */ +/* + * Anything above threshold is considered as HIGH load, less is considered + * as LOW load and equal is considered as MEDIUM load. + * + * The threshold value of three active requests pending. + */ +#define PENDING_THRESHOLD_MEDIUM 3 + struct virtual_engine { struct intel_engine_cs base; struct intel_context context; @@ -3002,6 +3010,36 @@ static void execlists_context_unpin(struct intel_context *ce) i915_gem_object_unpin_map(ce->state->obj); } +static u32 +get_context_rpcs_config(struct i915_gem_context *ctx) +{ + u32 rpcs = 0; + struct drm_i915_private *dev_priv = ctx->i915; + + if (INTEL_GEN(dev_priv) < 8) + return 0; + + if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) { + rpcs |= GEN8_RPCS_S_CNT_ENABLE; + rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) { + rpcs |= GEN8_RPCS_SS_CNT_ENABLE; + rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) { + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT; + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} + static void __execlists_update_reg_state(const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -3009,6 +3047,10 @@ __execlists_update_reg_state(const struct intel_context *ce, { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; + const struct i915_sseu_optimum_config *cfg; + struct i915_gem_context *ctx; + enum gem_load_type load_type; + u32 req_pending; GEM_BUG_ON(!intel_ring_offset_valid(ring, head)); GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail)); @@ -3018,10 +3060,31 @@ __execlists_update_reg_state(const struct intel_context *ce, regs[CTX_RING_TAIL] = ring->tail; regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + GEM_BUG_ON(ce->engine->class != RENDER_CLASS); + ctx = rcu_dereference_protected(ce->gem_context, true); + + req_pending = atomic_read(&ctx->req_cnt); + + if (req_pending > PENDING_THRESHOLD_MEDIUM) + load_type = LOAD_TYPE_HIGH; + else if (req_pending == PENDING_THRESHOLD_MEDIUM) + load_type = LOAD_TYPE_MEDIUM; + else + load_type = LOAD_TYPE_LOW; + + cfg = &ctx->i915->opt_config[load_type]; + /* RPCS */ if (engine->class == RENDER_CLASS) { - regs[CTX_R_PWR_CLK_STATE] = - intel_sseu_make_rpcs(engine->i915, &ce->sseu); + + if (!ctx || !ctx->i915->predictive_load_enable + || ce->user_sseu) { + regs[CTX_R_PWR_CLK_STATE] = + intel_sseu_make_rpcs(engine->i915, &ce->sseu); + } else { + regs[CTX_R_PWR_CLK_STATE] = + get_context_rpcs_config(ce->gem_context); + } i915_oa_init_reg_state(ce, engine); } @@ -3046,6 +3109,9 @@ __execlists_context_pin(struct intel_context *ce, ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; __execlists_update_reg_state(ce, engine, ce->ring->tail); + if (ce->gem_context->load_type != ce->gem_context->pending_load_type) + ce->gem_context->load_type = ce->gem_context->pending_load_type; + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1f5b9a584f71..304d95aa4974 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -926,6 +926,11 @@ struct drm_i915_private { /* protects panel power sequencer state */ struct mutex pps_mutex; + /* optimal slice/subslice/EU configration state */ + struct i915_sseu_optimum_config *opt_config; + + bool predictive_load_enable; + unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; unsigned int max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d7fe12734db8..53d966a9097e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -899,6 +899,34 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) RUNTIME_INFO(i915)->platform_mask[pi] |= mask; } +/* static table of slice/subslice/EU for Cherryview */ +static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = { + {1, 1, 4}, /* Low */ + {1, 1, 6}, /* Medium */ + {1, 2, 6} /* High */ +}; + +/* static table of slice/subslice/EU for GLK GT1 */ +static const struct i915_sseu_optimum_config glk_gt1_config[LOAD_TYPE_LAST] = { + {1, 2, 2}, /* Low */ + {1, 2, 3}, /* Medium */ + {1, 2, 6} /* High */ +}; + +/* static table of slice/subslice/EU for KBL GT2 */ +static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = { + {1, 3, 2}, /* Low */ + {1, 3, 4}, /* Medium */ + {1, 3, 8} /* High */ +}; + +/* static table of slice/subslice/EU for KBL GT3 */ +static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = { + {2, 3, 4}, /* Low */ + {2, 3, 6}, /* Medium */ + {2, 3, 8} /* High */ +}; + /** * intel_device_info_runtime_init - initialize runtime info * @dev_priv: the i915 device @@ -1027,12 +1055,35 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize slice/subslice/EU info */ if (IS_HASWELL(dev_priv)) hsw_sseu_info_init(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) + else if (IS_CHERRYVIEW(dev_priv)) { cherryview_sseu_info_init(dev_priv); + BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST); + dev_priv->opt_config = chv_config; + } else if (IS_BROADWELL(dev_priv)) bdw_sseu_info_init(dev_priv); - else if (IS_GEN(dev_priv, 9)) + else if (IS_GEN(dev_priv, 9)) { gen9_sseu_info_init(dev_priv); + + switch (info->gt) { + default: /* fall through */ + case 1: + BUILD_BUG_ON(ARRAY_SIZE(glk_gt1_config) != + LOAD_TYPE_LAST); + dev_priv->opt_config = glk_gt1_config; + break; + case 2: + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config) != + LOAD_TYPE_LAST); + dev_priv->opt_config = kbl_gt2_config; + break; + case 3: + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config) != + LOAD_TYPE_LAST); + dev_priv->opt_config = kbl_gt3_config; + break; + } + } else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 11)) -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx