From: Praveen Diwakar <praveen.diwakar@xxxxxxxxx> This patch will select the optimum eu/slice/sub-slice configuration based on the type of load (low, medium, high) as input. Based on our readings and experiments we have a predefined set of optimum configurations for each platform (CHT, KBL). i915_gem_context_set_load_type will select the optimum configuration from the predefined optimum configuration table (opt_config). It also introduces the flag update_render_config, which can be set by any governor. v2: * Move static optimum_config to device init time. * Rename function to appropriate name, fix data types and patch ordering. * Rename prev_load_type to pending_load_type. (Tvrtko Ursulin) v3: * Add safeguard check in i915_gem_context_set_load_type. * Rename struct from optimum_config to i915_sseu_optimum_config to avoid namespace clashes. * Reduce memcpy for space efficiency. * Rebase. * Improve commit message. (Tvrtko Ursulin) v4: * Move optimum config table to file scope. (Tvrtko Ursulin) Cc: Kedar J Karanje <kedar.j.karanje@xxxxxxxxx> Cc: Yogesh Marathe <yogesh.marathe@xxxxxxxxx> Signed-off-by: Praveen Diwakar <praveen.diwakar@xxxxxxxxx> Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@xxxxxxxxx> Signed-off-by: Ankit Navik <ankit.p.navik@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++++ drivers/gpu/drm/i915/i915_gem_context.c | 20 ++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.h | 34 +++++++++++++++++++++++ drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++++- 5 files changed, 148 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c8d048..97cb36b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1593,6 +1593,11 @@ struct drm_i915_private { struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */ int num_fence_regs; /* 8 on pre-965, 16 otherwise */ + /* 
optimal slice/subslice/EU configuration state */ + struct i915_sseu_optimum_config *opt_config; + + int predictive_load_enable; + unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; unsigned int max_cdclk_freq; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a5876fe..8f16ef1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -454,10 +454,30 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, trace_i915_context_create(ctx); atomic_set(&ctx->req_cnt, 0); + ctx->slice_cnt = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask); + ctx->subslice_cnt = hweight8( + RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]); + ctx->eu_cnt = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice; return ctx; } + +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, + enum gem_load_type type) +{ + struct drm_i915_private *dev_priv = ctx->i915; + + if (GEM_WARN_ON(type >= LOAD_TYPE_LAST)) + return; + + /* Look up the per-platform slice/subslice/EU counts for this load type */ + ctx->slice_cnt = dev_priv->opt_config[type].slice; + ctx->subslice_cnt = dev_priv->opt_config[type].subslice; + ctx->eu_cnt = dev_priv->opt_config[type].eu; + ctx->pending_load_type = type; +} + /** * i915_gem_context_create_gvt - create a GVT GEM context * @dev: drm device * diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index c940168..0a24d28 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -54,6 +54,19 @@ struct intel_context_ops { void (*destroy)(struct intel_context *ce); }; +enum gem_load_type { + LOAD_TYPE_LOW, + LOAD_TYPE_MEDIUM, + LOAD_TYPE_HIGH, + LOAD_TYPE_LAST +}; + +struct i915_sseu_optimum_config { + u8 slice; + u8 subslice; + u8 eu; +}; + /* * Powergating configuration for a particular (context,engine). 
*/ @@ -232,6 +245,25 @@ struct i915_gem_context { * go for low/medium/high load configuration of the GPU. */ atomic_t req_cnt; + + /** slice_cnt: used to set the # of slices to be enabled. */ + u8 slice_cnt; + + /** subslice_cnt: used to set the # of subslices to be enabled. */ + u8 subslice_cnt; + + /** eu_cnt: used to set the # of eu to be enabled. */ + u8 eu_cnt; + + /** load_type: The designated load_type (high/medium/low) for a given + * number of pending commands in the command queue. + */ + enum gem_load_type load_type; + + /** pending_load_type: The most recently requested load type + * (high/medium/low); it is copied to load_type when the context is pinned. + */ + enum gem_load_type pending_load_type; }; static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx) @@ -375,6 +407,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void i915_gem_context_set_load_type(struct i915_gem_context *ctx, + enum gem_load_type type); struct i915_gem_context * i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 855a507..017a1e2 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -707,6 +707,27 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) return 0; } +/* static table of slice/subslice/EU for Cherryview */ +static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = { + {1, 1, 4}, /* Low */ + {1, 1, 6}, /* Medium */ + {1, 2, 6} /* High */ +}; + +/* static table of slice/subslice/EU for KBL GT2 */ +static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = { + {1, 3, 2}, /* Low */ + {1, 3, 4}, /* Medium */ + {1, 3, 8} /* High */ +}; + +/* static table of slice/subslice/EU for KBL GT3 */ +static const 
struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = { + {2, 3, 4}, /* Low */ + {2, 3, 6}, /* Medium */ + {2, 3, 8} /* High */ +}; + /** * intel_device_info_runtime_init - initialize runtime info * @dev_priv: the i915 device @@ -728,6 +749,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) struct intel_device_info *info = mkwrite_device_info(dev_priv); struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv); enum pipe pipe; + struct i915_sseu_optimum_config *opt_config = NULL; if (INTEL_GEN(dev_priv) >= 10) { for_each_pipe(dev_priv, pipe) @@ -831,12 +853,30 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize slice/subslice/EU info */ if (IS_HASWELL(dev_priv)) haswell_sseu_info_init(dev_priv); - else if (IS_CHERRYVIEW(dev_priv)) + else if (IS_CHERRYVIEW(dev_priv)) { cherryview_sseu_info_init(dev_priv); + opt_config = chv_config; + BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST); + } else if (IS_BROADWELL(dev_priv)) broadwell_sseu_info_init(dev_priv); - else if (IS_GEN(dev_priv, 9)) + else if (IS_GEN(dev_priv, 9)) { gen9_sseu_info_init(dev_priv); + + switch (info->gt) { + default: /* fall through */ + case 2: + opt_config = kbl_gt2_config; + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config) + != LOAD_TYPE_LAST); + break; + case 3: + opt_config = kbl_gt3_config; + BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config) + != LOAD_TYPE_LAST); + break; + } + } else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) >= 11) @@ -847,6 +887,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->ppgtt = INTEL_PPGTT_NONE; } + if (opt_config) + dev_priv->opt_config = opt_config; + /* Initialize command stream timestamp frequency */ runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d0af37d..397af1e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ 
b/drivers/gpu/drm/i915/intel_lrc.c @@ -1282,6 +1282,35 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, 0, flags); } +static u32 +get_context_rpcs_config(struct i915_gem_context *ctx) +{ + u32 rpcs = 0; + struct drm_i915_private *dev_priv = ctx->i915; + + if (INTEL_GEN(dev_priv) < 8) + return 0; + + if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) { + rpcs |= GEN8_RPCS_S_CNT_ENABLE; + rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) { + rpcs |= GEN8_RPCS_SS_CNT_ENABLE; + rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) { + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT; + rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT; + rpcs |= GEN8_RPCS_ENABLE; + } + + return rpcs; +} static void __execlists_update_reg_state(struct intel_engine_cs *engine, struct intel_context *ce) @@ -1294,9 +1323,20 @@ __execlists_update_reg_state(struct intel_engine_cs *engine, regs[CTX_RING_TAIL + 1] = ring->tail; /* RPCS */ - if (engine->class == RENDER_CLASS) + if (engine->class == RENDER_CLASS && + engine->i915->predictive_load_enable) { + u32 rpcs_config = 0; + struct i915_gem_context *ctx = ce->gem_context; + + rpcs_config = get_context_rpcs_config(ctx); + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + rpcs_config); + + } else if (engine->class == RENDER_CLASS) { regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915, &ce->sseu); + } } static struct intel_context * @@ -1340,6 +1380,9 @@ __execlists_context_pin(struct intel_engine_cs *engine, __execlists_update_reg_state(engine, ce); + if (ctx->load_type != ctx->pending_load_type) + ctx->load_type = ctx->pending_load_type; + ce->state->obj->pin_global++; i915_gem_context_get(ctx); return ce; -- 2.7.4 _______________________________________________ 
Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx