From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> It seems that the DMC likes to transition between the DC states a lot when there are no connected displays (no active power domains) during command submission. This activity on DC states has a negative impact on the performance of the chip with huge latencies observed in the interrupt handlers and elsewhere. Simple tests like igt/gem_latency -n 0 are slowed down by a factor of eight. Work around it by introducing a new power domain named, POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is held for the duration of command submission activity. v2: * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson) * Protect macro body with braces. (Jani Nikula) v3: * Add dedicated power domain for clarity. (Chris, Imre) * Commit message and comment text updates. * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC firmware release. v4: * Power domain should be inner to device runtime pm. (Chris) * Simplify NEEDS_CSR_GT_PERF_WA macro. (Chris) * Handle async DMC loading by moving the GT_IRQ power domain logic into intel_runtime_pm. (Daniel, Chris) * Include small core GEN9 as well. (Imre) Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572 Testcase: igt/gem_exec_nop/headless Cc: Imre Deak <imre.deak@xxxxxxxxx> Acked-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> (v2) Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++++ drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++++++++ drivers/gpu/drm/i915/intel_csr.c | 29 +++++++++++++--------- drivers/gpu/drm/i915/intel_runtime_pm.c | 44 +++++++++++++++++++++++++++------ 5 files changed, 76 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bddd65839f60..37b3da4fd0d4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -398,6 +398,7 @@ enum intel_display_power_domain { POWER_DOMAIN_AUX_D, POWER_DOMAIN_GMBUS, POWER_DOMAIN_MODESET, + POWER_DOMAIN_GT_IRQ, POWER_DOMAIN_INIT, POWER_DOMAIN_NUM, @@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv) #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 +#define NEEDS_CSR_GT_PERF_WA(dev_priv) \ + ((dev_priv)->csr.dmc_payload && \ + IS_GEN9(dev_priv) && !IS_SKYLAKE(dev_priv)) + #include "i915_trace.h" static inline bool intel_vtd_active(void) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 354b0546a191..a6f522e2d1a1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3381,6 +3381,8 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a90bdd26571f..c28a4ceb016d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = true; intel_enable_gt_powersave(i915); diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 07e4f7bc4412..8b188e9f283b 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -403,24 +403,33 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, static void csr_load_work_fn(struct work_struct *work) { - struct drm_i915_private *dev_priv; - struct intel_csr *csr; + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), csr.work); + struct intel_csr *csr = &dev_priv->csr; const struct firmware *fw = NULL; + u32 *dmc_payload; - dev_priv = container_of(work, typeof(*dev_priv), csr.work); - csr = &dev_priv->csr; + request_firmware(&fw, csr->fw_path, &dev_priv->drm.pdev->dev); - request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); - if (fw) - dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); + if (fw) { + dmc_payload = parse_csr_fw(dev_priv, fw); + release_firmware(fw); + } else { + dmc_payload = NULL; + } - if (dev_priv->csr.dmc_payload) { + /* Lock to document memory barrier towards NEEDS_CSR_GT_PERF_WA. */ + mutex_lock(&dev_priv->power_domains.lock); + csr->dmc_payload = dmc_payload; + mutex_unlock(&dev_priv->power_domains.lock); + + if (csr->dmc_payload) { intel_csr_load_program(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n", - dev_priv->csr.fw_path, + csr->fw_path, CSR_VERSION_MAJOR(csr->version), CSR_VERSION_MINOR(csr->version)); } else { @@ -431,8 +440,6 @@ static void csr_load_work_fn(struct work_struct *work) dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s", INTEL_UC_FIRMWARE_URL); } - - release_firmware(fw); } /** diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8315499452dc..915124a2e945 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "INIT"; case POWER_DOMAIN_MODESET: return "MODESET"; + case POWER_DOMAIN_GT_IRQ: + return "GT_IRQ"; default: MISSING_CASE(domain); return "?"; @@ -1448,6 +1450,9 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; + if (domain == POWER_DOMAIN_GT_IRQ && !NEEDS_CSR_GT_PERF_WA(dev_priv)) + return; + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); @@ -1518,6 +1523,19 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, return is_enabled; } +static void +__intel_display_power_put_domain(struct drm_i915_private *dev_priv, + enum intel_display_power_domain domain) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + power_domains->domain_use_count[domain]--; + + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) + intel_power_well_put(dev_priv, power_well); +} + /** * intel_display_power_put - release a power domain reference * @dev_priv: i915 device instance @@ -1530,21 +1548,30 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; - struct i915_power_well *power_well; - - power_domains = &dev_priv->power_domains; + struct i915_power_domains *power_domains = &dev_priv->power_domains; mutex_lock(&power_domains->lock); + if (domain == POWER_DOMAIN_GT_IRQ) { + /* + * To handle asynchronous DMC loading we have to be careful to + * keep the use count on POWER_DOMAIN_GT_IRQ balanced. + * + * If there was GT activity before the DMC was loaded, we will + * have skipped obtaining the power domain so must not decrement + * it now. + */ + if (!power_domains->domain_use_count[domain]) + goto out; + } + WARN(!power_domains->domain_use_count[domain], "Use count on domain %s is already zero\n", intel_display_power_domain_str(domain)); - power_domains->domain_use_count[domain]--; - for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) - intel_power_well_put(dev_priv, power_well); + __intel_display_power_put_domain(dev_priv, domain); +out: mutex_unlock(&power_domains->lock); intel_runtime_pm_put(dev_priv); @@ -1705,6 +1732,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1727,6 +1755,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1785,6 +1814,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) -- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx