[PATCH v4] drm/i915: Restore GT performance in headless mode with DMC loaded

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

It seems that the DMC likes to transition between the DC states a lot when
there are no connected displays (no active power domains) during command
submission.

This activity on DC states has a negative impact on the performance of the
chip with huge latencies observed in the interrupt handlers and elsewhere.
Simple tests like igt/gem_latency -n 0 are slowed down by a factor of
eight.

Work around it by introducing a new power domain named,
POWER_DOMAIN_GT_IRQ, associtated with the "DC off" power well, which is
held for the duration of command submission activity.

v2:
 * Add commit text as comment in i915_gem_mark_busy. (Chris Wilson)
 * Protect macro body with braces. (Jani Nikula)

v3:
 * Add dedicated power domain for clarity. (Chris, Imre)
 * Commit message and comment text updates.
 * Apply to all big-core GEN9 parts apart for Skylake which is pending DMC
   firmware release.

v4:
 * Power domain should be inner to device runtime pm. (Chris)
 * Simplify NEEDS_CSR_GT_PERF_WA macro. (Chris)
 * Handle async DMC loading by moving the GT_IRQ power domain logic into
   intel_runtime_pm. (Daniel, Chris)
 * Include small core GEN9 as well. (Imre)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572
Testcase: igt/gem_exec_nop/headless
Cc: Imre Deak <imre.deak@xxxxxxxxx>
Acked-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> (v2)
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h         |  5 ++++
 drivers/gpu/drm/i915/i915_gem.c         |  2 ++
 drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++++++++
 drivers/gpu/drm/i915/intel_csr.c        | 29 +++++++++++++---------
 drivers/gpu/drm/i915/intel_runtime_pm.c | 44 +++++++++++++++++++++++++++------
 5 files changed, 76 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bddd65839f60..37b3da4fd0d4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -398,6 +398,7 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_AUX_D,
 	POWER_DOMAIN_GMBUS,
 	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_GT_IRQ,
 	POWER_DOMAIN_INIT,
 
 	POWER_DOMAIN_NUM,
@@ -3285,6 +3286,10 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define NEEDS_CSR_GT_PERF_WA(dev_priv) \
+	((dev_priv)->csr.dmc_payload && \
+	 IS_GEN9(dev_priv) && !IS_SKYLAKE(dev_priv))
+
 #include "i915_trace.h"
 
 static inline bool intel_vtd_active(void)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 354b0546a191..a6f522e2d1a1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3381,6 +3381,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
 
 	if (INTEL_GEN(dev_priv) >= 6)
 		gen6_rps_idle(dev_priv);
+
+	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
 	intel_runtime_pm_put(dev_priv);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a90bdd26571f..c28a4ceb016d 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915)
 	GEM_BUG_ON(!i915->gt.active_requests);
 
 	intel_runtime_pm_get_noresume(i915);
+
+	/*
+	 * It seems that the DMC likes to transition between the DC states a lot
+	 * when there are no connected displays (no active power domains) during
+	 * command submission.
+	 *
+	 * This activity has negative impact on the performance of the chip with
+	 * huge latencies observed in the interrupt handler and elsewhere.
+	 *
+	 * Work around it by grabbing a GT IRQ power domain whilst there is any
+	 * GT activity, preventing any DC state transitions.
+	 */
+	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+
 	i915->gt.awake = true;
 
 	intel_enable_gt_powersave(i915);
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 07e4f7bc4412..8b188e9f283b 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -403,24 +403,33 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
 
 static void csr_load_work_fn(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv;
-	struct intel_csr *csr;
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), csr.work);
+	struct intel_csr *csr = &dev_priv->csr;
 	const struct firmware *fw = NULL;
+	u32 *dmc_payload;
 
-	dev_priv = container_of(work, typeof(*dev_priv), csr.work);
-	csr = &dev_priv->csr;
+	request_firmware(&fw, csr->fw_path, &dev_priv->drm.pdev->dev);
 
-	request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev);
-	if (fw)
-		dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
+	if (fw) {
+		dmc_payload = parse_csr_fw(dev_priv, fw);
+		release_firmware(fw);
+	} else {
+		dmc_payload = NULL;
+	}
 
-	if (dev_priv->csr.dmc_payload) {
+	/* Lock to document memory barrier towards NEEDS_CSR_GT_PERF_WA. */
+	mutex_lock(&dev_priv->power_domains.lock);
+	csr->dmc_payload = dmc_payload;
+	mutex_unlock(&dev_priv->power_domains.lock);
+
+	if (csr->dmc_payload) {
 		intel_csr_load_program(dev_priv);
 
 		intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
 
 		DRM_INFO("Finished loading DMC firmware %s (v%u.%u)\n",
-			 dev_priv->csr.fw_path,
+			 csr->fw_path,
 			 CSR_VERSION_MAJOR(csr->version),
 			 CSR_VERSION_MINOR(csr->version));
 	} else {
@@ -431,8 +440,6 @@ static void csr_load_work_fn(struct work_struct *work)
 		dev_notice(dev_priv->drm.dev, "DMC firmware homepage: %s",
 			   INTEL_UC_FIRMWARE_URL);
 	}
-
-	release_firmware(fw);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 8315499452dc..915124a2e945 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
 		return "INIT";
 	case POWER_DOMAIN_MODESET:
 		return "MODESET";
+	case POWER_DOMAIN_GT_IRQ:
+		return "GT_IRQ";
 	default:
 		MISSING_CASE(domain);
 		return "?";
@@ -1448,6 +1450,9 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv,
 	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 	struct i915_power_well *power_well;
 
+	if (domain == POWER_DOMAIN_GT_IRQ && !NEEDS_CSR_GT_PERF_WA(dev_priv))
+		return;
+
 	for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain))
 		intel_power_well_get(dev_priv, power_well);
 
@@ -1518,6 +1523,19 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 	return is_enabled;
 }
 
+static void
+__intel_display_power_put_domain(struct drm_i915_private *dev_priv,
+				 enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+
+	power_domains->domain_use_count[domain]--;
+
+	for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
+		intel_power_well_put(dev_priv, power_well);
+}
+
 /**
  * intel_display_power_put - release a power domain reference
  * @dev_priv: i915 device instance
@@ -1530,21 +1548,30 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain)
 {
-	struct i915_power_domains *power_domains;
-	struct i915_power_well *power_well;
-
-	power_domains = &dev_priv->power_domains;
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
 
 	mutex_lock(&power_domains->lock);
 
+	if (domain == POWER_DOMAIN_GT_IRQ) {
+		/*
+		 * To handle asynchronous DMC loading we have to be careful to
+		 * keep the use count on POWER_DOMAIN_GT_IRQ balanced.
+		 *
+		 * If there was GT activity before the DMC was loaded, we will
+		 * have skipped obtaining the power domain so must not decrement
+		 * it now.
+		 */
+		if (!power_domains->domain_use_count[domain])
+			goto out;
+	}
+
 	WARN(!power_domains->domain_use_count[domain],
 	     "Use count on domain %s is already zero\n",
 	     intel_display_power_domain_str(domain));
-	power_domains->domain_use_count[domain]--;
 
-	for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain))
-		intel_power_well_put(dev_priv, power_well);
+	__intel_display_power_put_domain(dev_priv, domain);
 
+out:
 	mutex_unlock(&power_domains->lock);
 
 	intel_runtime_pm_put(dev_priv);
@@ -1705,6 +1732,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_INIT))
 #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
 	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
 	BIT_ULL(POWER_DOMAIN_MODESET) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
@@ -1727,6 +1755,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_INIT))
 #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (		\
 	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
 	BIT_ULL(POWER_DOMAIN_MODESET) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
@@ -1785,6 +1814,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
 	BIT_ULL(POWER_DOMAIN_INIT))
 #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS (		\
 	GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
 	BIT_ULL(POWER_DOMAIN_MODESET) |			\
 	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
 	BIT_ULL(POWER_DOMAIN_INIT))
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux