Re: [RFC] drm/i915: Simplify waiting for registers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 12/04/2017 11:36, Tvrtko Ursulin wrote:
From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

I was thinking if we could get away with simplifying the API
a bit by getting rid of the _fw variant and only have these
three functions with a common implementation:

  intel_wait_for_register
  intel_wait_for_register_atomic
  __intel_wait_for_register

The fast/busy loop in all cases grabs it's own forcewake and
is done under the uncore lock. The extra overhead for call
sites which already have the forcewake, or do not need it is
there, but not sure that it matters for where wait_for_register
functions are used.

This is probably quite bad for pcode, since AFAIR those can be quite slow. So scratch this idea I think..

Regards,

Tvrtko

This makes the difference between the normal and atomic API
just in the fact atomic doesn't sleep while the normal can.

I've also put in the change to move the BUILD_BUG_ON from
_wait_for_atomic to wait_for_atomic(_us) macros, as Michal
suggested at one point, which should fix the GCC 4.4 build
issue.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Michal Wajdeczko <michal.wajdeczko@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h         |  75 ++++++++++++++-----
 drivers/gpu/drm/i915/intel_drv.h        |  18 ++++-
 drivers/gpu/drm/i915/intel_i2c.c        |   4 +-
 drivers/gpu/drm/i915/intel_pm.c         |  10 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c |   9 ++-
 drivers/gpu/drm/i915/intel_uc.c         |  10 +--
 drivers/gpu/drm/i915/intel_uncore.c     | 123 +++++++++-----------------------
 7 files changed, 121 insertions(+), 128 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed079c244b5d..ba95a54b9dfa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3083,27 +3083,68 @@ u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);

 void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);

-int intel_wait_for_register(struct drm_i915_private *dev_priv,
-			    i915_reg_t reg,
-			    u32 mask,
-			    u32 value,
-			    unsigned int timeout_ms);
-int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-				 i915_reg_t reg,
-				 u32 mask,
-				 u32 value,
-				 unsigned int fast_timeout_us,
-				 unsigned int slow_timeout_ms,
-				 u32 *out_value);
-static inline
-int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
+int __intel_wait_for_register(struct drm_i915_private *dev_priv,
+			      i915_reg_t reg,
+			      u32 mask,
+			      u32 value,
+			      unsigned int fast_timeout_us,
+			      unsigned int slow_timeout_ms,
+			      u32 *out_value);
+
+/**
+ * intel_wait_for_register - wait until register matches expected state
+ * @dev_priv: the i915 device
+ * @reg: the register to read
+ * @mask: mask to apply to register value
+ * @value: expected value
+ * @timeout_ms: timeout in millisecond
+ *
+ * This routine waits until the target register @reg contains the expected
+ * @value after applying the @mask, i.e. it waits until ::
+ *
+ *     (I915_READ(reg) & mask) == value
+ *
+ * Otherwise, the wait will timeout after @timeout_ms milliseconds.
+ *
+ * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
+ */
+static inline int
+intel_wait_for_register(struct drm_i915_private *dev_priv,
+			i915_reg_t reg,
+			u32 mask,
+			u32 value,
+			unsigned int timeout_ms)
+{
+	return __intel_wait_for_register(dev_priv, reg, mask, value,
+					 2, timeout_ms, NULL);
+}
+
+/**
+ * intel_wait_for_register_atomic - wait until register matches expected state
+ * @dev_priv: the i915 device
+ * @reg: the register to read
+ * @mask: mask to apply to register value
+ * @value: expected value
+ * @timeout_us: timeout in microsecond
+ *
+ * This routine waits until the target register @reg contains the expected
+ * @value after applying the @mask, i.e. it waits until ::
+ *
+ *     (I915_READ(reg) & mask) == value
+ *
+ * The wait is done with busy-looping.
+ *
+ * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
+ */
+static inline int
+intel_wait_for_register_atomic(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg,
 			       u32 mask,
 			       u32 value,
-			       unsigned int timeout_ms)
+			       unsigned int timeout_us)
 {
-	return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
-					    2, timeout_ms, NULL);
+	return __intel_wait_for_register(dev_priv, reg, mask, value,
+					 timeout_us, 0, NULL);
 }

 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 43ea74882f9a..d7018d44371c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -88,7 +88,6 @@
 	int cpu, ret, timeout = (US) * 1000; \
 	u64 base; \
 	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
-	BUILD_BUG_ON((US) > 50000); \
 	if (!(ATOMIC)) { \
 		preempt_disable(); \
 		cpu = smp_processor_id(); \
@@ -130,8 +129,21 @@
 	ret__; \
 })

-#define wait_for_atomic(COND, MS)	_wait_for_atomic((COND), (MS) * 1000, 1)
-#define wait_for_atomic_us(COND, US)	_wait_for_atomic((COND), (US), 1)
+#define wait_for_atomic(COND, MS) \
+({ \
+	int ret__; \
+	BUILD_BUG_ON((MS) > 50); \
+	ret__ = _wait_for_atomic((COND), (MS) * 1000, 1); \
+	ret__; \
+})
+
+#define wait_for_atomic_us(COND, US) \
+({ \
+	int ret__; \
+	BUILD_BUG_ON((US) > 50000); \
+	ret__ = _wait_for_atomic((COND), (US), 1); \
+	ret__; \
+})

 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index b6401e8f1bd6..ddc615d3d40d 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -297,9 +297,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
 	add_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
 	I915_WRITE_FW(GMBUS4, irq_enable);

-	ret = intel_wait_for_register_fw(dev_priv,
-					 GMBUS2, GMBUS_ACTIVE, 0,
-					 10);
+	ret = intel_wait_for_register(dev_priv, GMBUS2, GMBUS_ACTIVE, 0, 10);

 	I915_WRITE_FW(GMBUS4, 0);
 	remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index cacb65fa2dd5..a93e6427aabf 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -8135,9 +8135,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
 	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL)) {
+	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
+					   GEN6_PCODE_READY, 0, 500)) {
 		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
 		return -ETIMEDOUT;
 	}
@@ -8180,9 +8179,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
 	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
 	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);

-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL)) {
+	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
+					   GEN6_PCODE_READY, 0, 500)) {
 		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 97d5fcca8805..af31d786a517 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1729,11 +1729,10 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);

 	/* Wait for the ring not to be idle, i.e. for it to wake up. */
-	if (__intel_wait_for_register_fw(dev_priv,
-					 GEN6_BSD_SLEEP_PSMI_CONTROL,
-					 GEN6_BSD_SLEEP_INDICATOR,
-					 0,
-					 1000, 0, NULL))
+	if (intel_wait_for_register_atomic(dev_priv,
+					   GEN6_BSD_SLEEP_PSMI_CONTROL,
+					   GEN6_BSD_SLEEP_INDICATOR, 0,
+					   1000))
 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

 	/* Now that the ring is fully powered up, update the tail */
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 4364b1a9064e..bae60f5c52d6 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -389,11 +389,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
 	 * No GuC command should ever take longer than 10ms.
 	 * Fast commands should still complete in 10us.
 	 */
-	ret = __intel_wait_for_register_fw(dev_priv,
-					   SOFT_SCRATCH(0),
-					   INTEL_GUC_RECV_MASK,
-					   INTEL_GUC_RECV_MASK,
-					   10, 10, &status);
+	ret = __intel_wait_for_register(dev_priv,
+					SOFT_SCRATCH(0),
+					INTEL_GUC_RECV_MASK,
+					INTEL_GUC_RECV_MASK,
+					10, 10, &status);
 	if (status != INTEL_GUC_STATUS_SUCCESS) {
 		/*
 		 * Either the GuC explicitly returned an error (which
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index fb38c7692fc2..215fbed8808c 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1535,9 +1535,8 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
 	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);

 	/* Spin waiting for the device to ack the reset requests */
-	return intel_wait_for_register_fw(dev_priv,
-					  GEN6_GDRST, hw_domain_mask, 0,
-					  500);
+	return intel_wait_for_register(dev_priv, GEN6_GDRST, hw_domain_mask, 0,
+				       500);
 }

 /**
@@ -1585,7 +1584,7 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
 }

 /**
- * __intel_wait_for_register_fw - wait until register matches expected state
+ * __intel_wait_for_register - wait until register matches expected state
  * @dev_priv: the i915 device
  * @reg: the register to read
  * @mask: mask to apply to register value
@@ -1597,90 +1596,47 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
  * This routine waits until the target register @reg contains the expected
  * @value after applying the @mask, i.e. it waits until ::
  *
- *     (I915_READ_FW(reg) & mask) == value
+ *     (I915_READ(reg) & mask) == value
  *
  * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
- * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
- * must be not larger than 20,0000 microseconds.
- *
- * Note that this routine assumes the caller holds forcewake asserted, it is
- * not suitable for very long waits. See intel_wait_for_register() if you
- * wish to wait without holding forcewake for the duration (i.e. you expect
- * the wait to be slow).
  *
  * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
  */
-int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-				 i915_reg_t reg,
-				 u32 mask,
-				 u32 value,
-				 unsigned int fast_timeout_us,
-				 unsigned int slow_timeout_ms,
-				 u32 *out_value)
-{
+int __intel_wait_for_register(struct drm_i915_private *dev_priv,
+			      i915_reg_t reg,
+			      u32 mask,
+			      u32 value,
+			      unsigned int fast_timeout_us,
+			      unsigned int slow_timeout_ms,
+			      u32 *out_value)
+{
+	unsigned long flags;
+	unsigned int fw_domains;
 	u32 reg_value;
-#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
 	int ret;

 	/* Catch any overuse of this function */
 	might_sleep_if(slow_timeout_ms);
-	GEM_BUG_ON(fast_timeout_us > 20000);
-
-	ret = -ETIMEDOUT;
-	if (fast_timeout_us && fast_timeout_us <= 20000)
-		ret = _wait_for_atomic(done, fast_timeout_us, 0);
-	if (ret)
-		ret = wait_for(done, slow_timeout_ms);
+	GEM_BUG_ON(fast_timeout_us > 1000);

-	if (out_value)
-		*out_value = reg_value;
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
+	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);

-	return ret;
+#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
+	ret = _wait_for_atomic(done, fast_timeout_us, 1);
 #undef done
-}

-/**
- * intel_wait_for_register - wait until register matches expected state
- * @dev_priv: the i915 device
- * @reg: the register to read
- * @mask: mask to apply to register value
- * @value: expected value
- * @timeout_ms: timeout in millisecond
- *
- * This routine waits until the target register @reg contains the expected
- * @value after applying the @mask, i.e. it waits until ::
- *
- *     (I915_READ(reg) & mask) == value
- *
- * Otherwise, the wait will timeout after @timeout_ms milliseconds.
- *
- * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
- */
-int intel_wait_for_register(struct drm_i915_private *dev_priv,
-			    i915_reg_t reg,
-			    u32 mask,
-			    u32 value,
-			    unsigned int timeout_ms)
-{
-	unsigned fw =
-		intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
-	int ret;
-
-	might_sleep();
-
-	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, fw);
-
-	ret = __intel_wait_for_register_fw(dev_priv,
-					   reg, mask, value,
-					   2, 0, NULL);
+	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);

-	intel_uncore_forcewake_put__locked(dev_priv, fw);
-	spin_unlock_irq(&dev_priv->uncore.lock);
+#define done (((reg_value = I915_READ_NOTRACE(reg)) & mask) == value)
+	if (ret && slow_timeout_ms)
+		ret = wait_for(done, slow_timeout_ms);
+#undef done

-	if (ret)
-		ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
-			       timeout_ms);
+	if (out_value)
+		*out_value = reg_value;

 	return ret;
 }
@@ -1693,11 +1649,11 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
 	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
 		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));

-	ret = intel_wait_for_register_fw(dev_priv,
-					 RING_RESET_CTL(engine->mmio_base),
-					 RESET_CTL_READY_TO_RESET,
-					 RESET_CTL_READY_TO_RESET,
-					 700);
+	ret = intel_wait_for_register(dev_priv,
+				      RING_RESET_CTL(engine->mmio_base),
+				      RESET_CTL_READY_TO_RESET,
+				      RESET_CTL_READY_TO_RESET,
+				      700);
 	if (ret)
 		DRM_ERROR("%s: reset request timeout\n", engine->name);

@@ -1780,21 +1736,10 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)

 int intel_guc_reset(struct drm_i915_private *dev_priv)
 {
-	int ret;
-	unsigned long irqflags;
-
 	if (!HAS_GUC(dev_priv))
 		return -EINVAL;

-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-
-	return ret;
+	return gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
 }

 bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux