Re: [PATCH v5] drm/i915/bxt: Broxton decoupled MMIO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Tvrtko,

On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote:

On 15/11/2016 17:19, Praveen Paneri wrote:
Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires fewer cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives an advantage over forcewake, as this new
mechanism “decouples” CPU cycles and allows them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate, e.g. for 64-bit register writes, to avoid writing 2 dwords
separately and landing in funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write
 routines (Tvrtko)

v4:
- Fixed alignment and variable names (Chris)
- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)

v5:
- Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko)
- Sanitize info->has_decoupled_mmio at init (Chris)

Signed-off-by: Zhe Wang <zhe1.wang@xxxxxxxxx>
Signed-off-by: Praveen Paneri <praveen.paneri@xxxxxxxxx>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Please put "(v4)" when you carry over the r-b and it wasn't explicitly
said you are OK to just keep it.
Sure, will take care of this going forward.

---
 drivers/gpu/drm/i915/i915_drv.h     |  17 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 115
++++++++++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h
b/drivers/gpu/drm/i915/i915_drv.h
index 4e7148a..c1eec04 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -549,6 +549,18 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)

+enum decoupled_power_domain {
+    GEN9_DECOUPLED_PD_BLITTER = 0,
+    GEN9_DECOUPLED_PD_RENDER,
+    GEN9_DECOUPLED_PD_MEDIA,
+    GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+    GEN9_DECOUPLED_OP_WRITE = 0,
+    GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
                    i915_reg_t reg, unsigned int op);
@@ -683,7 +695,8 @@ struct intel_csr {
     func(cursor_needs_physical); \
     func(hws_needs_physical); \
     func(overlay_needs_physical); \
-    func(supports_tv)
+    func(supports_tv); \
+    func(has_decoupled_mmio)

 struct sseu_dev_info {
     u8 slice_mask;
@@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3

+#define HAS_DECOUPLED_MMIO(dev_priv)
(INTEL_INFO(dev_priv)->has_decoupled_mmio)
+
 #include "i915_trace.h"

 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
*dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c
b/drivers/gpu/drm/i915/i915_pci.c
index 70a99ce..fce8e19 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -363,6 +363,7 @@
     .has_hw_contexts = 1,
     .has_logical_ring_contexts = 1,
     .has_guc = 1,
+    .has_decoupled_mmio = 1,
     .ddb_size = 512,
     GEN_DEFAULT_PIPEOFFSETS,
     IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h
b/drivers/gpu/drm/i915/i915_reg.h
index 3361d7f..c70c07a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7342,6 +7342,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)

+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO            (1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT            28
+#define GEN9_DECOUPLED_OP_SHIFT            24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A        0x60400
 #define _TRANS_DDI_FUNC_CTL_B        0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c
b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..e953303 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct
drm_i915_private *dev_priv)
 static void __intel_uncore_early_sanitize(struct drm_i915_private
*dev_priv,
                       bool restore_forcewake)
 {
+    struct intel_device_info *info = mkwrite_device_info(dev_priv);
+
     /* clear out unclaimed reg detection bit */
     if (check_for_unclaimed_mmio(dev_priv))
         DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n");
@@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct
drm_i915_private *dev_priv,
                    GT_FIFO_CTL_RC6_POLICY_STALL);
     }

+    /* Enable Decoupled MMIO only on BXT C stepping onwards */
+    if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
+        info->has_decoupled_mmio = false;

Why have you decided to put it in here? It doesn't make much difference
except conceptually. This runs every time on resume while I thought
intel_device_info_runtime_init would have been more appropriate.
Sorry about picking a bad place for this, but intel_uncore_init(), which uses this info, gets executed before intel_device_info_runtime_init() in the driver_load sequence. If I am not wrong, we need to find an appropriate place for this.

Thanks,
Praveen

Regards,

Tvrtko

+
     intel_uncore_forcewake_reset(dev_priv, restore_forcewake);
 }

@@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset)
     __unclaimed_reg_debug(dev_priv, reg, read, before);
 }

+static const enum decoupled_power_domain fw2dpd_domain[] = {
+    GEN9_DECOUPLED_PD_RENDER,
+    GEN9_DECOUPLED_PD_BLITTER,
+    GEN9_DECOUPLED_PD_ALL,
+    GEN9_DECOUPLED_PD_MEDIA,
+    GEN9_DECOUPLED_PD_ALL,
+    GEN9_DECOUPLED_PD_ALL,
+    GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private
*dev_priv,
+                     u32 reg,
+                     enum forcewake_domains fw_domain,
+                     enum decoupled_ops operation)
+{
+    enum decoupled_power_domain dp_domain;
+    u32 ctrl_reg_data = 0;
+
+    dp_domain = fw2dpd_domain[fw_domain - 1];
+
+    ctrl_reg_data |= reg;
+    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
+    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
ctrl_reg_data);
+
+    if (wait_for_atomic((__raw_i915_read32(dev_priv,
+                GEN9_DECOUPLED_REG0_DW1) &
+                GEN9_DECOUPLED_DW1_GO) == 0,
+                FORCEWAKE_ACK_TIMEOUT_MS))
+        DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32
+__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
+                 u32 reg,
+                 enum forcewake_domains fw_domain)
+{
+    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+                     GEN9_DECOUPLED_OP_READ);
+
+    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void
+__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+                u32 reg, u32 data,
+                enum forcewake_domains fw_domain)
+{
+
+    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
+
+    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+                     GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
     u##x val = 0; \
     assert_rpm_wakelock_held(dev_priv);
@@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct
drm_i915_private *dev_priv,
     GEN6_READ_FOOTER; \
 }

+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
+               i915_reg_t reg, bool trace) { \
+    enum forcewake_domains fw_engine; \
+    GEN6_READ_HEADER(x); \
+    fw_engine = __fwtable_reg_read_fw_domains(offset); \
+    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
+        unsigned i; \
+        u32 *ptr_data = (u32 *) &val; \
+        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
+                                 offset, \
+                                 fw_engine); \
+    } else { \
+        val = __raw_i915_read##x(dev_priv, reg); \
+    } \
+    GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __fwtable_read(8)
 __fwtable_read(16)
 __fwtable_read(32)
@@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct
drm_i915_private *dev_priv,
     GEN6_WRITE_FOOTER; \
 }

+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
+            i915_reg_t reg, u##x val, \
+        bool trace) { \
+    enum forcewake_domains fw_engine; \
+    GEN6_WRITE_HEADER; \
+    fw_engine = __fwtable_reg_write_fw_domains(offset); \
+    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
+        __gen9_decoupled_mmio_write(dev_priv, \
+                        offset, \
+                        val, \
+                        fw_engine); \
+    else \
+        __raw_i915_write##x(dev_priv, reg, val); \
+    GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __fwtable_write(8)
 __fwtable_write(16)
 __fwtable_write(32)
@@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private
*dev_priv)
         ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
         ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
         ASSIGN_READ_MMIO_VFUNCS(fwtable);
+        if (HAS_DECOUPLED_MMIO(dev_priv)) {
+            dev_priv->uncore.funcs.mmio_readl =
+                        gen9_decoupled_read32;
+            dev_priv->uncore.funcs.mmio_readq =
+                        gen9_decoupled_read64;
+            dev_priv->uncore.funcs.mmio_writel =
+                        gen9_decoupled_write32;
+        }
         break;
     case 8:
         if (IS_CHERRYVIEW(dev_priv)) {

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux