Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.
This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.
v2:
- Moved platform check out of the function and got rid of duplicate
functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)
v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)
Signed-off-by: Zhe Wang <zhe1.wang@xxxxxxxxx>
Signed-off-by: Praveen Paneri <praveen.paneri@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 18 +++++-
drivers/gpu/drm/i915/i915_pci.c | 1 +
drivers/gpu/drm/i915/i915_reg.h | 7 +++
drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
4 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f8c66ee..bfdd55a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -559,6 +559,18 @@ enum forcewake_domains {
#define FW_REG_READ (1)
#define FW_REG_WRITE (2)
+enum decoupled_power_domains {
+ GEN9_DECOUPLED_PD_BLITTER = 0,
+ GEN9_DECOUPLED_PD_RENDER,
+ GEN9_DECOUPLED_PD_MEDIA,
+ GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+ GEN9_DECOUPLED_OP_WRITE = 0,
+ GEN9_DECOUPLED_OP_READ
+};
+
enum forcewake_domains
intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
i915_reg_t reg, unsigned int op);
@@ -690,7 +702,8 @@ struct intel_csr {
func(has_snoop) sep \
func(has_ddi) sep \
func(has_fpga_dbg) sep \
- func(has_pooled_eu)
+ func(has_pooled_eu) sep \
+ func(has_decoupled_mmio)
#define DEFINE_FLAG(name) u8 name:1
#define SEP_SEMICOLON ;
@@ -2869,6 +2882,9 @@ struct drm_i915_cmd_table {
#define GT_FREQUENCY_MULTIPLIER 50
#define GEN9_FREQ_SCALER 3
+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
+ && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
+
#include "i915_trace.h"
static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 31e6edd..5c56c0c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
.has_hw_contexts = 1,
.has_logical_ring_contexts = 1,
.has_guc = 1,
+ .has_decoupled_mmio = 1,
.ddb_size = 512,
GEN_DEFAULT_PIPEOFFSETS,
IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d44cee..bf7b4c9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7398,6 +7398,13 @@ enum {
#define SKL_FUSE_PG1_DIST_STATUS (1<<26)
#define SKL_FUSE_PG2_DIST_STATUS (1<<25)
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO (1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT 28
+#define GEN9_DECOUPLED_OP_SHIFT 24
+
/* Per-pipe DDI Function Control */
#define _TRANS_DDI_FUNC_CTL_A 0x60400
#define _TRANS_DDI_FUNC_CTL_B 0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..0af602e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -831,6 +831,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
__unclaimed_reg_debug(dev_priv, reg, read, before);
}
+static const enum decoupled_power_domains fw2dpd_engine[] = {
+ GEN9_DECOUPLED_PD_RENDER,
+ GEN9_DECOUPLED_PD_BLITTER,
+ GEN9_DECOUPLED_PD_ALL,
+ GEN9_DECOUPLED_PD_MEDIA,
+ GEN9_DECOUPLED_PD_ALL,
+ GEN9_DECOUPLED_PD_ALL,
+ GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+ u32 reg,
+ enum forcewake_domains fw_engine,
+ enum decoupled_ops operation)
+{
+ enum decoupled_power_domains dpd_engine;
+ u32 ctrl_reg_data = 0;
+
+ dpd_engine = fw2dpd_engine[fw_engine - 1];
+
+ ctrl_reg_data |= reg;
+ ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+ ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
+ __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+ ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+ __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+ if (wait_for_atomic((__raw_i915_read32(dev_priv,
+ GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
+ FORCEWAKE_ACK_TIMEOUT_MS))
+