On 5/29/2015 12:05 PM, Michel Thierry wrote:
On 5/22/2015 6:04 PM, Mika Kuoppala wrote:
On BDW/SKL, when 32bit addressing mode is used, the hardware preloads
the pdps. However, TLB invalidation only affects the levels below
the pdps. This means that if the pdps change, the hw might access memory
through a stale pdp entry.
To combat this problem, preallocate the top level pdps so that the hw sees
them as immutable for each context.
Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
Cc: Rafael Barbalho <rafael.barbalho@xxxxxxxxx>
Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 50
+++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_reg.h | 17 +++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 15 +----------
3 files changed, 68 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0ffd459..1a5ad4c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -941,6 +941,48 @@ err_out:
return ret;
}
+/* With some architectures and 32bit legacy mode, hardware pre-loads the
+ * top level pdps but the tlb invalidation only invalidates the lower levels.
+ * This might lead to hw fetching with stale pdp entries if top level
+ * structure changes, ie va space grows with dynamic page tables.
+ */
+static bool hw_wont_flush_pdp_tlbs(struct i915_hw_ppgtt *ppgtt)
+{
+ struct drm_device *dev = ppgtt->base.dev;
+
+ if (GEN8_CTX_ADDRESSING_MODE != LEGACY_32B_CONTEXT)
+ return false;
+
+ if (IS_BROADWELL(dev) || IS_SKYLAKE(dev))
+ return true;
The pd load restriction also applies to chv and bxt.
And to be safe, we can set bit 14 of reg 0x4030 to '1' (PD load disable).
Since this register is not part of the context state, it can be added
alongside the other platform workarounds in intel_pm.c.
+
+ return false;
+}
+
+static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
+{
+ unsigned long *new_page_dirs, **new_page_tables;
+ int ret;
+
+ /* We allocate temp bitmap for page tables for no gain
+ * but as this is for init only, lets keep the things simple
+ */
+ ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+ if (ret)
+ return ret;
+
+ /* Allocate for all pdps regardless of how the ppgtt
+ * was defined.
+ */
+ ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp,
+ 0, 1ULL << 32,
+ new_page_dirs);
+
+ free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+
+ return ret;
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -972,6 +1014,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt
*ppgtt)
ppgtt->switch_mm = gen8_mm_switch;
+ if (hw_wont_flush_pdp_tlbs(ppgtt)) {
+ /* Avoid the tlb flush bug by preallocating
+ * whole top level pdp structure so it stays
+ * static even if our va space grows.
+ */
+ return gen8_preallocate_top_level_pdps(ppgtt);
+ }
+
Also, we will need the same hw_wont_flush_pdp_tlbs() check in the cleanup
function, and it must iterate over each pdpe (pd) across the full 0 to 4GiB
range (otherwise we will leak some of the preallocated page dirs).
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h
b/drivers/gpu/drm/i915/i915_reg.h
index 6eeba63..334324b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2777,6 +2777,23 @@ enum skl_disp_power_wells {
#define VLV_CLK_CTL2 0x101104
#define CLK_CTL2_CZCOUNT_30NS_SHIFT 28
+/* Context descriptor format bits */
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+
+enum {
+ ADVANCED_CONTEXT = 0,
+ LEGACY_32B_CONTEXT,
+ ADVANCED_AD_CONTEXT,
+ LEGACY_64B_CONTEXT
+};
+
+#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
+#define GEN8_CTX_ADDRESSING_MODE LEGACY_32B_CONTEXT
+
/*
* Overlay regs
*/
diff --git a/drivers/gpu/drm/i915/intel_lrc.c
b/drivers/gpu/drm/i915/intel_lrc.c
index 96ae90a..d793d4e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -183,12 +183,6 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
-#define GEN8_CTX_VALID (1<<0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
-#define GEN8_CTX_FORCE_RESTORE (1<<2)
-#define GEN8_CTX_L3LLC_COHERENT (1<<5)
-#define GEN8_CTX_PRIVILEGE (1<<8)
-
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
const u64 _addr = test_bit(n, ppgtt->pdp.used_pdpes) ? \
ppgtt->pdp.page_directory[n]->daddr : \
@@ -198,13 +192,6 @@
}
enum {
- ADVANCED_CONTEXT = 0,
- LEGACY_CONTEXT,
- ADVANCED_AD_CONTEXT,
- LEGACY_64B_CONTEXT
-};
-#define GEN8_CTX_MODE_SHIFT 3
-enum {
FAULT_AND_HANG = 0,
FAULT_AND_HALT, /* Debug only */
FAULT_AND_STREAM,
@@ -273,7 +260,7 @@ static uint64_t execlists_ctx_descriptor(struct
intel_engine_cs *ring,
WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
desc = GEN8_CTX_VALID;
- desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+ desc |= GEN8_CTX_ADDRESSING_MODE <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
if (IS_GEN8(ctx_obj->base.dev))
desc |= GEN8_CTX_L3LLC_COHERENT;
desc |= GEN8_CTX_PRIVILEGE;
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx