With BDW/SKL and 32bit addressing mode only, the hardware preloads the pdps. However, the TLB invalidation only affects the levels below the pdps. This means that if the pdps change, the hw might access memory through a stale pdp entry. To combat this problem, preallocate the top pdps so that the hw sees them as immutable for each context. Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Cc: Rafael Barbalho <rafael.barbalho@xxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 50 +++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 17 +++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 15 +---------- 3 files changed, 68 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0ffd459..1a5ad4c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -941,6 +941,48 @@ err_out: return ret; } +/* With some architectures and 32bit legacy mode, hardware pre-loads the + * top level pdps but the tlb invalidation only invalidates the lower levels. + * This might lead to hw fetching with stale pdp entries if top level + * structure changes, ie va space grows with dynamic page tables. + */ +static bool hw_wont_flush_pdp_tlbs(struct i915_hw_ppgtt *ppgtt) +{ + struct drm_device *dev = ppgtt->base.dev; + + if (GEN8_CTX_ADDRESSING_MODE != LEGACY_32B_CONTEXT) + return false; + + if (IS_BROADWELL(dev) || IS_SKYLAKE(dev)) + return true; + + return false; +} + +static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +{ + unsigned long *new_page_dirs, **new_page_tables; + int ret; + + /* We allocate temp bitmap for page tables for no gain + * but as this is for init only, lets keep the things simple + */ + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables); + if (ret) + return ret; + + /* Allocate for all pdps regardless of how the ppgtt + * was defined. 
+ */ + ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, + 0, 1ULL << 32, + new_page_dirs); + + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + + return ret; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -972,6 +1014,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_mm_switch; + if (hw_wont_flush_pdp_tlbs(ppgtt)) { + /* Avoid the tlb flush bug by preallocating + * whole top level pdp structure so it stays + * static even if our va space grows. + */ + return gen8_preallocate_top_level_pdps(ppgtt); + } + return 0; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 6eeba63..334324b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2777,6 +2777,23 @@ enum skl_disp_power_wells { #define VLV_CLK_CTL2 0x101104 #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 +/* Context descriptor format bits */ +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) + +enum { + ADVANCED_CONTEXT = 0, + LEGACY_32B_CONTEXT, + ADVANCED_AD_CONTEXT, + LEGACY_64B_CONTEXT +}; + +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 +#define GEN8_CTX_ADDRESSING_MODE LEGACY_32B_CONTEXT + /* * Overlay regs */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 96ae90a..d793d4e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -183,12 +183,6 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \ const u64 _addr = 
test_bit(n, ppgtt->pdp.used_pdpes) ? \ ppgtt->pdp.page_directory[n]->daddr : \ @@ -198,13 +192,6 @@ } enum { - ADVANCED_CONTEXT = 0, - LEGACY_CONTEXT, - ADVANCED_AD_CONTEXT, - LEGACY_64B_CONTEXT -}; -#define GEN8_CTX_MODE_SHIFT 3 -enum { FAULT_AND_HANG = 0, FAULT_AND_HALT, /* Debug only */ FAULT_AND_STREAM, @@ -273,7 +260,7 @@ static uint64_t execlists_ctx_descriptor(struct intel_engine_cs *ring, WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); desc = GEN8_CTX_VALID; - desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; + desc |= GEN8_CTX_ADDRESSING_MODE << GEN8_CTX_ADDRESSING_MODE_SHIFT; if (IS_GEN8(ctx_obj->base.dev)) desc |= GEN8_CTX_L3LLC_COHERENT; desc |= GEN8_CTX_PRIVILEGE; -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx