From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> Since SKL the eLLC has been sitting on the far side of the system agent, meaning the display engine can utilize it. Let's enable that. I chose WB for the caching mode, because my numbers are indicating that WT might actually be WB and WC might actually be UC. I'm not 100% sure that is indeed the case but at least my simple rendercopy based benchmark didn't see any difference in performance. Also if I configure things to do LLCeLLC+WT I still get cache dirt on my screen, suggesting that is in fact operating in WB mode anyway. This is also the reason I had to fix the MOCS target cache to really say PTE rather than LLC+eLLC. Caveat: I've not benchmarked any real workloads. IIRC Eero did benchmark an earlier version, but that didn't have the PTE vs. LLC+eLLC MOCS fix so it wasn't actually doing the right thing most likely. Cc: Eero Tamminen <eero.t.tamminen@xxxxxxxxx> Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 3 +-- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/intel_mocs.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 35d0782c077e..2a4f33fa2bba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2517,8 +2517,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) -#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ - IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) +#define HAS_WT(dev_priv) HAS_EDRAM(dev_priv) #define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8f460cc4cc1f..038fbf52a997 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3071,7 +3071,7 @@ static void cnl_setup_private_ppat(struct intel_ppat *ppat) __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE); __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); @@ -3109,7 +3109,10 @@ static void bdw_setup_private_ppat(struct intel_ppat *ppat) __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */ __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */ - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */ + if (INTEL_GEN(ppat->i915) >= 9) + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE); /* for scanout with eLLC */ + else + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */ __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */ __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)); __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f597f35b109b..47adc7268867 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -139,7 +139,7 @@ typedef u64 gen8_ppgtt_pml4e_t; #define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE 0 /* WB LLC */ #define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */ -#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */ +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT LLCeLLC (HSW/BDW) or WB eLLC (SKL+) */ #define CHV_PPAT_SNOOP (1<<6) #define GEN8_PPAT_AGE(x) ((x)<<4) diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 274ba78500c0..d984ccff94ef 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -115,7 +115,7 @@ struct drm_i915_mocs_table { LE_1_UC | LE_TC_2_LLC_ELLC, \ L3_1_UC), \ MOCS_ENTRY(I915_MOCS_PTE, \ - LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \ + LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \ L3_3_WB) static const struct drm_i915_mocs_entry skylake_mocs_table[] = { -- 2.21.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx