On Fri, Aug 26, 2022 at 02:27:18PM -0700, Matt Roper wrote:
> On client DG2 platforms, optimal performance is achieved with the
> hardware's default "age based" thread execution setting. However on
> ATS-M, switching this to "round robin after dependencies" provides
> better performance. We'll add a new "tuning" feature flag to the ATS-M
> device info to enable/disable this setting.
>
> Bspec: 68331
> Cc: Lucas De Marchi <lucas.demarchi@xxxxxxxxx>
Reviewed-by: Matt Atwood <matthew.s.atwood@xxxxxxxxx>
> Signed-off-by: Matt Roper <matthew.d.roper@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h     | 2 ++
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++++++++
>  drivers/gpu/drm/i915/i915_pci.c             | 1 +
>  drivers/gpu/drm/i915/intel_device_info.h    | 1 +
>  4 files changed, 13 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 94f9ddcfb3a5..d414785003cc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1110,6 +1110,8 @@
>  #define   GEN12_DISABLE_TDL_PUSH                    REG_BIT(9)
>  #define   GEN11_DIS_PICK_2ND_EU                     REG_BIT(7)
>  #define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX   REG_BIT(4)
> +#define   THREAD_EX_ARB_MODE                        REG_GENMASK(3, 2)
> +#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP           REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
>
>  #define HSW_ROW_CHICKEN3                            _MMIO(0xe49c)
>  #define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 3cdb8294e13f..ff8c3735abc9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
>  				   0 /* write-only, so skip validation */,
>  				   true);
>  	}
> +
> +	/*
> +	 * This tuning setting proves beneficial only on ATS-M designs; the
> +	 * default "age based" setting is optimal on regular DG2 and other
> +	 * platforms.
> +	 */
> +	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
> +		wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
> +				    THREAD_EX_ARB_MODE_RR_AFTER_DEP);
>  }
>
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 857e8bb6865c..26b25d9434d6 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1080,6 +1080,7 @@ static const struct intel_device_info ats_m_info = {
>  	DG2_FEATURES,
>  	.display = { 0 },
>  	.require_force_probe = 1,
> +	.tuning_thread_rr_after_dep = 1,
>  };
>
>  #define XE_HPC_FEATURES \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 0ccde94b225f..6904ad03ca19 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -171,6 +171,7 @@ enum intel_ppgtt_type {
>  	func(has_runtime_pm); \
>  	func(has_snoop); \
>  	func(has_coherent_ggtt); \
> +	func(tuning_thread_rr_after_dep); \
>  	func(unfenced_needs_alignment); \
>  	func(hws_needs_physical);
>
> --
> 2.37.2
>