On Mon, Aug 26, 2019 at 09:39:48AM +0100, Chris Wilson wrote: > Quoting Prathap Kumar Valsan (2019-08-26 00:35:27) > > To provide shared last-level-cache isolation to cpu workloads running > > concurrently with gpu workloads, the gpu allocation of cache lines needs > > to be restricted to certain ways. Currently GPU hardware supports four > > class-of-service (CLOS) levels and there is an associated way-mask for > > each CLOS. > > > > Hardware supports reading supported way-mask configuration for GPU using > > a bios pcode interface. The supported way-masks and the one currently > > active are communicated to userspace via a sysfs file--closctrl. Admin user > > can then select a new mask by writing the mask value to the file. > > > > Note of Caution: Restricting cache ways using this mechanism presents a > > larger attack surface for side-channel attacks. > > > > Example usage: > > The active way-mask is highlighted within square brackets. > > > cat /sys/class/drm/card0/closctrl > > [0xffff] 0xff00 0xc000 0x8000 > > > > CLOS0 is currently active. > > > > > echo 0x8000 > /sys/class/drm/card0/closctrl > > > cat /sys/class/drm/card0/closctrl > > 0xffff 0xff00 0xc000 [0x8000] > > > > CLOS3 is currently active > > > > Signed-off-by: Prathap Kumar Valsan <prathap.kumar.valsan@xxxxxxxxx> > > --- > > Changes in v2: > > Declare closctrl_show and closctrl_store as static functions. 
> > drivers/gpu/drm/i915/gt/intel_mocs.c | 57 ++++++++++++++++++++--- > > drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + > > drivers/gpu/drm/i915/i915_drv.h | 8 ++++ > > drivers/gpu/drm/i915/i915_reg.h | 1 + > > drivers/gpu/drm/i915/i915_sysfs.c | 67 ++++++++++++++++++++++++++++ > > 5 files changed, 129 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > > index 728704bbbe18..dd13e61944fd 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > > @@ -26,6 +26,7 @@ > > #include "intel_gt.h" > > #include "intel_mocs.h" > > #include "intel_lrc.h" > > +#include "intel_sideband.h" > > > > /* structures required */ > > struct drm_i915_mocs_entry { > > @@ -51,6 +52,7 @@ struct drm_i915_mocs_table { > > #define LE_SCF(value) ((value) << 14) > > #define LE_COS(value) ((value) << 15) > > #define LE_SSE(value) ((value) << 17) > > +#define LE_COS_MASK GENMASK(16, 15) > > > > /* Defines for the tables (LNCFMOCS0 - LNCFMOCS31) - two entries per word */ > > #define L3_ESC(value) ((value) << 0) > > @@ -408,10 +410,13 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) > > unused_value); > > } > > > > -static void intel_mocs_init_global(struct intel_gt *gt) > > +void intel_mocs_init_global(struct intel_gt *gt) > > { > > + struct drm_i915_private *i915 = gt->i915; > > struct intel_uncore *uncore = gt->uncore; > > struct drm_i915_mocs_table table; > > + unsigned int active_clos; > > + u32 value, unused_value; > > unsigned int index; > > > > GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915)); > > @@ -422,20 +427,31 @@ static void intel_mocs_init_global(struct intel_gt *gt) > > if (GEM_DEBUG_WARN_ON(table.size > table.n_entries)) > > return; > > > > - for (index = 0; index < table.size; index++) > > + active_clos = atomic_read(&i915->clos.active_clos); > > + > > + for (index = 0; index < table.size; index++) { > > + value = table.table[index].control_value; > 
> + value &= ~LE_COS_MASK; > > + value |= FIELD_PREP(LE_COS_MASK, active_clos); > > + > > intel_uncore_write(uncore, > > GEN12_GLOBAL_MOCS(index), > > - table.table[index].control_value); > > + value); > > + } > > > > /* > > * Ok, now set the unused entries to the invalid entry (index 0). These > > * entries are officially undefined and no contract for the contents and > > * settings is given for these entries. > > */ > > + unused_value = table.table[0].control_value; > > + unused_value &= ~LE_COS_MASK; > > + unused_value |= FIELD_PREP(LE_COS_MASK, active_clos); > > + > > for (; index < table.n_entries; index++) > > intel_uncore_write(uncore, > > GEN12_GLOBAL_MOCS(index), > > - table.table[0].control_value); > > + unused_value); > > } > > > > static int emit_mocs_control_table(struct i915_request *rq, > > @@ -625,10 +641,41 @@ int intel_mocs_emit(struct i915_request *rq) > > return 0; > > } > > > > +static void intel_read_clos_way_mask(struct intel_gt *gt) > > +{ > > + struct drm_i915_private *i915 = gt->i915; > > + struct drm_i915_mocs_table table; > > + int ret, i; > > + u32 val; > > + > > + if (!get_mocs_settings(gt, &table)) > > + return; > > + > > + /* COS is same for all entries */ > > + atomic_set(&i915->clos.active_clos, > > + FIELD_GET(LE_COS_MASK, get_entry_control(&table, 0))); > > + for (i = 0; i < NUM_OF_CLOS; i++) { > > + val = i; > > + ret = sandybridge_pcode_read(i915, > > + ICL_PCODE_LLC_COS_WAY_MASK_INFO, > > + &val, NULL); > > + if (ret) { > > + DRM_ERROR("Mailbox read error = %d\n", ret); > > + return; > > + } > > + > > + i915->clos.way_mask[i] = val; > > + } > > + > > + i915->clos.support_way_mask_read = true; > > +} > > + > > void intel_mocs_init(struct intel_gt *gt) > > { > > intel_mocs_init_l3cc_table(gt); > > > > - if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) > > + if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915)) { > > + intel_read_clos_way_mask(gt); > > intel_mocs_init_global(gt); > > + } > > } > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h 
b/drivers/gpu/drm/i915/gt/intel_mocs.h > > index 2ae816b7ca19..e64e1b104753 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.h > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h > > @@ -53,6 +53,7 @@ struct i915_request; > > struct intel_engine_cs; > > struct intel_gt; > > > > +void intel_mocs_init_global(struct intel_gt *gt); > > void intel_mocs_init(struct intel_gt *gt); > > void intel_mocs_init_engine(struct intel_engine_cs *engine); > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > > index b42651a387d9..0e250416c5a9 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.h > > +++ b/drivers/gpu/drm/i915/i915_drv.h > > @@ -1621,6 +1621,14 @@ struct drm_i915_private { > > bool distrust_bios_wm; > > } wm; > > > > + /* Last Level Cache Class of Service */ > > + struct { > > + bool support_way_mask_read; > > + atomic_t active_clos; > > +#define NUM_OF_CLOS 4 > > + u16 way_mask[NUM_OF_CLOS]; > > + } clos; > > + > > struct dram_info { > > bool valid; > > bool is_16gb_dimm; > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > > index 02e1ef10c47e..399acb7a36d8 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -8845,6 +8845,7 @@ enum { > > #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd > > #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) > > #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) > > +#define ICL_PCODE_LLC_COS_WAY_MASK_INFO 0x1d > > #define GEN6_PCODE_READ_D_COMP 0x10 > > #define GEN6_PCODE_WRITE_D_COMP 0x11 > > #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 > > diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c > > index d8a3b180c084..b53143416396 100644 > > --- a/drivers/gpu/drm/i915/i915_sysfs.c > > +++ b/drivers/gpu/drm/i915/i915_sysfs.c > > @@ -34,6 +34,7 @@ > > #include "i915_sysfs.h" > > #include "intel_pm.h" > > #include "intel_sideband.h" > > +#include "gt/intel_mocs.h" > > > > static 
inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev) > > { > > @@ -257,6 +258,62 @@ static const struct bin_attribute dpf_attrs_1 = { > > .private = (void *)1 > > }; > > > > +static ssize_t closctrl_show(struct device *kdev, > > + struct device_attribute *attr, char *buf) > > +{ > > + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); > > + ssize_t len = 0; > > + int i; > > + > > + for (i = 0; i < NUM_OF_CLOS; i++) { > > + if (i == atomic_read(&dev_priv->clos.active_clos)) > > Reading an atomic more than once is liable to give you different > answers. > > > + len += snprintf(buf + len, PAGE_SIZE, "%s0x%x%s ", > > + "[", dev_priv->clos.way_mask[i], "]"); > > + else > > + len += snprintf(buf + len, PAGE_SIZE, "0x%x ", > > + dev_priv->clos.way_mask[i]); > > + } > > + len += snprintf(buf + len, PAGE_SIZE, "\n"); > > + > > + return len; > > +} > > + > > +static ssize_t closctrl_store(struct device *kdev, > > + struct device_attribute *attr, > > + const char *buf, size_t count) > > +{ > > + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); > > + u8 active_clos, clos_index; > > + bool valid_mask = false; > > + ssize_t ret; > > + u16 way_mask; > > + > > + ret = kstrtou16(buf, 0, &way_mask); > > + if (ret) > > + return ret; > > + > > + active_clos = atomic_read(&dev_priv->clos.active_clos); > > + > > + if (dev_priv->clos.way_mask[active_clos] == way_mask) > > + return count; > > + > > + for (clos_index = 0; clos_index < NUM_OF_CLOS; clos_index++) { > > + if (dev_priv->clos.way_mask[clos_index] == way_mask) { > > + atomic_set(&dev_priv->clos.active_clos, clos_index); > > + valid_mask = true; > > + break; > > + } > > + } > > How is this serialised against multiple users changing the setting? > Should have been using mutex to serialize. Will fix. > This is not an atomic operation, atomic_t should have been a warning. 
> -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx