Hi Babu, On 10/17/2022 3:27 PM, Babu Moger wrote: > The current event configuration can be changed by the user by writing > to the configuration file /sys/fs/resctrl/info/L3_MON/mbm_total_config. > The event configuration settings are domain specific and will affect all > the CPUs in the domain. > > Following are the types of events supported: > > ==== =========================================================== > Bits Description > ==== =========================================================== > 6 Dirty Victims from the QOS domain to all types of memory > 5 Reads to slow memory in the non-local NUMA domain > 4 Reads to slow memory in the local NUMA domain > 3 Non-temporal writes to non-local NUMA domain > 2 Non-temporal writes to local NUMA domain > 1 Reads to memory in the non-local NUMA domain > 0 Reads to memory in the local NUMA domain > ==== =========================================================== > > For example: > To change the mbm_total_bytes to count only reads on domain 0, the bits > 0, 1, 4 and 5 needs to be set, which is 110011b (in hex 0x33). Run the > command. > $echo 0=0x33 > /sys/fs/resctrl/info/L3_MON/mbm_total_config > > To change the mbm_total_bytes to count all the slow memory reads on > domain 1, the bits 4 and 5 needs to be set which is 110000b (in hex 0x30). > Run the command. > $echo 1=0x30 > /sys/fs/resctrl/info/L3_MON/mbm_total_config > > Signed-off-by: Babu Moger <babu.moger@xxxxxxx> > --- > arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++ > arch/x86/kernel/cpu/resctrl/rdtgroup.c | 146 ++++++++++++++++++++++++++++++++ > 2 files changed, 169 insertions(+) > > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h > index 326a1b582f38..c42b12934a0e 100644 > --- a/arch/x86/kernel/cpu/resctrl/internal.h > +++ b/arch/x86/kernel/cpu/resctrl/internal.h > @@ -42,6 +42,29 @@ > */ > #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) > > +/* Reads to Local DRAM Memory */ > +#define READS_TO_LOCAL_MEM BIT(0) > + > +/* Reads to Remote DRAM Memory */ > +#define READS_TO_REMOTE_MEM BIT(1) > + > +/* Non-Temporal Writes to Local Memory */ > +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) > + > +/* Non-Temporal Writes to Remote Memory */ > +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) > + > +/* Reads to Local Memory the system identifies as "Slow Memory" */ > +#define READS_TO_LOCAL_S_MEM BIT(4) > + > +/* Reads to Remote Memory the system identifies as "Slow Memory" */ > +#define READS_TO_REMOTE_S_MEM BIT(5) > + > +/* Dirty Victims to All Types of Memory */ > +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) > + > +/* Max event bits supported */ > +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) > > struct rdt_fs_context { > struct kernfs_fs_context kfc; > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > index 305fb0475970..25ff56ecb817 100644 > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > @@ -1494,6 +1494,151 @@ static int mbm_local_config_show(struct kernfs_open_file *of, > return 0; > } > > +static void mon_event_config_write(void *info) > +{ > + struct mon_config_info *mon_info = info; > + u32 msr_index; > + > + switch (mon_info->evtid) { > + case QOS_L3_MBM_TOTAL_EVENT_ID: > + msr_index = 0; > + break; > + case QOS_L3_MBM_LOCAL_EVENT_ID: > + msr_index = 1; > + break; > + default: > + /* Not expected to come here */ > + return; > + } > + > + wrmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, 0); > +} This duplicates most of mon_event_config_read() from earlier patch. Would a utility like "mon_event_config_index_get()" help here? > + > +static int mbm_config_write(struct rdt_resource *r, struct rdt_domain *d, > + u32 evtid, u32 val) > +{ > + struct mon_config_info mon_info = {0}; > + cpumask_var_t cpu_mask; > + int ret = 0, cpu; > + > + rdt_last_cmd_clear(); > + > + /* mon_config cannot be more than the supported set of events */ > + if (val > MAX_EVT_CONFIG_BITS) { > + rdt_last_cmd_puts("Invalid event configuration\n"); > + return -EINVAL; > + } > + > + cpus_read_lock(); > + > + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) { > + rdt_last_cmd_puts("cpu_mask allocation failed\n"); > + ret = -ENOMEM; > + goto e_unlock; > + } > + > + /* > + * Read the current config value first. If both are same then > + * we dont need to write it again. "we dont need" -> "we don't need" > + */ > + mon_info.evtid = evtid; > + mondata_config_read(d, &mon_info); > + if (mon_info.mon_config == val) > + goto e_cpumask; > + > + mon_info.mon_config = val; > + > + /* Pick all the CPUs in the domain instance */ > + for_each_cpu(cpu, &d->cpu_mask) > + cpumask_set_cpu(cpu, cpu_mask); > + Why is it necessary to create a new CPU mask just to populate it with all CPUs in another CPU mask? Why not just use the original CPU mask? > + /* > + * Update MSR_IA32_EVT_CFG_BASE MSRs on all the CPUs in > + * cpu_mask. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE > + * are scoped at the domain level. Writing any of these MSRs > + * on one CPU is supposed to be observed by all CPUs in the > + * domain. However, the hardware team recommends to update > + * these MSRs on all the CPUs in the domain. > + */ > + on_each_cpu_mask(cpu_mask, mon_event_config_write, &mon_info, 1); > + > + /* > + * When an Event Configuration is changed, the bandwidth counters > + * for all RMIDs and Events will be cleared by the hardware. The > + * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for > + * every RMID on the next read to any event for every RMID. > + * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) > + * cleared while it is tracked by the hardware. Clear the > + * mbm_local and mbm_total counts for all the RMIDs. > + */ > + memset(d->mbm_local, 0, sizeof(struct mbm_state) * r->num_rmid); > + memset(d->mbm_total, 0, sizeof(struct mbm_state) * r->num_rmid); > + > +e_cpumask: > + free_cpumask_var(cpu_mask); > + > +e_unlock: > + cpus_read_unlock(); > + > + return ret; > +} > + > +static int mon_config_parse(struct rdt_resource *r, char *tok, u32 evtid) > +{ > + char *dom_str = NULL, *id_str; > + struct rdt_domain *d; > + unsigned long dom_id, val; Please use reverse fir tree order. > + int ret = 0; > + > +next: > + if (!tok || tok[0] == '\0') > + return 0; > + > + /* Start processing the strings for each domain */ > + dom_str = strim(strsep(&tok, ";")); > + id_str = strsep(&dom_str, "="); > + > + if (!dom_str || kstrtoul(id_str, 10, &dom_id)) { > + rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); > + return -EINVAL; > + } > + > + if (!dom_str || kstrtoul(dom_str, 16, &val)) { > + rdt_last_cmd_puts("Missing '=' or non-numeric event configuration value\n"); > + return -EINVAL; > + } > + > + list_for_each_entry(d, &r->domains, list) { > + if (d->id == dom_id) { > + ret = mbm_config_write(r, d, evtid, val); > + if (ret) > + return -EINVAL; > + goto next; > + } > + } > + > + return -EINVAL; > +} > + > +static ssize_t mbm_total_config_write(struct kernfs_open_file *of, > + char *buf, size_t nbytes, loff_t off) > +{ > + struct rdt_resource *r = of->kn->parent->priv; > + int ret; > + > + /* Valid input requires a trailing newline */ > + if (nbytes == 0 || buf[nbytes - 1] != '\n') > + return -EINVAL; > + > + rdt_last_cmd_clear(); > + How is this code protected from interference by other actions? Needs rdtgroup_mutex? > + buf[nbytes - 1] = '\0'; > + > + ret = mon_config_parse(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); > + > + return ret ?: nbytes; > +} > + > /* rdtgroup information files for one cache resource. */ > static struct rftype res_common_files[] = { > { > @@ -1597,6 +1742,7 @@ static struct rftype res_common_files[] = { > .mode = 0644, > .kf_ops = &rdtgroup_kf_single_ops, > .seq_show = mbm_total_config_show, > + .write = mbm_total_config_write, > }, > { > .name = "mbm_local_config", > > Reinette