Hi Reinette, On 10/25/22 18:48, Reinette Chatre wrote: > Hi Babu, > > On 10/17/2022 3:27 PM, Babu Moger wrote: >> The current event configuration can be changed by the user by writing >> to the configuration file /sys/fs/resctrl/info/L3_MON/mbm_total_config. >> The event configuration settings are domain specific and will affect all >> the CPUs in the domain. >> >> Following are the types of events supported: >> >> ==== =========================================================== >> Bits Description >> ==== =========================================================== >> 6 Dirty Victims from the QOS domain to all types of memory >> 5 Reads to slow memory in the non-local NUMA domain >> 4 Reads to slow memory in the local NUMA domain >> 3 Non-temporal writes to non-local NUMA domain >> 2 Non-temporal writes to local NUMA domain >> 1 Reads to memory in the non-local NUMA domain >> 0 Reads to memory in the local NUMA domain >> ==== =========================================================== >> >> For example: >> To change the mbm_total_bytes to count only reads on domain 0, the bits >> 0, 1, 4 and 5 needs to be set, which is 110011b (in hex 0x33). Run the >> command. >> $echo 0=0x33 > /sys/fs/resctrl/info/L3_MON/mbm_total_config >> >> To change the mbm_total_bytes to count all the slow memory reads on >> domain 1, the bits 4 and 5 needs to be set which is 110000b (in hex 0x30). >> Run the command. >> $echo 1=0x30 > /sys/fs/resctrl/info/L3_MON/mbm_total_config >> >> Signed-off-by: Babu Moger <babu.moger@xxxxxxx> >> --- >> arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++ >> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 146 ++++++++++++++++++++++++++++++++ >> 2 files changed, 169 insertions(+) >> >> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h >> index 326a1b582f38..c42b12934a0e 100644 >> --- a/arch/x86/kernel/cpu/resctrl/internal.h >> +++ b/arch/x86/kernel/cpu/resctrl/internal.h >> @@ -42,6 +42,29 @@ >> */ >> #define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) >> >> +/* Reads to Local DRAM Memory */ >> +#define READS_TO_LOCAL_MEM BIT(0) >> + >> +/* Reads to Remote DRAM Memory */ >> +#define READS_TO_REMOTE_MEM BIT(1) >> + >> +/* Non-Temporal Writes to Local Memory */ >> +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) >> + >> +/* Non-Temporal Writes to Remote Memory */ >> +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) >> + >> +/* Reads to Local Memory the system identifies as "Slow Memory" */ >> +#define READS_TO_LOCAL_S_MEM BIT(4) >> + >> +/* Reads to Remote Memory the system identifies as "Slow Memory" */ >> +#define READS_TO_REMOTE_S_MEM BIT(5) >> + >> +/* Dirty Victims to All Types of Memory */ >> +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) >> + >> +/* Max event bits supported */ >> +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) >> >> struct rdt_fs_context { >> struct kernfs_fs_context kfc; >> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> index 305fb0475970..25ff56ecb817 100644 >> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> @@ -1494,6 +1494,151 @@ static int mbm_local_config_show(struct kernfs_open_file *of, >> return 0; >> } >> >> +static void mon_event_config_write(void *info) >> +{ >> + struct mon_config_info *mon_info = info; >> + u32 msr_index; >> + >> + switch (mon_info->evtid) { >> + case QOS_L3_MBM_TOTAL_EVENT_ID: >> + msr_index = 0; >> + break; >> + case QOS_L3_MBM_LOCAL_EVENT_ID: >> + msr_index = 1; >> + break; >> + default: >> + /* Not expected to come here */ >> + return; >> + } >> + >> + wrmsr(MSR_IA32_EVT_CFG_BASE + msr_index, mon_info->mon_config, 0); >> +} > This duplicates most of mon_event_config_read() from earlier patch. Would a > utility like "mon_event_config_index_get()" help here? Yes. We can do that. > >> + >> +static int mbm_config_write(struct rdt_resource *r, struct rdt_domain *d, >> + u32 evtid, u32 val) >> +{ >> + struct mon_config_info mon_info = {0}; >> + cpumask_var_t cpu_mask; >> + int ret = 0, cpu; >> + >> + rdt_last_cmd_clear(); >> + >> + /* mon_config cannot be more than the supported set of events */ >> + if (val > MAX_EVT_CONFIG_BITS) { >> + rdt_last_cmd_puts("Invalid event configuration\n"); >> + return -EINVAL; >> + } >> + >> + cpus_read_lock(); >> + >> + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) { >> + rdt_last_cmd_puts("cpu_mask allocation failed\n"); >> + ret = -ENOMEM; >> + goto e_unlock; >> + } >> + >> + /* >> + * Read the current config value first. If both are same then >> + * we dont need to write it again. > "we dont need" -> "we don't need" Sure. > >> + */ >> + mon_info.evtid = evtid; >> + mondata_config_read(d, &mon_info); >> + if (mon_info.mon_config == val) >> + goto e_cpumask; >> + >> + mon_info.mon_config = val; >> + >> + /* Pick all the CPUs in the domain instance */ >> + for_each_cpu(cpu, &d->cpu_mask) >> + cpumask_set_cpu(cpu, cpu_mask); >> + > Why is it necessary to create a new CPU mask just to populate it > with all CPUs in another CPU mask? Why not just use the original CPU mask? I thought convention was to create a new mask when calling on_each_cpu_mask(). But, I dont see that requirement. I can use d->cpu_mask for this directly. > >> + /* >> + * Update MSR_IA32_EVT_CFG_BASE MSRs on all the CPUs in >> + * cpu_mask. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE >> + * are scoped at the domain level. Writing any of these MSRs >> + * on one CPU is supposed to be observed by all CPUs in the >> + * domain. However, the hardware team recommends to update >> + * these MSRs on all the CPUs in the domain. >> + */ >> + on_each_cpu_mask(cpu_mask, mon_event_config_write, &mon_info, 1); >> + >> + /* >> + * When an Event Configuration is changed, the bandwidth counters >> + * for all RMIDs and Events will be cleared by the hardware. The >> + * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for >> + * every RMID on the next read to any event for every RMID. >> + * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62) >> + * cleared while it is tracked by the hardware. Clear the >> + * mbm_local and mbm_total counts for all the RMIDs. >> + */ >> + memset(d->mbm_local, 0, sizeof(struct mbm_state) * r->num_rmid); >> + memset(d->mbm_total, 0, sizeof(struct mbm_state) * r->num_rmid); >> + >> +e_cpumask: >> + free_cpumask_var(cpu_mask); >> + >> +e_unlock: >> + cpus_read_unlock(); >> + >> + return ret; >> +} >> + >> +static int mon_config_parse(struct rdt_resource *r, char *tok, u32 evtid) >> +{ >> + char *dom_str = NULL, *id_str; >> + struct rdt_domain *d; >> + unsigned long dom_id, val; > Please use reverse fir tree order. Sure. > >> + int ret = 0; >> + >> +next: >> + if (!tok || tok[0] == '\0') >> + return 0; >> + >> + /* Start processing the strings for each domain */ >> + dom_str = strim(strsep(&tok, ";")); >> + id_str = strsep(&dom_str, "="); >> + >> + if (!dom_str || kstrtoul(id_str, 10, &dom_id)) { >> + rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n"); >> + return -EINVAL; >> + } >> + >> + if (!dom_str || kstrtoul(dom_str, 16, &val)) { >> + rdt_last_cmd_puts("Missing '=' or non-numeric event configuration value\n"); >> + return -EINVAL; >> + } >> + >> + list_for_each_entry(d, &r->domains, list) { >> + if (d->id == dom_id) { >> + ret = mbm_config_write(r, d, evtid, val); >> + if (ret) >> + return -EINVAL; >> + goto next; >> + } >> + } >> + >> + return -EINVAL; >> +} >> + >> +static ssize_t mbm_total_config_write(struct kernfs_open_file *of, >> + char *buf, size_t nbytes, loff_t off) >> +{ >> + struct rdt_resource *r = of->kn->parent->priv; >> + int ret; >> + >> + /* Valid input requires a trailing newline */ >> + if (nbytes == 0 || buf[nbytes - 1] != '\n') >> + return -EINVAL; >> + >> + rdt_last_cmd_clear(); >> + > How is this code protected from interference by other actions? Needs rdtgroup_mutex? Yes. Sure. Thanks Babu > >> + buf[nbytes - 1] = '\0'; >> + >> + ret = mon_config_parse(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); >> + >> + return ret ?: nbytes; >> +} >> + >> /* rdtgroup information files for one cache resource. */ >> static struct rftype res_common_files[] = { >> { >> @@ -1597,6 +1742,7 @@ static struct rftype res_common_files[] = { >> .mode = 0644, >> .kf_ops = &rdtgroup_kf_single_ops, >> .seq_show = mbm_total_config_show, >> + .write = mbm_total_config_write, >> }, >> { >> .name = "mbm_local_config", >> >> > Reinette -- Thanks Babu Moger