Hi Tony, On Thu, Dec 7, 2023 at 11:56 AM Tony Luck <tony.luck@xxxxxxxxx> wrote: > > The MBA Software Controller(mba_sc) is a feedback loop that uses > measurements of local memory bandwidth to adjust MBA throttling levels > to keep workloads in a resctrl group within a target bandwidth set in > the schemata file. > > Users may want to use total memory bandwidth instead of local to handle > workloads that have poor NUMA localization. > > Add a new mount option "mba_MBps_event={event_name}" where event_name > is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to It's "mbm_local_bytes" in the matching logic later on. > specify which monitoring event to use. > > Update the once-per-second polling code to use the chosen event (local > or total memory bandwidth). > > Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx> > --- > include/linux/resctrl.h | 2 + > arch/x86/kernel/cpu/resctrl/internal.h | 3 +- > arch/x86/kernel/cpu/resctrl/monitor.c | 21 +++++---- > arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++----- > 4 files changed, 63 insertions(+), 24 deletions(-) > > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h > index 66942d7fba7f..1feb3b2e64fa 100644 > --- a/include/linux/resctrl.h > +++ b/include/linux/resctrl.h > @@ -129,6 +129,7 @@ enum membw_throttle_mode { > * @throttle_mode: Bandwidth throttling mode when threads request > * different memory bandwidths > * @mba_sc: True if MBA software controller(mba_sc) is enabled > + * @mba_mbps_event: Event (local or total) for mba_sc > * @mb_map: Mapping of memory B/W percentage to memory B/W delay > */ > struct resctrl_membw { > @@ -138,6 +139,7 @@ struct resctrl_membw { > bool arch_needs_linear; > enum membw_throttle_mode throttle_mode; > bool mba_sc; > + enum resctrl_event_id mba_mbps_event; > u32 *mb_map; > }; > > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h > index a4f1aa15f0a2..8b9b8f664324 100644 > --- a/arch/x86/kernel/cpu/resctrl/internal.h > +++ b/arch/x86/kernel/cpu/resctrl/internal.h > @@ -58,7 +58,8 @@ struct rdt_fs_context { > struct kernfs_fs_context kfc; > bool enable_cdpl2; > bool enable_cdpl3; > - bool enable_mba_mbps; > + bool enable_mba_mbps_local; > + bool enable_mba_mbps_total; > bool enable_debug; > }; > > diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c > index f136ac046851..d9e590f1cbc3 100644 > --- a/arch/x86/kernel/cpu/resctrl/monitor.c > +++ b/arch/x86/kernel/cpu/resctrl/monitor.c > @@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) > */ > static void mbm_bw_count(u32 rmid, struct rmid_read *rr) > { > - struct mbm_state *m = &rr->d->mbm_local[rmid]; > u64 cur_bw, bytes, cur_bytes; > + struct mbm_state *m; > > + m = get_mbm_state(rr->d, rmid, rr->evtid); WARN_ON(m == NULL) since we assume the caller has confirmed rr->evtid is an MBM event? > cur_bytes = rr->val; > bytes = cur_bytes - m->prev_bw_bytes; > m->prev_bw_bytes = cur_bytes; > @@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) > u32 closid, rmid, cur_msr_val, new_msr_val; > struct mbm_state *pmbm_data, *cmbm_data; > u32 cur_bw, delta_bw, user_bw; > + enum resctrl_event_id evt_id; > struct rdt_resource *r_mba; > struct rdt_domain *dom_mba; > struct list_head *head; > struct rdtgroup *entry; > > - if (!is_mbm_local_enabled()) > + if (!is_mbm_enabled()) > return; > > r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; > + evt_id = r_mba->membw.mba_mbps_event; > > closid = rgrp->closid; > rmid = rgrp->mon.rmid; > - pmbm_data = &dom_mbm->mbm_local[rmid]; > + pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id); One defensive WARN_ON((!pmbm_data) for this function to ensure evt_id is valid for this call and the ones in the loop below? > > dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba); > if (!dom_mba) { > @@ -553,7 +556,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) > */ > head = &rgrp->mon.crdtgrp_list; > list_for_each_entry(entry, head, mon.crdtgrp_list) { > - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; > + cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id); > cur_bw += cmbm_data->prev_bw; > delta_bw += cmbm_data->delta_bw; > } > @@ -616,18 +619,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) > rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; > rr.val = 0; > __mon_event_count(rmid, &rr); > + if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event) > + mbm_bw_count(rmid, &rr); > } > if (is_mbm_local_enabled()) { > rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; > rr.val = 0; > __mon_event_count(rmid, &rr); > - > - /* > - * Call the MBA software controller only for the > - * control groups and when user has enabled > - * the software controller explicitly. > - */ > - if (is_mba_sc(NULL)) > + if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event) > mbm_bw_count(rmid, &rr); > } > } > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > index 69a1de92384a..5f64a0b2597c 100644 > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > @@ -2294,7 +2294,7 @@ static bool supports_mba_mbps(void) > { > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; > > - return (is_mbm_local_enabled() && > + return (is_mbm_enabled() && > r->alloc_capable && is_mba_linear()); > } > > @@ -2302,7 +2302,7 @@ static bool supports_mba_mbps(void) > * Enable or disable the MBA software controller > * which helps user specify bandwidth in MBps. > */ > -static int set_mba_sc(bool mba_sc) > +static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event) > { > struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; > u32 num_closid = resctrl_arch_get_num_closid(r); > @@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc) > return -EINVAL; > > r->membw.mba_sc = mba_sc; > + r->membw.mba_mbps_event = mba_mbps_event; > > list_for_each_entry(d, &r->domains, list) { > for (i = 0; i < num_closid; i++) > @@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void) > { > resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); > resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); > - set_mba_sc(false); > + set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID); > > resctrl_debug = false; > } > > static int rdt_enable_ctx(struct rdt_fs_context *ctx) > { > + enum resctrl_event_id mba_mbps_event; > int ret = 0; > > if (ctx->enable_cdpl2) { > @@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) > goto out_cdpl2; > } > > - if (ctx->enable_mba_mbps) { > - ret = set_mba_sc(true); > + if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) { > + if (ctx->enable_mba_mbps_total) > + mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID; > + else > + mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; Total takes precedence over local when the user picks both. > + ret = set_mba_sc(true, mba_mbps_event); > if (ret) > goto out_cdpl3; > } > @@ -2683,15 +2689,17 @@ enum rdt_param { > Opt_cdp, > Opt_cdpl2, > Opt_mba_mbps, > + Opt_mba_mbps_event, > Opt_debug, > nr__rdt_params > }; > > static const struct fs_parameter_spec rdt_fs_parameters[] = { > - fsparam_flag("cdp", Opt_cdp), > - fsparam_flag("cdpl2", Opt_cdpl2), > - fsparam_flag("mba_MBps", Opt_mba_mbps), > - fsparam_flag("debug", Opt_debug), > + fsparam_flag("cdp", Opt_cdp), > + fsparam_flag("cdpl2", Opt_cdpl2), > + fsparam_flag("mba_MBps", Opt_mba_mbps), > + fsparam_string("mba_MBps_event", Opt_mba_mbps_event), > + fsparam_flag("debug", Opt_debug), > {} > }; > > @@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) > case Opt_mba_mbps: > if (!supports_mba_mbps()) > return -EINVAL; > - ctx->enable_mba_mbps = true; > + if (is_mbm_local_enabled()) > + ctx->enable_mba_mbps_local = true; > + else > + return -EINVAL; > + return 0; > + case Opt_mba_mbps_event: > + if (!supports_mba_mbps()) > + return -EINVAL; > + if (!strcmp("mbm_local_bytes", param->string)) { > + if (!is_mbm_local_enabled()) > + return -EINVAL; > + ctx->enable_mba_mbps_local = true; > + } else if (!strcmp("mbm_total_bytes", param->string)) { > + if (!is_mbm_total_enabled()) > + return -EINVAL; > + ctx->enable_mba_mbps_total = true; > + } else { > + return -EINVAL; It looks like if I pass "mba_MBps_event=mbm_total_bytes,mba_MBps_event=mbm_local_bytes" I can set both flags true. > + } > return 0; > case Opt_debug: > ctx->enable_debug = true; > @@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn, > return ret; > } > > +static char *mba_sc_event_opt_name(struct rdt_resource *r) > +{ > + if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID) > + return ",mba_MBps_event=mbm_local_bytes"; > + else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID) > + return ",mba_MBps_event=mbm_total_bytes"; > + return ""; > +} > + > static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) > { > + struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; > + > if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3)) > seq_puts(seq, ",cdp"); > > if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) > seq_puts(seq, ",cdpl2"); > > - if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl)) > - seq_puts(seq, ",mba_MBps"); > + if (is_mba_sc(r_mba)) > + seq_puts(seq, mba_sc_event_opt_name(r_mba)); > > if (resctrl_debug) > seq_puts(seq, ",debug"); > -- > 2.41.0 > Consider the setting-both-events quirk and a little bit of defensive programming for get_mbm_data() returning NULL. Assuming the case of "Local" is fixed in the commit message: Reviewed-by: Peter Newman <peternewman@xxxxxxxxxx> Thanks!