The MBA Software Controller(mba_sc) is a feedback loop that uses measurements of local memory bandwidth to adjust MBA throttling levels to keep workloads in a resctrl group within a target bandwidth set in the schemata file. Users may want to use total memory bandwidth instead of local to handle workloads that have poor NUMA localization. Update the once-per-second polling code to pick a chosen event (local or total memory bandwidth). Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx> --- include/linux/resctrl.h | 2 + arch/x86/kernel/cpu/resctrl/monitor.c | 80 ++++++++++++-------------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 + 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716..ccb0f50dc18c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -161,6 +161,7 @@ enum membw_throttle_mode { * @throttle_mode: Bandwidth throttling mode when threads request * different memory bandwidths * @mba_sc: True if MBA software controller(mba_sc) is enabled + * @mba_mbps_event: Monitoring event guiding feedback loop when @mba_sc is true * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct resctrl_membw { @@ -170,6 +171,7 @@ struct resctrl_membw { bool arch_needs_linear; enum membw_throttle_mode throttle_mode; bool mba_sc; + enum resctrl_event_id mba_mbps_event; u32 *mb_map; }; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 851b561850e0..2692ce7f708e 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -663,10 +663,11 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ static void mbm_bw_count(u32 closid, u32 rmid, struct rmid_read *rr) { - u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); - struct mbm_state *m = &rr->d->mbm_local[idx]; u64 cur_bw, bytes, cur_bytes; + struct mbm_state *m; + m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + WARN_ON(!m); cur_bytes = rr->val; bytes = cur_bytes - m->prev_bw_bytes; m->prev_bw_bytes = cur_bytes; @@ -752,20 +753,22 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; struct rdt_ctrl_domain *dom_mba; + enum resctrl_event_id evt_id; struct rdt_resource *r_mba; - u32 cur_bw, user_bw, idx; struct list_head *head; struct rdtgroup *entry; + u32 cur_bw, user_bw; - if (!is_mbm_local_enabled()) + if (!is_mbm_enabled()) return; r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + evt_id = r_mba->membw.mba_mbps_event; closid = rgrp->closid; rmid = rgrp->mon.rmid; - idx = resctrl_arch_rmid_idx_encode(closid, rmid); - pmbm_data = &dom_mbm->mbm_local[idx]; + pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id); + WARN_ON(!pmbm_data); dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba); if (!dom_mba) { @@ -784,7 +787,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) */ head = &rgrp->mon.crdtgrp_list; list_for_each_entry(entry, head, mon.crdtgrp_list) { - cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; + cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id); + WARN_ON(!cmbm_data); cur_bw += cmbm_data->prev_bw; } @@ -813,54 +817,42 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, - u32 closid, u32 rmid) +static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid, enum resctrl_event_id evtid) { + struct rdt_resource *rmba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; struct rmid_read rr = {0}; rr.r = r; rr.d = d; + rr.evtid = evtid; + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + if (IS_ERR(rr.arch_mon_ctx)) { + pr_warn_ratelimited("Failed to allocate monitor context: %ld", + PTR_ERR(rr.arch_mon_ctx)); + return; + } + __mon_event_count(closid, rmid, &rr); + + if (is_mba_sc(NULL) && rr.evtid == rmba->membw.mba_mbps_event) + mbm_bw_count(closid, rmid, &rr); + + resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); +} + +static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, + u32 closid, u32 rmid) +{ /* * This is protected from concurrent reads from user * as both the user and we hold the global mutex. */ - if (is_mbm_total_enabled()) { - rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID; - rr.val = 0; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; - } - - __mon_event_count(closid, rmid, &rr); - - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); - } - if (is_mbm_local_enabled()) { - rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; - rr.val = 0; - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); - if (IS_ERR(rr.arch_mon_ctx)) { - pr_warn_ratelimited("Failed to allocate monitor context: %ld", - PTR_ERR(rr.arch_mon_ctx)); - return; - } - - __mon_event_count(closid, rmid, &rr); - - /* - * Call the MBA software controller only for the - * control groups and when user has enabled - * the software controller explicitly. - */ - if (is_mba_sc(NULL)) - mbm_bw_count(closid, rmid, &rr); + if (is_mbm_total_enabled()) + mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_TOTAL_EVENT_ID); - resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); - } + if (is_mbm_local_enabled()) + mbm_update_one_event(r, d, closid, rmid, QOS_L3_MBM_LOCAL_EVENT_ID); } /* diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index d7163b764c62..aedb30120d50 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2505,6 +2505,7 @@ static void rdt_disable_ctx(void) static int rdt_enable_ctx(struct rdt_fs_context *ctx) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; int ret = 0; if (ctx->enable_cdpl2) { @@ -2520,6 +2521,7 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) } if (ctx->enable_mba_mbps) { + r->membw.mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID; ret = set_mba_sc(true); if (ret) goto out_cdpl3; -- 2.46.1