Hi Babu,
On 7/3/24 2:48 PM, Babu Moger wrote:
The ABMC feature provides an option to the user to assign a hardware
counter to an RMID and monitor the bandwidth as long as it is assigned.
The assigned RMID will be tracked by the hardware until the user unassigns
it manually.
Individual counters are configured by writing to L3_QOS_ABMC_CFG MSR
and specifying the counter id, bandwidth source, and bandwidth types.
Provide the interface to assign the counter ids to RMID.
Again this is a mix of a couple of layers where this single patch
introduces fs code (mbm_cntr_alloc() and rdtgroup_assign_cntr()) as well
as architecture specific code (resctrl_arch_assign_cntr() and rdtgroup_abmc_cfg()).
Lumping this all together without any guidance to reader makes this very difficult
to navigate. This work needs to be split into fs and arch parts with
clear descriptions of how the layers interact.
The feature details are documented in the APM listed below [1].
[1] AMD64 Architecture Programmer's Manual Volume 2: System Programming
Publication # 24593 Revision 3.41 section 19.3.3.3 Assignable Bandwidth
Monitoring (ABMC).
Signed-off-by: Babu Moger <babu.moger@xxxxxxx>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
---
v5: Few name changes to match cntr_id.
Changed the function names to
rdtgroup_assign_cntr
resctr_arch_assign_cntr
More comments on commit log.
Added function summary.
v4: Commit message update.
User bitmap APIs where applicable.
Changed the interfaces considering MPAM(arm).
Added domain specific assignment.
v3: Removed the static from the prototype of rdtgroup_assign_abmc.
The function is not called directly from user anymore. These
changes are related to global assignment interface.
v2: Minor text changes in commit message.
---
arch/x86/kernel/cpu/resctrl/internal.h | 3 +
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 96 ++++++++++++++++++++++++++
2 files changed, 99 insertions(+)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 6925c947682d..66460375056c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -708,6 +708,9 @@ void __init resctrl_file_fflags_init(const char *config,
unsigned long fflags);
void resctrl_arch_mbm_evt_config(struct rdt_hw_mon_domain *hw_dom);
unsigned int mon_event_config_index_get(u32 evtid);
+int resctrl_arch_assign_cntr(struct rdt_mon_domain *d, u32 evtid, u32 rmid,
+ u32 cntr_id, u32 closid, bool enable);
+int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, u32 evtid);
void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
int resctrl_find_cleanest_closid(void);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d2663f1345b7..44f6eff42c30 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -202,6 +202,19 @@ static void mbm_cntrs_init(void)
mbm_cntrs_free_map_len = r->mon.num_mbm_cntrs;
}
+static int mbm_cntr_alloc(void)
+{
+ u32 cntr_id = find_first_bit(&mbm_cntrs_free_map,
+ mbm_cntrs_free_map_len);
+
+ if (cntr_id >= mbm_cntrs_free_map_len)
+ return -ENOSPC;
+
+ __clear_bit(cntr_id, &mbm_cntrs_free_map);
+
+ return cntr_id;
+}
+
/**
* rdtgroup_mode_by_closid - Return mode of resource group with closid
* @closid: closid if the resource group
@@ -1860,6 +1873,89 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
return ret ?: nbytes;
}
+static void rdtgroup_abmc_cfg(void *info)
+{
+ u64 *msrval = info;
+
+ wrmsrl(MSR_IA32_L3_QOS_ABMC_CFG, *msrval);
+}
+
+/*
+ * Send an IPI to the domain to assign the counter id to RMID.
+ */
+int resctrl_arch_assign_cntr(struct rdt_mon_domain *d, u32 evtid, u32 rmid,
u32 evtid -> enum resctrl_event_id evtid
+ u32 cntr_id, u32 closid, bool enable)
+{
+ struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
+ union l3_qos_abmc_cfg abmc_cfg = { 0 };
+ struct arch_mbm_state *arch_mbm;
+
+ abmc_cfg.split.cfg_en = 1;
+ abmc_cfg.split.cntr_en = enable ? 1 : 0;
+ abmc_cfg.split.cntr_id = cntr_id;
+ abmc_cfg.split.bw_src = rmid;
+
+ /* Update the event configuration from the domain */
+ if (evtid == QOS_L3_MBM_TOTAL_EVENT_ID) {
+ abmc_cfg.split.bw_type = hw_dom->mbm_total_cfg;
+ arch_mbm = &hw_dom->arch_mbm_total[rmid];
+ } else {
+ abmc_cfg.split.bw_type = hw_dom->mbm_local_cfg;
+ arch_mbm = &hw_dom->arch_mbm_local[rmid];
+ }
+
+ smp_call_function_any(&d->hdr.cpu_mask, rdtgroup_abmc_cfg, &abmc_cfg, 1);
+
+ /*
+ * Reset the architectural state so that reading of hardware
+ * counter is not considered as an overflow in next update.
+ */
+ if (arch_mbm)
+ memset(arch_mbm, 0, sizeof(struct arch_mbm_state));
+
+ return 0;
+}
+
+/*
+ * Assign a hardware counter id to the group. Allocate a new counter id
+ * if the event is unassigned.
+ */
+int rdtgroup_assign_cntr(struct rdtgroup *rdtgrp, u32 evtid)
u32 evtid -> enum resctrl_event_id evtid
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ int cntr_id = 0, index;
+ struct rdt_mon_domain *d;
reverse fir
+
+ index = mon_event_config_index_get(evtid);
+ if (index == INVALID_CONFIG_INDEX) {
+ rdt_last_cmd_puts("Invalid event id\n");
This is a kernel bug and can be a WARN (once) instead. No need to message user space.
+ return -EINVAL;
+ }
+
+ /* Nothing to do if event has been assigned already */
+ if (rdtgrp->mon.cntr_id[index] != MON_CNTR_UNSET) {
+ rdt_last_cmd_puts("ABMC counter is assigned already\n");
+ return 0;
+ }
+
+ /*
+ * Allocate a new counter id and update domains
+ */
+ cntr_id = mbm_cntr_alloc();
+ if (cntr_id < 0) {
+ rdt_last_cmd_puts("Out of ABMC counters\n");
+ return -ENOSPC;
+ }
+
+ rdtgrp->mon.cntr_id[index] = cntr_id;
+
+ list_for_each_entry(d, &r->mon_domains, hdr.list)
+ resctrl_arch_assign_cntr(d, evtid, rdtgrp->mon.rmid,
+ cntr_id, rdtgrp->closid, 1);
+
+ return 0;
+}
+
/* rdtgroup information files for one cache resource. */
static struct rftype res_common_files[] = {
{
Reinette