Hi Reinette, On 9/13/24 15:46, Reinette Chatre wrote: > Hi Babu, > > On 8/16/24 9:16 AM, Babu Moger wrote: >> SDCIAE feature can be enabled by setting bit 1 in MSR L3_QOS_EXT_CFG. >> When the state of SDCIAE is changed, it must be changed to the updated >> value on all logical processors in the QOS Domain. By default, the SDCIAE >> feature is disabled. >> >> Introduce arch handlers to detect and enable/disable the feature. >> >> The SDCIAE feature details are available in APM listed below [1]. >> [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming >> Publication # 24593 Revision 3.41 section 19.4.7 L3 Smart Data Cache >> Injection Allocation Enforcement (SDCIAE) >> >> Signed-off-by: Babu Moger <babu.moger@xxxxxxx> >> Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 >> --- >> arch/x86/include/asm/msr-index.h | 1 + >> arch/x86/kernel/cpu/resctrl/internal.h | 12 +++++ >> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 ++++++++++++++++++++++++++ >> 3 files changed, 74 insertions(+) >> >> diff --git a/arch/x86/include/asm/msr-index.h >> b/arch/x86/include/asm/msr-index.h >> index 82c6a4d350e0..c78afed3c21f 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -1181,6 +1181,7 @@ >> /* - AMD: */ >> #define MSR_IA32_MBA_BW_BASE 0xc0000200 >> #define MSR_IA32_SMBA_BW_BASE 0xc0000280 >> +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff >> #define MSR_IA32_EVT_CFG_BASE 0xc0000400 >> /* MSR_IA32_VMX_MISC bits */ >> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h >> b/arch/x86/kernel/cpu/resctrl/internal.h >> index 955999aecfca..ceb0e8e1ed76 100644 >> --- a/arch/x86/kernel/cpu/resctrl/internal.h >> +++ b/arch/x86/kernel/cpu/resctrl/internal.h >> @@ -56,6 +56,9 @@ >> /* Max event bits supported */ >> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) >> +/* Setting bit 1 in L3_QOS_EXT_CFG enables the SDCIAE feature. */ >> +#define SDCIAE_ENABLE_BIT 1 >> + >> /** >> * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring >> those that >> * aren't marked nohz_full >> @@ -477,6 +480,7 @@ struct rdt_parse_data { >> * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth >> * Monitoring Event Configuration (BMEC) is supported. >> * @cdp_enabled: CDP state of this resource >> + * @sdciae_enabled: SDCIAE feature is enabled >> * >> * Members of this structure are either private to the architecture >> * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g. >> @@ -491,6 +495,7 @@ struct rdt_hw_resource { >> unsigned int mbm_width; >> unsigned int mbm_cfg_mask; >> bool cdp_enabled; >> + bool sdciae_enabled; >> }; >> static inline struct rdt_hw_resource *resctrl_to_arch_res(struct >> rdt_resource *r) >> @@ -536,6 +541,13 @@ int resctrl_arch_set_cdp_enabled(enum >> resctrl_res_level l, bool enable); >> void arch_mon_domain_online(struct rdt_resource *r, struct >> rdt_mon_domain *d); >> +static inline bool resctrl_arch_get_sdciae_enabled(enum >> resctrl_res_level l) >> +{ >> + return rdt_resources_all[l].sdciae_enabled; >> +} >> + >> +int resctrl_arch_set_sdciae_enabled(enum resctrl_res_level l, bool >> enable); >> + >> /* >> * To return the common struct rdt_resource, which is contained in struct >> * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource. >> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> index d7163b764c62..c62d6183bfe4 100644 >> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> @@ -1789,6 +1789,67 @@ static ssize_t >> mbm_local_bytes_config_write(struct kernfs_open_file *of, >> return ret ?: nbytes; >> } >> +static void resctrl_sdciae_msrwrite(void *arg) >> +{ >> + bool *enable = arg; >> + >> + if (*enable) >> + msr_set_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); >> + else >> + msr_clear_bit(MSR_IA32_L3_QOS_EXT_CFG, SDCIAE_ENABLE_BIT); >> +} > > (hmmm ... so there is an effort to make the rest of the code not be > resource specific ... but then the lowest level has L3 MSR hardcoded) Not very clear on this. I can separate the patch into two, one arch specific and the other FS specific. > >> + >> +static int resctrl_sdciae_setup(enum resctrl_res_level l, bool enable) >> +{ >> + struct rdt_resource *r = &rdt_resources_all[l].r_resctrl; >> + struct rdt_ctrl_domain *d; >> + >> + /* Update L3_QOS_EXT_CFG MSR on all the CPUs in all domains*/ > > (please note some space issues above) Sure. > >> + list_for_each_entry(d, &r->ctrl_domains, hdr.list) >> + on_each_cpu_mask(&d->hdr.cpu_mask, resctrl_sdciae_msrwrite, >> &enable, 1); >> + >> + return 0; > > It seems that this will be inside the arch specific code while > resctrl_arch_set_sdciae_enabled() will be called by resctrl fs code. It is > thus not clear why above returns an int, thus forcing callers to do > error code handling, when it will always just return 0. Yes. It is returning 0 right now. Keeps the options open for other arch's report error. Looks like we heading to make this generic feature. > >> +} >> + >> +static int resctrl_sdciae_enable(enum resctrl_res_level l) >> +{ >> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; >> + int ret = 0; >> + >> + if (!hw_res->sdciae_enabled) { >> + ret = resctrl_sdciae_setup(l, true); >> + if (!ret) >> + hw_res->sdciae_enabled = true; >> + } >> + >> + return ret; > > Same here ... this will always return 0, no? Yes. it is returns 0 always on AMD. Keeps the options open for other arch's report error. > >> +} >> + >> +static void resctrl_sdciae_disable(enum resctrl_res_level l) >> +{ >> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; >> + >> + if (hw_res->sdciae_enabled) { >> + resctrl_sdciae_setup(l, false); >> + hw_res->sdciae_enabled = false; >> + } >> +} >> + >> +int resctrl_arch_set_sdciae_enabled(enum resctrl_res_level l, bool enable) >> +{ >> + struct rdt_hw_resource *hw_res = &rdt_resources_all[l]; >> + >> + if (!hw_res->r_resctrl.sdciae_capable) >> + return -EINVAL; >> + >> + if (enable) >> + return resctrl_sdciae_enable(l); >> + >> + resctrl_sdciae_disable(l); >> + >> + return 0; >> +} >> + >> /* rdtgroup information files for one cache resource. */ >> static struct rftype res_common_files[] = { >> { > > Reinette > -- Thanks Babu Moger