Add module init/exit code to create the sysfs attributes
"hardware_prefetcher_enable", "ip_prefetcher_enable" and
"adjacent_cache_line_prefetcher_enable" for x86.

This driver works only if the CPU model is mapped to a register
specification type (e.g. TYPE_L12_BASE) in pfctl_match[].

The details of the register (MSR_MISC_FEATURE_CONTROL) read and written
in this patch are described in Volume 4 of the "Intel 64 and IA-32
Architectures Software Developer's Manual":
https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html

Signed-off-by: Kohei Tarumizu <tarumizu.kohei@xxxxxxxxxxx>
---
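Note: the attributes created by this driver end up under the cacheinfo
index directories, e.g.

  /sys/devices/system/cpu/cpu0/cache/index0/prefetch_control/hardware_prefetcher_enable

Below is a minimal userspace sketch of how the interface can be
exercised. It is illustrative only and not part of the patch; the
cpu0/index0 path (and index0 being the L1 data cache) is an assumption
to keep the example short, so check the "level"/"type" files under each
indexN directory on the target machine.

/* toggle_l1_hwpf.c - write "0"/"1" to the attribute and read it back */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define ATTR_PATH "/sys/devices/system/cpu/cpu0/cache/index0/" \
		  "prefetch_control/hardware_prefetcher_enable"

int main(int argc, char **argv)
{
	/* "1" enables the prefetcher, "0" disables it (parsed by strtobool()). */
	const char *val = argc > 1 ? argv[1] : "0";
	char buf[4] = { 0 };
	int fd;

	/* Write the requested state. */
	fd = open(ATTR_PATH, O_WRONLY);
	if (fd < 0) {
		perror("open for write");
		return EXIT_FAILURE;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror("write");
		close(fd);
		return EXIT_FAILURE;
	}
	close(fd);

	/* Read back the current state: "1" means the prefetcher is enabled. */
	fd = open(ATTR_PATH, O_RDONLY);
	if (fd < 0) {
		perror("open for read");
		return EXIT_FAILURE;
	}
	if (read(fd, buf, sizeof(buf) - 1) < 0) {
		perror("read");
		close(fd);
		return EXIT_FAILURE;
	}
	close(fd);

	printf("hardware_prefetcher_enable: %s", buf);
	return EXIT_SUCCESS;
}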
 arch/x86/kernel/cpu/x86-pfctl.c | 363 ++++++++++++++++++++++++++++++++
 1 file changed, 363 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/x86-pfctl.c

diff --git a/arch/x86/kernel/cpu/x86-pfctl.c b/arch/x86/kernel/cpu/x86-pfctl.c
new file mode 100644
index 000000000000..154e927d092c
--- /dev/null
+++ b/arch/x86/kernel/cpu/x86-pfctl.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 FUJITSU LIMITED
+ *
+ * x86 Hardware Prefetch Control support
+ */
+
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include <asm/msr.h>
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has three types of register specifications.
+ *
+ * The register specification of TYPE_L12_BASE is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    Reserved
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:3] Reserved
+ *
+ * The register specification of TYPE_L12_PLUS is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    L2 Adjacent Cache Line Prefetcher Disable (R/W)
+ * [2]    DCU Hardware Prefetcher Disable (R/W)
+ * [3]    DCU IP Prefetcher Disable (R/W)
+ * [63:4] Reserved
+ *
+ * The register specification of TYPE_L12_XPHI is as follows:
+ * [0]    L2 Hardware Prefetcher Disable (R/W)
+ * [1]    DCU Hardware Prefetcher Disable (R/W)
+ * [63:2] Reserved
+ *
+ * See "Intel 64 and IA-32 Architectures Software Developer's Manual"
+ * (https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html)
+ * for register specification details.
+ */
+enum {
+	TYPE_L12_BASE,
+	TYPE_L12_PLUS,
+	TYPE_L12_XPHI,
+};
+
+struct x86_pfctl_attr {
+	struct device_attribute attr;
+	u64 mask;
+};
+
+struct pfctl_group {
+	unsigned int level;
+	enum cache_type type;
+	const struct attribute_group **groups;
+};
+
+static enum cpuhp_state hp_online;
+
+static inline unsigned int pfctl_dev_get_cpu(struct device *pfctl_dev)
+{
+	return *(u32 *)dev_get_drvdata(pfctl_dev);
+}
+
+static ssize_t
+pfctl_show(struct device *pfctl_dev, struct device_attribute *attr, char *buf)
+{
+	unsigned int cpu = pfctl_dev_get_cpu(pfctl_dev);
+	struct x86_pfctl_attr *xa;
+	u64 val;
+
+	xa = container_of(attr, struct x86_pfctl_attr, attr);
+
+	rdmsrl_on_cpu(cpu, MSR_MISC_FEATURE_CONTROL, &val);
+	return sysfs_emit(buf, "%d\n", val & xa->mask ? 0 : 1);
+}
+
+struct write_info {
+	u64 mask;
+	bool enable;
+};
+
+/*
+ * wrmsrl() in this patch is only done inside an interrupt-disabled region
+ * to avoid conflicting write access from other drivers.
+ */
+static void pfctl_write(void *info)
+{
+	struct write_info *winfo = info;
+	u64 reg;
+
+	reg = 0;
+	rdmsrl(MSR_MISC_FEATURE_CONTROL, reg);
+
+	if (winfo->enable)
+		reg &= ~winfo->mask;
+	else
+		reg |= winfo->mask;
+
+	wrmsrl(MSR_MISC_FEATURE_CONTROL, reg);
+}
+
+/*
+ * MSR_MISC_FEATURE_CONTROL has "core" scope, so define a lock to avoid
+ * conflicting write access from different logical processors in the same
+ * core.
+ */
+static DEFINE_MUTEX(pfctl_mutex);
+
+static ssize_t
+pfctl_store(struct device *pfctl_dev, struct device_attribute *attr,
+	    const char *buf, size_t size)
+{
+	unsigned int cpu = pfctl_dev_get_cpu(pfctl_dev);
+	struct x86_pfctl_attr *xa;
+	struct write_info info;
+
+	xa = container_of(attr, struct x86_pfctl_attr, attr);
+	info.mask = xa->mask;
+
+	if (strtobool(buf, &info.enable) < 0)
+		return -EINVAL;
+
+	mutex_lock(&pfctl_mutex);
+	smp_call_function_single(cpu, pfctl_write, &info, true);
+	mutex_unlock(&pfctl_mutex);
+
+	return size;
+}
+
+#define PFCTL_ATTR(_name, _level, _bit)					\
+	struct x86_pfctl_attr attr_l##_level##_##_name = {		\
+		.attr = __ATTR(_name, 0600, pfctl_show, pfctl_store),	\
+		.mask = BIT_ULL(_bit), }
+
+static PFCTL_ATTR(hardware_prefetcher_enable, 1, 2);
+static PFCTL_ATTR(hardware_prefetcher_enable, 2, 0);
+static PFCTL_ATTR(ip_prefetcher_enable, 1, 3);
+static PFCTL_ATTR(adjacent_cache_line_prefetcher_enable, 2, 1);
+
+static struct attribute *l1_attrs[] = {
+	&attr_l1_hardware_prefetcher_enable.attr.attr,
+	&attr_l1_ip_prefetcher_enable.attr.attr,
+	NULL,
+};
+
+static struct attribute *l2_attrs[] = {
+	&attr_l2_hardware_prefetcher_enable.attr.attr,
+	&attr_l2_adjacent_cache_line_prefetcher_enable.attr.attr,
+	NULL,
+};
+
+static struct attribute_group l1_group = {
+	.attrs = l1_attrs,
+};
+
+static struct attribute_group l2_group = {
+	.attrs = l2_attrs,
+};
+
+static const struct attribute_group *l1_groups[] = {
+	&l1_group,
+	NULL,
+};
+
+static const struct attribute_group *l2_groups[] = {
+	&l2_group,
+	NULL,
+};
+
+static const struct pfctl_group pfctl_groups[] = {
+	{
+		.level = 1,
+		.type = CACHE_TYPE_DATA,
+		.groups = l1_groups,
+	},
+	{
+		.level = 2,
+		.type = CACHE_TYPE_UNIFIED,
+		.groups = l2_groups,
+	},
+	{
+		.groups = NULL,
+	},
+};
+
+static const struct attribute_group **
+get_pfctl_attribute_groups(unsigned int level, enum cache_type type)
+{
+	int i;
+
+	for (i = 0; pfctl_groups[i].groups; i++)
+		if ((level == pfctl_groups[i].level) &&
+		    (type == pfctl_groups[i].type))
+			return pfctl_groups[i].groups;
+
+	return NULL;
+}
+
+static int remove_pfctl_attr(struct device *index_dev, void *data)
+{
+	struct device *pfctl_dev;
+
+	pfctl_dev = device_find_child_by_name(index_dev, "prefetch_control");
+	if (!pfctl_dev)
+		return 0;
+
+	device_unregister(pfctl_dev);
+	put_device(pfctl_dev);
+
+	return 0;
+}
+
+static int create_pfctl_attr(struct device *index_dev, void *data)
+{
+	struct cacheinfo *leaf = dev_get_drvdata(index_dev);
+	const struct attribute_group **groups;
+	struct device *pfctl_dev;
+
+	groups = get_pfctl_attribute_groups(leaf->level, leaf->type);
+	if (!groups)
+		return 0;
+
+	pfctl_dev = cpu_device_create(index_dev, data, groups,
+				      "prefetch_control");
+	if (IS_ERR(pfctl_dev))
+		return PTR_ERR(pfctl_dev);
+
+	return 0;
+}
+
+static int pfctl_online(unsigned int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct device *cache_dev;
+	int ret;
+
+	cache_dev = device_find_child_by_name(cpu_dev, "cache");
+	if (!cache_dev)
+		return -ENODEV;
+
+	ret = device_for_each_child(cache_dev, &cpu_dev->id, create_pfctl_attr);
+
+	put_device(cache_dev);
+
+	return ret;
+}
+
+static int pfctl_prepare_down(unsigned int cpu)
+{
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct device *cache_dev;
+
+	cache_dev = device_find_child_by_name(cpu_dev, "cache");
+	if (!cache_dev)
+		return 0;
+
+	device_for_each_child(cache_dev, NULL, remove_pfctl_attr);
+
+	put_device(cache_dev);
+
+	return 0;
+}
+
+/*
+ * Only BROADWELL_X has been tested on an actual machine at this point. Other
+ * models were defined based on the information in the "Intel 64 and IA-32
+ * Architectures Software Developer's Manual".
+ */
+static const struct x86_cpu_id pfctl_match[] __initconst = {
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, TYPE_L12_BASE),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, TYPE_L12_PLUS),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, TYPE_L12_XPHI),
+	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, TYPE_L12_XPHI),
+	{},
+};
+MODULE_DEVICE_TABLE(x86cpu, pfctl_match);
+
+static int __init x86_pfctl_init(void)
+{
+	const struct x86_cpu_id *m;
+	int ret;
+
+	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		return -ENODEV;
+
+	m = x86_match_cpu(pfctl_match);
+	if (!m)
+		return -ENODEV;
+
+	switch (m->driver_data) {
+	case TYPE_L12_BASE:
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	case TYPE_L12_PLUS:
+		break;
+	case TYPE_L12_XPHI:
+		attr_l1_hardware_prefetcher_enable.mask = BIT_ULL(1);
+		l1_attrs[1] = NULL;
+		l2_attrs[1] = NULL;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/x86-pfctl:online",
+				pfctl_online, pfctl_prepare_down);
+	if (ret < 0) {
+		pr_err("failed to register hotplug callbacks\n");
+		return ret;
+	}
+
+	hp_online = ret;
+
+	return 0;
+}
+
+static void __exit x86_pfctl_exit(void)
+{
+	cpuhp_remove_state(hp_online);
+}
+
+late_initcall(x86_pfctl_init);
+module_exit(x86_pfctl_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("FUJITSU LIMITED");
+MODULE_DESCRIPTION("x86 Hardware Prefetch Control Driver");
-- 
2.27.0