From: Sudeep Holla <sudeep.holla@xxxxxxx> This patch adds initial support for providing processor cache information to userspace through the sysfs interface. This is based on the already existing implementations (x86, ia64, s390 and powerpc) and hence the interface is intended to be fully compatible. The main purpose of this generic support is to avoid further code duplication to support new architectures and also to unify all the existing different implementations. This implementation maintains the hierarchy of cache objects which reflects the system's cache topology. Cache objects are instantiated as needed as CPUs come online. The cache objects are replicated per-cpu even if they are shared. A per-cpu array of cache information is maintained, mainly for sysfs-related bookkeeping. It also implements the shared_cpu_map attribute, which is essential for enabling both kernel and user-space to discover the system's overall cache topology. This patch also adds the missing ABI documentation for the cacheinfo sysfs interface, which is already well defined and widely used. Signed-off-by: Sudeep Holla <sudeep.holla@xxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Rob Herring <robh@xxxxxxxxxx> Cc: linux-doc@xxxxxxxxxxxxxxx --- Documentation/ABI/testing/sysfs-devices-system-cpu | 40 ++ drivers/base/Makefile | 2 +- drivers/base/cacheinfo.c | 520 +++++++++++++++++++++ include/linux/cacheinfo.h | 60 +++ 4 files changed, 621 insertions(+), 1 deletion(-) create mode 100644 drivers/base/cacheinfo.c create mode 100644 include/linux/cacheinfo.h diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index d5a0d33..dabe03e 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -224,3 +224,43 @@ Description: Parameters for the Intel P-state driver frequency range.
More details can be found in Documentation/cpu-freq/intel-pstate.txt + +What: /sys/devices/system/cpu/cpu*/cache/index*/<set_of_attributes_mentioned_below> +Date: February 2014 +Contact: Linux kernel mailing list <linux-kernel@xxxxxxxxxxxxxxx> +Description: Parameters for the CPU cache attributes + + attributes: + - writethrough: data is written to both the cache line + and to the block in the lower-level memory + - writeback: data is written only to the cache line and + the modified cache line is written to main + memory only when it is replaced + - writeallocate: allocate a memory location to a cache line + on a cache miss because of a write + - readallocate: allocate a memory location to a cache line + on a cache miss because of a read + + coherency_line_size: the minimum amount of data that gets transferred + + level: the cache hierarchy in the multi-level cache configuration + + number_of_sets: total number of sets in the cache, a set is a + collection of cache lines with the same cache index + + physical_line_partition: number of physical cache lines per cache tag + + shared_cpu_list: the list of cpus sharing the cache + + shared_cpu_map: logical cpu mask containing the list of cpus sharing + the cache + + size: the total cache size in kB + + type: + - instruction: cache that only holds instructions + - data: cache that only caches data + - unified: cache that holds both data and instructions + + ways_of_associativity: degree of freedom in placing a particular block + of memory in the cache diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 04b314e..bad2ff8 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -4,7 +4,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ driver.o class.o platform.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ - topology.o container.o + topology.o container.o cacheinfo.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += 
power/
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
new file mode 100644
index 0000000..43bbd34
--- /dev/null
+++ b/drivers/base/cacheinfo.c
@@ -0,0 +1,612 @@
+/*
+ * cacheinfo support - processor cache information via sysfs
+ *
+ * Based on arch/x86/kernel/cpu/intel_cacheinfo.c
+ * Author: Sudeep Holla <sudeep.holla@xxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/bitops.h>
+#include <linux/cacheinfo.h>
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+
+/* per-cpu cache bookkeeping: number of leaves and the cache_info array */
+static DEFINE_PER_CPU(struct cpu_cacheinfo, ci_cpu_cache_info);
+#define ci_cacheinfo(cpu)	(&per_cpu(ci_cpu_cache_info, cpu))
+#define cache_leaves(cpu)	(ci_cacheinfo(cpu)->num_leaves)
+#define per_cpu_cacheinfo(cpu)	(ci_cacheinfo(cpu)->info_list)
+
+/* Return the per-cpu cacheinfo bookkeeping structure of @cpu (never NULL). */
+struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu)
+{
+	return ci_cacheinfo(cpu);
+}
+
+#ifdef CONFIG_OF
+/*
+ * Record, for every cache leaf of @cpu, the device tree node describing it:
+ * the cpu node itself for level 1 leaves, the next cache node otherwise.
+ *
+ * Returns 0 on success, -ENODEV if @cpu has no device, or -ENOENT if the
+ * cpu has no device tree node or the DT runs out of cache nodes before
+ * every leaf has been assigned one.
+ */
+static int cache_setup_of_node(unsigned int cpu)
+{
+	struct device_node *np;
+	struct cache_info *this_leaf;
+	struct device *cpu_dev = get_cpu_device(cpu);
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	unsigned int index = 0;
+
+	/* skip if of_node is already populated */
+	if (this_cpu_ci->info_list->of_node)
+		return 0;
+
+	if (!cpu_dev) {
+		pr_err("No cpu device for CPU %d\n", cpu);
+		return -ENODEV;
+	}
+	np = cpu_dev->of_node;
+	if (!np) {
+		pr_err("Failed to find cpu%d device node\n", cpu);
+		return -ENOENT;
+	}
+
+	while (np && index < cache_leaves(cpu)) {
+		this_leaf = this_cpu_ci->info_list + index;
+		if (this_leaf->level != 1)
+			np = of_find_next_cache_node(np);
+		else
+			np = of_node_get(np);/* cpu node itself */
+		this_leaf->of_node = np;
+		index++;
+	}
+
+	/*
+	 * A NULL np means some leaf got no node; leaving its of_node NULL would
+	 * make unrelated CPUs compare equal in cache_leaves_are_shared().
+	 */
+	if (!np)
+		return -ENOENT;
+
+	return 0;
+}
+
+/* Two leaves are treated as the same cache iff they share a DT node. */
+static inline bool cache_leaves_are_shared(struct cache_info *this_leaf,
+					   struct cache_info *sib_leaf)
+{
+	return sib_leaf->of_node == this_leaf->of_node;
+}
+
+/*
+ * Build shared_cpu_map for every leaf of @cpu from the device tree and add
+ * @cpu to the maps of the online siblings that share each leaf.
+ *
+ * Returns 0 on success or the error from cache_setup_of_node().
+ */
+static int of_cache_shared_cpu_map_setup(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cache_info *this_leaf, *sib_leaf;
+	unsigned int index;
+	int ret;
+
+	ret = cache_setup_of_node(cpu);
+	if (ret)
+		return ret;
+
+	for (index = 0; index < cache_leaves(cpu); index++) {
+		unsigned int i;
+		this_leaf = this_cpu_ci->info_list + index;
+		cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+
+		for_each_online_cpu(i) {
+			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
+			if (i == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
+			sib_leaf = sib_cpu_ci->info_list + index;
+			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
+				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
+				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			}
+		}
+	}
+
+	return 0;
+}
+#else
+static inline int of_cache_shared_cpu_map_setup(unsigned int cpu)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Remove @cpu from the shared_cpu_map of every sibling, remove the siblings
+ * from @cpu's own maps and drop the DT node reference held by each leaf.
+ */
+static void cache_shared_cpu_map_remove(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cache_info *this_leaf, *sib_leaf;
+	unsigned int sibling, index;
+
+	for (index = 0; index < cache_leaves(cpu); index++) {
+		this_leaf = this_cpu_ci->info_list + index;
+		for_each_cpu(sibling, &this_leaf->shared_cpu_map) {
+			struct cpu_cacheinfo *sib_cpu_ci;
+			if (sibling == cpu) /* skip itself */
+				continue;
+			sib_cpu_ci = get_cpu_cacheinfo(sibling);
+			/* the sibling's leaves may already have been freed */
+			if (!sib_cpu_ci->info_list)
+				continue;
+			sib_leaf = sib_cpu_ci->info_list + index;
+			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
+			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
+		}
+		of_node_put(this_leaf->of_node);
+	}
+}
+
+/*
+ * Arch hook, expected to set the number of cache leaves of @cpu before the
+ * leaf array is allocated. The weak default reports that none is available.
+ */
+int __weak init_cache_level(unsigned int cpu)
+{
+	return -ENOENT;
+}
+
+/*
+ * Arch hook, expected to fill in the already allocated cache_info array of
+ * @cpu. The weak default reports that no cache information is available.
+ */
+int __weak populate_cache_leaves(unsigned int cpu)
+{
+	return -ENOENT;
+}
+
+/* Tear down sharing information and free the cache_info array of @cpu. */
+static void free_cache_attributes(unsigned int cpu)
+{
+	cache_shared_cpu_map_remove(cpu);
+
+	kfree(per_cpu_cacheinfo(cpu));
+	per_cpu_cacheinfo(cpu) = NULL;
+}
+
+/*
+ * Discover the cache leaves of @cpu and set up their sharing information.
+ * Must be executed on the cpu whose cache attributes are being detected.
+ *
+ * Returns 0 on success, -ENOENT if no cache information is available,
+ * -ENOMEM on allocation failure or the error of the failing setup step.
+ */
+static int detect_cache_attributes(unsigned int cpu)
+{
+	int ret;
+
+	/* zero leaves would make the allocator return ZERO_SIZE_PTR, not NULL */
+	if (init_cache_level(cpu) || !cache_leaves(cpu))
+		return -ENOENT;
+
+	/*
+	 * Callers are the CPU_STARTING notifier and an
+	 * smp_call_function_single() callback; neither may sleep.
+	 */
+	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
+					 sizeof(struct cache_info), GFP_ATOMIC);
+	if (per_cpu_cacheinfo(cpu) == NULL)
+		return -ENOMEM;
+
+	ret = populate_cache_leaves(cpu);
+	if (ret)
+		goto free_ci;
+	/*
+	 * For systems using DT for cache hierarchy, of_node and shared_cpu_map
+	 * will be set up here. Otherwise populate_cache_leaves needs to set
+	 * shared_cpu_map and next-level-cache should not be specified in DT
+	 */
+	ret = of_cache_shared_cpu_map_setup(cpu);
+	if (ret)
+		goto free_ci;
+	return 0;
+
+free_ci:
+	free_cache_attributes(cpu);
+	return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+/* pointer to kobject for cpuX/cache */
+static DEFINE_PER_CPU(struct kobject *, ci_cache_kobject);
+#define per_cpu_cache_kobject(cpu)	(per_cpu(ci_cache_kobject, cpu))
+
+/* kobject backing cpuX/cache/indexY; identifies one leaf of one cpu */
+struct index_kobject {
+	struct kobject kobj;
+	unsigned int cpu;
+	unsigned short index;
+};
+
+/* CPUs whose cache sysfs hierarchy has been fully created */
+static cpumask_t cache_dev_map;
+
+/* pointer to array of kobjects for cpuX/cache/indexY */
+static DEFINE_PER_CPU(struct index_kobject *, ci_index_kobject);
+#define per_cpu_index_kobject(cpu)	(per_cpu(ci_index_kobject, cpu))
+#define INDEX_KOBJECT_PTR(cpu, idx)	(&((per_cpu_index_kobject(cpu))[idx]))
+
+/* Generate <file_name>_show(): print a numeric leaf field, "Unknown" if 0 */
+#define show_one_plus(file_name, object)				\
+static ssize_t file_name##_show(struct cache_info *this_leaf,		\
+				char *buf, unsigned int cpu)		\
+{									\
+	if (!this_leaf->object)						\
+		return sprintf(buf, "Unknown\n");			\
+	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object);	\
+}
+
+show_one_plus(level, level);
+show_one_plus(coherency_line_size, coherency_line_size);
+show_one_plus(ways_of_associativity, ways_of_associativity);
+show_one_plus(number_of_sets, number_of_sets);
+show_one_plus(physical_line_partition, physical_line_partition);
+
+/* Show the leaf's size divided by 1024, suffixed with 'K'. */
+static ssize_t size_show(struct cache_info *this_leaf, char *buf,
+			 unsigned int cpu)
+{
+	/* size is an unsigned int, so it has to be printed with %u */
+	return sprintf(buf, "%uK\n", this_leaf->size >> 10);
+}
+
+/*
+ * Print the CPUs sharing @this_leaf into @buf: as a cpu list when @type is
+ * non-zero, as a cpu mask otherwise. Returns the number of bytes written.
+ */
+static ssize_t shared_cpu_map_show_func(struct cache_info *this_leaf,
+					int type, char *buf)
+{
+	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
+	int n = 0;
+
+	if (len > 1) {
+		const struct cpumask *mask = &this_leaf->shared_cpu_map;
+		n = type ? cpulist_scnprintf(buf, len - 2, mask) :
+			   cpumask_scnprintf(buf, len - 2, mask);
+		buf[n++] = '\n';
+		buf[n] = '\0';
+	}
+	return n;
+}
+
+static inline ssize_t shared_cpu_map_show(struct cache_info *leaf, char *buf,
+					  unsigned int cpu)
+{
+	return shared_cpu_map_show_func(leaf, 0, buf);
+}
+
+static inline ssize_t shared_cpu_list_show(struct cache_info *leaf,
+					   char *buf, unsigned int cpu)
+{
+	return shared_cpu_map_show_func(leaf, 1, buf);
+}
+
+/* Show the cache type as "Data", "Instruction", "Unified" or "Unknown". */
+static ssize_t type_show(struct cache_info *this_leaf, char *buf,
+			 unsigned int cpu)
+{
+	switch (this_leaf->type) {
+	case CACHE_TYPE_DATA:
+		return sprintf(buf, "Data\n");
+	case CACHE_TYPE_INST:
+		return sprintf(buf, "Instruction\n");
+	case CACHE_TYPE_UNIFIED:
+		return sprintf(buf, "Unified\n");
+	default:
+		return sprintf(buf, "Unknown\n");
+	}
+}
+
+/* Show one line per attribute flag set in the leaf, "Unknown" if none is. */
+static ssize_t attributes_show(struct cache_info *this_leaf, char *buf,
+			       unsigned int cpu)
+{
+	unsigned int ci_attr = this_leaf->attributes;
+	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf - 2;
+	int n = 0;
+
+	if (!ci_attr)
+		return sprintf(buf, "Unknown\n");
+
+	if (ci_attr & CACHE_WRITE_THROUGH)
+		n += snprintf(buf + n, len - n, "WriteThrough\n");
+	if (ci_attr & CACHE_WRITE_BACK)
+		n += snprintf(buf + n, len - n, "WriteBack\n");
+	if (ci_attr & CACHE_READ_ALLOCATE)
+		n += snprintf(buf + n, len - n, "ReadAllocate\n");
+	if (ci_attr & CACHE_WRITE_ALLOCATE)
+		n += snprintf(buf + n, len - n, "WriteAllocate\n");
+	buf[n] = '\0';
+	return n;
+}
+
+#define to_object(k)	container_of(k, struct index_kobject, kobj)
+#define to_attr(a)	container_of(a, struct cache_attr, attr)
+
+#define define_one_ro(_name) \
+static struct cache_attr _name = __ATTR_RO(_name)
+
+define_one_ro(level);
+define_one_ro(type);
+define_one_ro(coherency_line_size);
+define_one_ro(ways_of_associativity);
+define_one_ro(number_of_sets);
+define_one_ro(size);
+define_one_ro(attributes);
+define_one_ro(shared_cpu_map);
+define_one_ro(shared_cpu_list);
+define_one_ro(physical_line_partition);
+
+static struct attribute *default_attrs[] = {
+	&type.attr,
+	&level.attr,
+	&coherency_line_size.attr,
+	&ways_of_associativity.attr,
+	&number_of_sets.attr,
+	&size.attr,
+	&attributes.attr,
+	&physical_line_partition.attr,
+	&shared_cpu_map.attr,
+	&shared_cpu_list.attr,
+	NULL
+};
+
+/* sysfs read entry point: dispatch to the attribute's show() for the leaf */
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct cache_attr *fattr = to_attr(attr);
+	struct index_kobject *this_idx = to_object(kobj);
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(this_idx->cpu);
+	ssize_t ret;
+
+	ret = fattr->show ?
+		fattr->show(this_cpu_ci->info_list + this_idx->index, buf,
+			    this_idx->cpu) : 0;
+	return ret;
+}
+
+/* sysfs write entry point: dispatch to the attribute's store() for the leaf */
+static ssize_t store(struct kobject *kobj, struct attribute *attr,
+		     const char *buf, size_t count)
+{
+	struct cache_attr *fattr = to_attr(attr);
+	struct index_kobject *this_idx = to_object(kobj);
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(this_idx->cpu);
+	ssize_t ret;
+
+	ret = fattr->store ?
+		fattr->store(this_cpu_ci->info_list + this_idx->index, buf,
+			     count, this_idx->cpu) : 0;
+	return ret;
+}
+
+static const struct sysfs_ops sysfs_ops = {
+	.show = show,
+	.store = store,
+};
+
+static struct kobj_type ktype_cache = {
+	.sysfs_ops = &sysfs_ops,
+	.default_attrs = default_attrs,
+};
+
+/* Free the index kobject array and drop the cpuX/cache kobject of @cpu. */
+static void cpu_cache_sysfs_exit(unsigned int cpu)
+{
+	kfree(per_cpu_index_kobject(cpu));
+	per_cpu_index_kobject(cpu) = NULL;
+	kobject_put(per_cpu_cache_kobject(cpu));
+	per_cpu_cache_kobject(cpu) = NULL;
+}
+
+/*
+ * Create the cpuX/cache kobject and allocate the index kobject array.
+ *
+ * Returns 0 on success, -ENOENT if no cache information was detected for
+ * @cpu, -ENODEV if @cpu has no device, or -ENOMEM on allocation failure.
+ */
+static int cpu_cache_sysfs_init(unsigned int cpu)
+{
+	struct device *dev = get_cpu_device(cpu);
+
+	if (per_cpu_cacheinfo(cpu) == NULL)
+		return -ENOENT;
+
+	/* get_cpu_device() may return NULL; do not dereference it below */
+	if (!dev)
+		return -ENODEV;
+
+	per_cpu_cache_kobject(cpu) = kobject_create_and_add("cache",
+							    &dev->kobj);
+	if (unlikely(!per_cpu_cache_kobject(cpu)))
+		goto err_out;
+
+	/* Allocate all required memory */
+	per_cpu_index_kobject(cpu) = kcalloc(cache_leaves(cpu),
+					     sizeof(struct index_kobject),
+					     GFP_KERNEL);
+	if (unlikely(per_cpu_index_kobject(cpu) == NULL))
+		goto err_out;
+
+	return 0;
+
+err_out:
+	cpu_cache_sysfs_exit(cpu);
+	return -ENOMEM;
+}
+
+/* Arch hook called for each index kobject once added; default adds nothing */
+int __weak cache_add_private_attributes(struct kobject *kobj, unsigned int cpu,
+					unsigned short index)
+{
+	return 0;
+}
+
+/*
+ * Add cache interface for CPU device: create cpuX/cache and one indexY
+ * kobject per cache leaf. Returns 0 on success or a negative errno, in
+ * which case everything created so far is torn down again.
+ */
+static int cache_add_dev(unsigned int cpu)
+{
+	struct index_kobject *this_object;
+	unsigned long i, j;
+	int rc;
+
+	rc = cpu_cache_sysfs_init(cpu);
+	if (unlikely(rc < 0))
+		return rc;
+
+	for (i = 0; i < cache_leaves(cpu); i++) {
+		this_object = INDEX_KOBJECT_PTR(cpu, i);
+		this_object->cpu = cpu;
+		this_object->index = i;
+
+		rc = kobject_init_and_add(&(this_object->kobj),
+					  &ktype_cache,
+					  per_cpu_cache_kobject(cpu),
+					  "index%1lu", i);
+		if (unlikely(rc)) {
+			/* a failed kobject_init_and_add() still needs a put */
+			i++;
+			goto kobj_err;
+		}
+
+		rc = cache_add_private_attributes(&(this_object->kobj), cpu, i);
+		if (unlikely(rc)) {
+			i++; /* delete including current kobject */
+			goto kobj_err;
+		}
+
+		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
+	}
+	cpumask_set_cpu(cpu, &cache_dev_map);
+
+	kobject_uevent(per_cpu_cache_kobject(cpu), KOBJ_ADD);
+	return 0;
+kobj_err:
+	for (j = 0; j < i; j++)
+		kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
+	cpu_cache_sysfs_exit(cpu);
+	return rc;
+}
+
+/* Remove cache interface for CPU device; no-op if it was never added. */
+static void cache_remove_dev(unsigned int cpu)
+{
+	unsigned long i;
+
+	if (!cpumask_test_cpu(cpu, &cache_dev_map))
+		return;
+	cpumask_clear_cpu(cpu, &cache_dev_map);
+
+	for (i = 0; i < cache_leaves(cpu); i++)
+		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
+	cpu_cache_sysfs_exit(cpu);
+}
+
+/* hotplug notifier: set up cacheinfo as a CPU comes up, undo it as it goes */
+static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+				  unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	int rc = 0;
+
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		rc = detect_cache_attributes(cpu);
+		break;
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		rc = cache_add_dev(cpu);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		cache_remove_dev(cpu);
+		/* fall through - the cache attributes must be freed as well */
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (per_cpu_cacheinfo(cpu))
+			free_cache_attributes(cpu);
+		break;
+	}
+	return notifier_from_errno(rc);
+}
+
+/* Helpers to make sure detect_cache_attributes is called on right cpu */
+static void _detect_cache_attributes(void *retval)
+{
+	int cpu = smp_processor_id();
+	*(int *)retval = detect_cache_attributes(cpu);
+}
+
+static int __detect_cache_attributes(unsigned int cpu)
+{
+	int retval = 0;
+	int err;
+
+	/* if the call itself fails the callback never runs to set retval */
+	err = smp_call_function_single(cpu, _detect_cache_attributes, &retval,
+				       true);
+	return err ? err : retval;
+}
+
+/* Detect and expose cache info for all online CPUs, then track hotplug. */
+static int __init cacheinfo_sysfs_init(void)
+{
+	int cpu;
+	int rc;
+
+	for_each_online_cpu(cpu) {
+		rc = __detect_cache_attributes(cpu);
+		if (rc) {
+			pr_err("error detecting cacheinfo..cpu%d\n", cpu);
+			return rc;
+		}
+		rc = cache_add_dev(cpu);
+		if (rc) {
+			free_cache_attributes(cpu);
+			pr_err("error populating cacheinfo..cpu%d\n", cpu);
+			return rc;
+		}
+	}
+	hotcpu_notifier(cacheinfo_cpu_callback, 0);
+	return 0;
+}
+
+device_initcall(cacheinfo_sysfs_init);
+
+#endif /* CONFIG_SYSFS */
diff
--git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
new file mode 100644
index 0000000..a015a17
--- /dev/null
+++ b/include/linux/cacheinfo.h
@@ -0,0 +1,60 @@
+#ifndef _LINUX_CACHEINFO_H
+#define _LINUX_CACHEINFO_H
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/cpumask.h>
+#include <linux/of.h>
+#include <linux/sysfs.h>
+
+enum cache_type { /* bit flags; sysfs "type" prints "Unknown" for the rest */
+	CACHE_TYPE_NOCACHE = 0,
+	CACHE_TYPE_INST = BIT(0), /* sysfs "type" prints "Instruction" */
+	CACHE_TYPE_DATA = BIT(1), /* sysfs "type" prints "Data" */
+	CACHE_TYPE_SEPARATE = CACHE_TYPE_INST | CACHE_TYPE_DATA,
+	CACHE_TYPE_UNIFIED = BIT(2), /* sysfs "type" prints "Unified" */
+};
+
+struct cache_info { /* one cache leaf of one cpu */
+	/* core properties */
+	enum cache_type type; /* data, inst or unified */
+	unsigned int level; /* level in the cache hierarchy */
+	unsigned int coherency_line_size; /* cache line size */
+	unsigned int number_of_sets; /* no. of sets per way */
+	unsigned int ways_of_associativity; /* no. of ways */
+	unsigned int physical_line_partition; /* no. of lines per tag */
+	unsigned int size; /* total cache size */
+	cpumask_t shared_cpu_map; /* cpus sharing this cache */
+	unsigned int attributes; /* mask of the CACHE_* flags below */
+#define CACHE_WRITE_THROUGH BIT(0)
+#define CACHE_WRITE_BACK BIT(1)
+#define CACHE_READ_ALLOCATE BIT(2)
+#define CACHE_WRITE_ALLOCATE BIT(3)
+
+	/* book keeping */
+	struct device_node *of_node; /* cpu if no explicit cache node */
+	void *priv; /* not used by cacheinfo.c; presumably arch data - confirm */
+};
+
+struct cpu_cacheinfo { /* per-cpu bookkeeping, see get_cpu_cacheinfo() */
+	struct cache_info *info_list; /* array of num_leaves entries */
+	unsigned int num_levels; /* presumably no. of cache levels - confirm */
+	unsigned int num_leaves; /* number of entries in info_list */
+};
+
+struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu);
+int init_cache_level(unsigned int cpu); /* arch hook, weak default -ENOENT */
+int populate_cache_leaves(unsigned int cpu); /* arch hook, as above */
+
+#ifdef CONFIG_SYSFS
+struct cache_attr { /* sysfs attribute; callbacks get the leaf and the cpu */
+	struct attribute attr;
+	ssize_t (*show)(struct cache_info *, char *, unsigned int);
+	ssize_t (*store)(struct cache_info *, const char *, size_t count,
+			 unsigned int);
+};
+int cache_add_private_attributes(struct kobject *kobj, unsigned int cpu,
+				 unsigned short index);
+#endif
+
+#endif /* _LINUX_CACHEINFO_H */
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-doc" in the body of a message
to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html