struct histogram provides a histogram that counts u64 samples in arbitrary user-configurable buckets, optimized for efficient concurrent recording. This is a squashed, refactored, and modified version of a previously-internal implementation. Thanks to the following individuals for portions of the implementation: Jamie Liu <jamieliu@xxxxxxxxxx> - Original implementation Yu Zhao <yuzhao@xxxxxxxxxx> - Code cleanups + simplification Signed-off-by: Axel Rasmussen <axelrasmussen@xxxxxxxxxx> --- include/linux/histogram.h | 270 ++++++++++++++++++++++++++++++++++++++ lib/Kconfig | 3 + lib/Makefile | 2 + lib/histogram.c | 157 ++++++++++++++++++++++ 4 files changed, 432 insertions(+) create mode 100644 include/linux/histogram.h create mode 100644 lib/histogram.c diff --git a/include/linux/histogram.h b/include/linux/histogram.h new file mode 100644 index 000000000000..137930ca933f --- /dev/null +++ b/include/linux/histogram.h @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_HISTOGRAM_H +#define _LINUX_HISTOGRAM_H + +#include <linux/compiler.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/types.h> + +/* + * Histograms for counting events represented by u64s. + * + * Each histogram may have any number of thresholds. A histogram with + * nr_thresholds thresholds has nr_thresholds buckets, where bucket[i] counts + * samples in the half-open interval ( thresholds[i-1], thresholds[i] ]. + * (thresholds[-1] is implicitly defined as 0, and thresholds[nr_thresholds-1] + * must be 2**64-1.) Each histogram's thresholds are immutable after creation. + * Each bucket counts up to a u64 worth of events. 
 *
 * Example usage:
 *
 * u64 thresholds[] = {1, 2, 5, 10, 20, 50, 100, ~0};
 * u64 buckets[ARRAY_SIZE(thresholds)];
 * struct histogram *hist = histogram_alloc(thresholds, ARRAY_SIZE(thresholds));
 * if (IS_ERR(hist)) {...}
 * histogram_record(hist, 0, 1);
 * histogram_record(hist, 8, 2);
 * histogram_record(hist, 20, 3);
 * histogram_record(hist, 110, 4);
 * histogram_read_buckets(hist, buckets);
 * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
 * histogram_free(hist);
 *
 * For convenience, a struct histogram_rcu type is also provided which wraps an
 * RCU-sched protected pointer to a struct histogram, allowing thresholds to be
 * modified even while other threads may be recording to the histogram.
 * Functions that operate on a histogram_rcu are distinguished by an _rcu
 * suffix.
 *
 * Example usage:
 *
 * u64 thresholds_1[] = {1, 2, 5, 10, 20, 50, 100, ~0};
 * u64 thresholds_2[] = {100, 200, 500, 1000, ~0};
 * u64 buckets[ARRAY_SIZE(thresholds_1)];
 * struct histogram_rcu hrcu;
 * if (histogram_init_rcu(&hrcu, thresholds_1, ARRAY_SIZE(thresholds_1))) {...}
 * histogram_record_rcu(&hrcu, 0, 1);
 * histogram_record_rcu(&hrcu, 8, 2);
 * histogram_record_rcu(&hrcu, 20, 3);
 * histogram_record_rcu(&hrcu, 110, 4);
 * if (histogram_read_rcu(&hrcu, buckets, NULL,
 *                        ARRAY_SIZE(thresholds_1)) < 0) {...}
 * // buckets == {1, 0, 0, 2, 3, 0, 0, 4}
 * if (histogram_set_thresholds_rcu(&hrcu, thresholds_2,
 *                                  ARRAY_SIZE(thresholds_2))) {...}
 * if (histogram_read_rcu(&hrcu, buckets, NULL,
 *                        ARRAY_SIZE(thresholds_2)) < 0) {...}
 * // buckets == {0, 0, 0, 0, 0}
 * histogram_record_rcu(&hrcu, 50, 1);
 * histogram_record_rcu(&hrcu, 150, 2);
 * histogram_record_rcu(&hrcu, 5000, 3);
 * if (histogram_read_rcu(&hrcu, buckets, NULL,
 *                        ARRAY_SIZE(thresholds_2)) < 0) {...}
 * // buckets == {1, 2, 0, 0, 3}
 * histogram_destroy_rcu(&hrcu);
 */

struct histogram {
	struct rcu_head rcu;	/* used to defer freeing past RCU readers */
	u64 __percpu *buckets;	/* one counter per threshold, per CPU */
	size_t nr_thresholds;	/* number of entries in thresholds[] */
	u64
thresholds[0]; /* flexible array member */ +}; + +struct histogram_rcu { + struct histogram __rcu *hist; +}; + +/** + * histogram_record() - record samples in histogram + * @hist: histogram + * @val: sample value + * @count: number of samples + * + * histogram_record() does not require synchronization with concurrent + * histogram_record() or histogram_read_buckets(). + * + * histogram_record() may be called from tracing code. + */ +void histogram_record(struct histogram *hist, u64 val, u64 count); + +/** + * histogram_read_buckets() - read histogram buckets + * @hist: histogram + * @buckets: array with space for at least hist->nr_thresholds elements + * + * Sets each element of @buckets to the value of the corresponding histogram + * bucket. + * + * histogram_read_buckets() does not require synchronization with concurrent + * histogram_record() or histogram_read_buckets(). + * + * histogram_read_buckets() does not block recording, so values are not read + * from all CPUs atomically. If this is a problem, the caller should stop + * recording first. + */ +void histogram_read_buckets(const struct histogram *hist, u64 *buckets); + +/** + * histogram_alloc() - create a histogram + * @thresholds: thresholds array + * @nr_thresholds: number of elements in @thresholds + * + * Histogram buckets are initialized to zero. @thresholds must be sorted in + * ascending order and must not contain any duplicates. @nr_thresholds must be + * >= 1. thresholds[nr_thresholds-1] must be ~(u64)0. + * + * histogram_alloc() makes a copy of @thresholds if successful, so ownership of + * @thresholds is unaffected by histogram_alloc(). + * + * Context: Performs allocation with GFP_KERNEL. + * + * Returns: Pointer to new histogram, or a PTR_ERR on failure. 
+ */ +struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds); + +/** + * histogram_free() - delete a histogram + * @hist: histogram + */ +void histogram_free(struct histogram *hist); + +/** + * histogram_record_rcu() - record samples in RCU-protected histogram + * @hrcu: histogram + * @val: sample value + * @count: number of samples + * + * histogram_record_rcu() does not require external synchronization, even with + * histogram_destroy_rcu(). Calling histogram_record_rcu() on a @hrcu that + * histogram_destroy_rcu() has been called on is a no-op. + * + * histogram_record_rcu() may be called from tracing code. + */ +static inline void histogram_record_rcu(struct histogram_rcu *hrcu, u64 val, + u64 count) +{ + struct histogram *hist; + + rcu_read_lock_sched_notrace(); + hist = rcu_dereference_sched(hrcu->hist); + if (likely(hist)) + histogram_record(hist, val, count); + rcu_read_unlock_sched_notrace(); +} + +/** + * histogram_read_rcu() - read RCU-protected histogram + * @hrcu: histogram + * @buckets: array with space for at least nr_thresholds elements + * @thresholds: array with space for at least nr_thresholds elements + * @nr_thresholds: array size (see above) + * + * If @buckets is not NULL, sets each element of @buckets to the value of the + * corresponding histogram bucket. + * + * If @thresholds is not NULL, sets each element of @thresholds to the + * corresponding histogram bucket threshold. + * + * On failure (for example, if nr_thresholds < hrcu->hist->nr_thresholds), + * neither @buckets nor @thresholds will be modified. + * + * histogram_read_rcu() does not require external synchronization, even with + * histogram_destroy_rcu(). Calling histogram_read_rcu() on a @hrcu that + * histogram_destroy_rcu() has been called on returns 0. + * + * histogram_read_rcu() does not block recording, so bucket values are not read + * from all CPUs atomically. If this is a problem, the caller should stop + * recording first. 
+ * + * Returns: If successful, returns the actual number of thresholds stored in + * @thresholds; if nr_thresholds is too small, returns the negative of the + * minimum required nr_thresholds to succeed; if the histogram has been + * destroyed by histogram_destroy_rcu(), returns 0. (Note that if nr_thresholds + * is too small, it is not guaranteed that calling histogram_read_rcu() again + * with the returned value of nr_thresholds will succeed, because another + * thread could raise the number of thresholds again in the interim.) + */ +ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets, + u64 *thresholds, size_t nr_thresholds); + +/** + * histogram_set_thresholds_rcu() - set RCU-protected histogram thresholds + * @hrcu: histogram + * @thresholds: thresholds array + * @nr_thresholds: number of elements in @thresholds + * + * The semantics that apply to @thresholds are the same as for histogram_alloc. + * + * If successful, all buckets are atomically reset to zero. + * + * The caller must synchronize between concurrent calls to + * histogram_set_thresholds_rcu(), histogram_init_rcu(), and + * histogram_destroy_rcu(). + * + * Context: Performs allocation with GFP_KERNEL. + * + * Returns: Zero on success, or a negative error code on failure. + */ +int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu, + const u64 *thresholds, size_t nr_thresholds); + +/** + * histogram_init_rcu() - initialize RCU-protected histogram + * @hrcu: histogram + * @thresholds: thresholds array + * @nr_thresholds: number of elements in @thresholds. + * + * Each struct histogram_rcu must be initialized by histogram_init_rcu() at + * least once before it is valid to call any other functions on it. A struct + * histogram_rcu that has been previously initialized cannot be initialized + * again, unless it has been subsequently destroyed by histogram_destroy_rcu(). 
+ * + * The semantics that apply to @thresholds are the same as for + * histogram_alloc(), with one exception: @thresholds may be NULL iff + * @nr_thresholds is 0. In this case, @hrcu will behave as if it has already + * been destroyed (histogram_record_rcu() will no-op and histogram_read_rcu() + * will return 0). + * + * The caller must synchronize between concurrent calls to + * histogram_set_thresholds_rcu(), histogram_init_rcu(), and + * histogram_destroy_rcu(). + * + * Context: Performs allocation with GFP_KERNEL. + * + * Returns: Zero on success, or a negative error code on failure. + */ +static inline int histogram_init_rcu(struct histogram_rcu *hrcu, + const u64 *thresholds, + size_t nr_thresholds) +{ + RCU_INIT_POINTER(hrcu->hist, NULL); + if (!thresholds && !nr_thresholds) + return 0; + return histogram_set_thresholds_rcu(hrcu, thresholds, nr_thresholds); +} + +void histogram_destroy_rcu_cb(struct rcu_head *rcu); + +/** + * histogram_destroy_rcu() - destroy RCU-protected histogram + * @hrcu: histogram + * + * After histogram_destroy_rcu() has been called on a @hrcu, it is valid to call + * histogram_init_rcu() on it again. + * + * The caller must synchronize between concurrent calls to + * histogram_set_thresholds_rcu(), histogram_init_rcu(), and + * histogram_destroy_rcu(). 
+ */ +static inline void histogram_destroy_rcu(struct histogram_rcu *hrcu) +{ + struct histogram *hist = rcu_dereference_raw(hrcu->hist); + + RCU_INIT_POINTER(hrcu->hist, NULL); + if (likely(hist)) + call_rcu(&hist->rcu, histogram_destroy_rcu_cb); +} + +#endif /* _LINUX_HISTOGRAM_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 5d53f9609c25..4714bdfa343b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -648,6 +648,9 @@ config OBJAGG config STRING_SELFTEST tristate "Test string functions" +config HISTOGRAM + bool + endmenu config GENERIC_IOREMAP diff --git a/lib/Makefile b/lib/Makefile index 685aee60de1d..f61b1c15d656 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -248,6 +248,8 @@ obj-$(CONFIG_ASN1) += asn1_decoder.o obj-$(CONFIG_FONT_SUPPORT) += fonts/ +obj-$(CONFIG_HISTOGRAM) += histogram.o + hostprogs := gen_crc32table hostprogs += gen_crc64table clean-files := crc32table.h diff --git a/lib/histogram.c b/lib/histogram.c new file mode 100644 index 000000000000..b68334275a46 --- /dev/null +++ b/lib/histogram.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/cache.h> +#include <linux/cpumask.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/histogram.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/types.h> + +void histogram_record(struct histogram *hist, u64 val, u64 count) +{ + size_t i, lower, upper; + + /* Binary search invariant: correct bucket in [lower, upper] */ + lower = 0; + upper = hist->nr_thresholds - 1; + while (lower < upper) { + /* can't realistically overflow, nr_thresholds < 2**63 */ + i = (lower + upper) / 2; + if (val <= hist->thresholds[i]) + upper = i; + else + lower = i + 1; + } + i = upper; + /* _notrace because histogram_record may be called from tracing */ + preempt_disable_notrace(); + __this_cpu_add(hist->buckets[i], count); + preempt_enable_notrace(); +} +EXPORT_SYMBOL_GPL(histogram_record); + +void 
histogram_read_buckets(const struct histogram *hist, u64 *buckets) +{ + int cpu; + size_t nr_buckets; + size_t i; + + nr_buckets = hist->nr_thresholds; + memset(buckets, 0, nr_buckets * sizeof(u64)); + /* + * This must use for_each_possible_cpu rather than for_each_online_cpu + * to ensure we count records from any CPUs that have since been + * removed. + */ + for_each_possible_cpu(cpu) { + for (i = 0; i < nr_buckets; i++) + buckets[i] += per_cpu(hist->buckets[i], cpu); + } +} +EXPORT_SYMBOL_GPL(histogram_read_buckets); + +static int histogram_check_thresholds(const u64 *thresholds, + size_t nr_thresholds) +{ + unsigned int i; + + if (!nr_thresholds) + return -EINVAL; + if (!thresholds) + return -EFAULT; + for (i = 1; i < nr_thresholds; i++) + if (thresholds[i - 1] >= thresholds[i]) + return -EINVAL; + if (thresholds[nr_thresholds - 1] != ~0ULL) + return -EINVAL; + return 0; +} + +struct histogram *histogram_alloc(const u64 *thresholds, size_t nr_thresholds) +{ + struct histogram *hist; + size_t hist_size; + int ret; + + ret = histogram_check_thresholds(thresholds, nr_thresholds); + if (ret) + return ERR_PTR(ret); + hist_size = sizeof(struct histogram) + nr_thresholds * sizeof(u64); + hist = kmalloc(ALIGN(hist_size, cache_line_size()), GFP_KERNEL); + if (!hist) + return ERR_PTR(-ENOMEM); + hist->buckets = __alloc_percpu(nr_thresholds * sizeof(*hist->buckets), + __alignof__(*hist->buckets)); + if (!hist->buckets) { + kfree(hist); + return ERR_PTR(-ENOMEM); + } + hist->nr_thresholds = nr_thresholds; + memcpy(hist->thresholds, thresholds, nr_thresholds * sizeof(u64)); + return hist; +} +EXPORT_SYMBOL_GPL(histogram_alloc); + +void histogram_free(struct histogram *hist) +{ + if (!hist) + return; + free_percpu(hist->buckets); + kfree(hist); +} +EXPORT_SYMBOL_GPL(histogram_free); + +ssize_t histogram_read_rcu(const struct histogram_rcu *hrcu, u64 *buckets, + u64 *thresholds, size_t nr_thresholds) +{ + const struct histogram *hist; + ssize_t ret = 0; + + 
rcu_read_lock_sched(); + hist = rcu_dereference_sched(hrcu->hist); + if (!hist) { + ret = 0; + goto out; + } + if (nr_thresholds < hist->nr_thresholds) { + ret = -hist->nr_thresholds; + goto out; + } + if (buckets) + histogram_read_buckets(hist, buckets); + if (thresholds) + memcpy(thresholds, hist->thresholds, + hist->nr_thresholds * sizeof(u64)); + ret = hist->nr_thresholds; +out: + rcu_read_unlock_sched(); + return ret; +} +EXPORT_SYMBOL_GPL(histogram_read_rcu); + +int histogram_set_thresholds_rcu(struct histogram_rcu *hrcu, + const u64 *thresholds, size_t nr_thresholds) +{ + struct histogram *old_hist = rcu_dereference_raw(hrcu->hist); + struct histogram *new_hist = histogram_alloc(thresholds, nr_thresholds); + + if (IS_ERR(new_hist)) + return PTR_ERR(new_hist); + rcu_assign_pointer(hrcu->hist, new_hist); + if (old_hist) { + synchronize_rcu(); + histogram_free(old_hist); + } + return 0; +} +EXPORT_SYMBOL_GPL(histogram_set_thresholds_rcu); + +void histogram_destroy_rcu_cb(struct rcu_head *rcu) +{ + histogram_free(container_of(rcu, struct histogram, rcu)); +} +EXPORT_SYMBOL_GPL(histogram_destroy_rcu_cb); -- 2.27.0.rc0.183.gde8f92d652-goog