Enable bpf programs to make use of rstat to collect cgroup hierarchical stats efficiently: - Add cgroup_rstat_updated() kfunc, for bpf progs that collect stats. - Add cgroup_rstat_flush() kfunc, for bpf progs that read stats. - Add an empty bpf_rstat_flush() hook that is called during rstat flushing, for bpf progs that flush stats to attach to. Attaching a bpf prog to this hook effectively registers it as a flush callback. Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx> --- kernel/cgroup/rstat.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 24b5c2ab55983..94140bd0d02a4 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -3,6 +3,11 @@ #include <linux/sched/cputime.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/btf_ids.h> + + static DEFINE_SPINLOCK(cgroup_rstat_lock); static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock); @@ -141,6 +146,23 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, return pos; } +/* + * A hook for bpf stat collectors to attach to and flush their stats. + * Together with providing bpf kfuncs for cgroup_rstat_updated() and + * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that + * collect cgroup stats can integrate with rstat for efficient flushing. + * + * A static noinline declaration here could cause the compiler to optimize away + * the function. A global noinline declaration will keep the definition, but may + * optimize away the callsite. Therefore, __weak is needed to ensure that the + * call is still emitted, by telling the compiler that we don't know what the + * function might eventually be. + */ +__weak noinline void bpf_rstat_flush(struct cgroup *cgrp, + struct cgroup *parent, int cpu) +{ +} + /* see cgroup_rstat_flush() */ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock) @@ -168,6 +190,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) struct cgroup_subsys_state *css; cgroup_base_stat_flush(pos, cpu); + bpf_rstat_flush(pos, cgroup_parent(pos), cpu); rcu_read_lock(); list_for_each_entry_rcu(css, &pos->rstat_css_list, @@ -469,3 +492,26 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq) "system_usec %llu\n", usage, utime, stime); } + +/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */ +BTF_SET_START(bpf_rstat_check_kfunc_ids) +BTF_ID(func, cgroup_rstat_updated) +BTF_ID(func, cgroup_rstat_flush) +BTF_SET_END(bpf_rstat_check_kfunc_ids) + +BTF_SET_START(bpf_rstat_sleepable_kfunc_ids) +BTF_ID(func, cgroup_rstat_flush) +BTF_SET_END(bpf_rstat_sleepable_kfunc_ids) + +static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = { + .owner = THIS_MODULE, + .check_set = &bpf_rstat_check_kfunc_ids, + .sleepable_set = &bpf_rstat_sleepable_kfunc_ids, +}; + +static int __init bpf_rstat_kfunc_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, + &bpf_rstat_kfunc_set); +} +late_initcall(bpf_rstat_kfunc_init); -- 2.36.1.476.g0c4daa206d-goog