On Thu, Feb 27, 2025 at 01:55:43PM -0800, inwardvessel wrote: > From: JP Kobryn <inwardvessel@xxxxxxxxx> > > A majority of the cgroup_rstat_cpu struct size is made up of the base > stat entities. Since only the "self" subsystem state makes use of these, > move them into a struct of their own. This allows for a new compact > cgroup_rstat_cpu struct that the formal subsystems can make use of. > Where applicable, decide on whether to allocate the compact or full > struct including the base stats. Mentioning the memory savings in this patch's log would be helpful. > > Signed-off-by: JP Kobryn <inwardvessel@xxxxxxxxx> > --- > include/linux/cgroup-defs.h | 37 ++++++++++++++---------- > kernel/cgroup/rstat.c | 57 +++++++++++++++++++++++++------------ > 2 files changed, 61 insertions(+), 33 deletions(-) > > diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h > index 1598e1389615..b0a07c63fd46 100644 > --- a/include/linux/cgroup-defs.h > +++ b/include/linux/cgroup-defs.h > @@ -170,7 +170,10 @@ struct cgroup_subsys_state { > struct percpu_ref refcnt; > > /* per-cpu recursive resource statistics */ > - struct cgroup_rstat_cpu __percpu *rstat_cpu; > + union { > + struct cgroup_rstat_cpu __percpu *rstat_cpu; > + struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; > + }; > > /* > * siblings list anchored at the parent's ->children > @@ -356,6 +359,24 @@ struct cgroup_base_stat { > * resource statistics on top of it - bsync, bstat and last_bstat. > */ > struct cgroup_rstat_cpu { > + /* > + * Child cgroups with stat updates on this cpu since the last read > + * are linked on the parent's ->updated_children through > + * ->updated_next. > + * > + * In addition to being more compact, singly-linked list pointing > + * to the cgroup makes it unnecessary for each per-cpu struct to > + * point back to the associated cgroup. > + * > + * Protected by per-cpu cgroup_rstat_cpu_lock. I just noticed, the patch that split the lock should have updated this comment. > + */ > + struct cgroup_subsys_state *updated_children; /* terminated by self */ > + struct cgroup_subsys_state *updated_next; /* NULL if not on list */ > +}; > + > +struct cgroup_rstat_base_cpu { > + struct cgroup_rstat_cpu self; Why 'self'? Why not 'rstat_cpu' like it's named in struct cgroup_subsys_state? > + > /* > * ->bsync protects ->bstat. These are the only fields which get > * updated in the hot path. > @@ -382,20 +403,6 @@ struct cgroup_rstat_cpu { > * deltas to propagate to the per-cpu subtree_bstat. > */ > struct cgroup_base_stat last_subtree_bstat; > - > - /* > - * Child cgroups with stat updates on this cpu since the last read > - * are linked on the parent's ->updated_children through > - * ->updated_next. > - * > - * In addition to being more compact, singly-linked list pointing > - * to the cgroup makes it unnecessary for each per-cpu struct to > - * point back to the associated cgroup. > - * > - * Protected by per-cpu cgroup_rstat_cpu_lock. > - */ > - struct cgroup_subsys_state *updated_children; /* terminated by self */ > - struct cgroup_subsys_state *updated_next; /* NULL if not on list */ > }; > > struct cgroup_freezer_state { > diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c > index b3eaefc1fd07..c08ebe2f9568 100644 > --- a/kernel/cgroup/rstat.c > +++ b/kernel/cgroup/rstat.c > @@ -24,6 +24,12 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu( > return per_cpu_ptr(css->rstat_cpu, cpu); > } > > +static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu( > + struct cgroup_subsys_state *css, int cpu) > +{ > + return per_cpu_ptr(css->rstat_base_cpu, cpu); > +} > + > static inline bool is_base_css(struct cgroup_subsys_state *css) > { > return css->ss == NULL; > @@ -438,17 +444,31 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css) > > /* the root cgrp's self css has rstat_cpu preallocated */ > if (!css->rstat_cpu) { > - css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); > - if (!css->rstat_cpu) > - return -ENOMEM; > - } > + if (is_base_css(css)) { > + css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu); > + if (!css->rstat_base_cpu) > + return -ENOMEM; > > - /* ->updated_children list is self terminated */ > - for_each_possible_cpu(cpu) { > - struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(css, cpu); > + for_each_possible_cpu(cpu) { > + struct cgroup_rstat_base_cpu *rstatc; We should use different variable names for cgroup_rstat_base_cpu and cgroup_rstat_cpu throughout. Maybe 'brstatc' or 'rstatbc' for the latter? > + > + rstatc = cgroup_rstat_base_cpu(css, cpu); > + rstatc->self.updated_children = css; > + u64_stats_init(&rstatc->bsync); > + } > + } else { > + css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); > + if (!css->rstat_cpu) > + return -ENOMEM; > + > + for_each_possible_cpu(cpu) { > + struct cgroup_rstat_cpu *rstatc; > + > + rstatc = cgroup_rstat_cpu(css, cpu); > + rstatc->updated_children = css; > + } > + } > > - rstatc->updated_children = css; > - u64_stats_init(&rstatc->bsync); I think there's too much replication here. We can probably do something like this (untested): diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index de057c992f824..1750a69887a2e 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -443,34 +443,30 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css) int cpu; /* the root cgrp's self css has rstat_cpu preallocated */ - if (!css->rstat_cpu) { - if (is_base_css(css)) { - css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu); - if (!css->rstat_base_cpu) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - struct cgroup_rstat_base_cpu *rstatc; + if (css->rstat_cpu) + return 0; - rstatc = cgroup_rstat_base_cpu(css, cpu); - rstatc->self.updated_children = css; - u64_stats_init(&rstatc->bsync); - } - } else { - css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); - if (!css->rstat_cpu) - return -ENOMEM; + if (is_base_css(css)) { + css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu); + if (!css->rstat_base_cpu) + return -ENOMEM; + } else { + css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu); + if (!css->rstat_cpu) + return -ENOMEM; + } - for_each_possible_cpu(cpu) { - struct cgroup_rstat_cpu *rstatc; + for_each_possible_cpu(cpu) { + struct cgroup_rstat_base_cpu *brstatc = NULL; + struct cgroup_rstat_cpu *rstatc; - rstatc = cgroup_rstat_cpu(css, cpu); - rstatc->updated_children = css; - } + if (is_base_css(css)) { + brstatc = cgroup_rstat_base_cpu(css, cpu); + u64_stats_init(&brstatc->bsync); + rstatc = brstatc->self; } - + rstatc->updated_children = css; } - return 0; } > } > > return 0; > @@ -522,9 +542,10 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat, > > static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) > { > - struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(&cgrp->self, cpu); > + struct cgroup_rstat_base_cpu *rstatc = cgroup_rstat_base_cpu( > + &cgrp->self, cpu); > struct cgroup *parent = cgroup_parent(cgrp); > - struct cgroup_rstat_cpu *prstatc; > + struct cgroup_rstat_base_cpu *prstatc; Same here, we should use different names than rstatc and prstatc. Same applies for the rest of the diff. > struct cgroup_base_stat delta; > unsigned seq; > > @@ -552,25 +573,25 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) > cgroup_base_stat_add(&cgrp->last_bstat, &delta); > > delta = rstatc->subtree_bstat; > - prstatc = cgroup_rstat_cpu(&parent->self, cpu); > + prstatc = cgroup_rstat_base_cpu(&parent->self, cpu); > cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat); > cgroup_base_stat_add(&prstatc->subtree_bstat, &delta); > cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta); > } > } > > -static struct cgroup_rstat_cpu * > +static struct cgroup_rstat_base_cpu * > cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) > { > - struct cgroup_rstat_cpu *rstatc; > + struct cgroup_rstat_base_cpu *rstatc; > > - rstatc = get_cpu_ptr(cgrp->self.rstat_cpu); > + rstatc = get_cpu_ptr(cgrp->self.rstat_base_cpu); > *flags = u64_stats_update_begin_irqsave(&rstatc->bsync); > return rstatc; > } > > static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, > - struct cgroup_rstat_cpu *rstatc, > + struct cgroup_rstat_base_cpu *rstatc, > unsigned long flags) > { > u64_stats_update_end_irqrestore(&rstatc->bsync, flags); > @@ -580,7 +601,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, > > void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) > { > - struct cgroup_rstat_cpu *rstatc; > + struct cgroup_rstat_base_cpu *rstatc; > unsigned long flags; > > rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); > @@ -591,7 +612,7 @@ void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) > void __cgroup_account_cputime_field(struct cgroup *cgrp, > enum cpu_usage_stat index, u64 delta_exec) > { > - struct cgroup_rstat_cpu *rstatc; > + struct cgroup_rstat_base_cpu *rstatc; > unsigned long flags; > > rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); > -- > 2.43.5 > >