Re: [PATCH 4/4 v2] cgroup: separate rstat list pointers from base stats

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 27, 2025 at 01:55:43PM -0800, inwardvessel wrote:
> From: JP Kobryn <inwardvessel@xxxxxxxxx>
> 
> A majority of the cgroup_rstat_cpu struct size is made up of the base
> stat entities. Since only the "self" subsystem state makes use of these,
> move them into a struct of their own. This allows for a new compact
> cgroup_rstat_cpu struct that the formal subsystems can make use of.
> Where applicable, decide on whether to allocate the compact or full
> struct including the base stats.

Mentioning the memory savings in this patch's log would be helpful.

> 
> Signed-off-by: JP Kobryn <inwardvessel@xxxxxxxxx>
> ---
>  include/linux/cgroup-defs.h | 37 ++++++++++++++----------
>  kernel/cgroup/rstat.c       | 57 +++++++++++++++++++++++++------------
>  2 files changed, 61 insertions(+), 33 deletions(-)
> 
> diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
> index 1598e1389615..b0a07c63fd46 100644
> --- a/include/linux/cgroup-defs.h
> +++ b/include/linux/cgroup-defs.h
> @@ -170,7 +170,10 @@ struct cgroup_subsys_state {
>  	struct percpu_ref refcnt;
>  
>  	/* per-cpu recursive resource statistics */
> -	struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +	union {
> +		struct cgroup_rstat_cpu __percpu *rstat_cpu;
> +		struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;
> +	};
>  
>  	/*
>  	 * siblings list anchored at the parent's ->children
> @@ -356,6 +359,24 @@ struct cgroup_base_stat {
>   * resource statistics on top of it - bsync, bstat and last_bstat.
>   */
>  struct cgroup_rstat_cpu {
> +	/*
> +	 * Child cgroups with stat updates on this cpu since the last read
> +	 * are linked on the parent's ->updated_children through
> +	 * ->updated_next.
> +	 *
> +	 * In addition to being more compact, singly-linked list pointing
> +	 * to the cgroup makes it unnecessary for each per-cpu struct to
> +	 * point back to the associated cgroup.
> +	 *
> +	 * Protected by per-cpu cgroup_rstat_cpu_lock.

I just noticed, the patch that split the lock should have updated this
comment.

> +	 */
> +	struct cgroup_subsys_state *updated_children;	/* terminated by self */
> +	struct cgroup_subsys_state *updated_next;		/* NULL if not on list */
> +};
> +
> +struct cgroup_rstat_base_cpu {
> +	struct cgroup_rstat_cpu self;

Why 'self'? Why not 'rstat_cpu' like it's named in struct
cgroup_subsys_state?

> +
>  	/*
>  	 * ->bsync protects ->bstat.  These are the only fields which get
>  	 * updated in the hot path.
> @@ -382,20 +403,6 @@ struct cgroup_rstat_cpu {
>  	 * deltas to propagate to the per-cpu subtree_bstat.
>  	 */
>  	struct cgroup_base_stat last_subtree_bstat;
> -
> -	/*
> -	 * Child cgroups with stat updates on this cpu since the last read
> -	 * are linked on the parent's ->updated_children through
> -	 * ->updated_next.
> -	 *
> -	 * In addition to being more compact, singly-linked list pointing
> -	 * to the cgroup makes it unnecessary for each per-cpu struct to
> -	 * point back to the associated cgroup.
> -	 *
> -	 * Protected by per-cpu cgroup_rstat_cpu_lock.
> -	 */
> -	struct cgroup_subsys_state *updated_children;	/* terminated by self */
> -	struct cgroup_subsys_state *updated_next;		/* NULL if not on list */
>  };
>  
>  struct cgroup_freezer_state {
> diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
> index b3eaefc1fd07..c08ebe2f9568 100644
> --- a/kernel/cgroup/rstat.c
> +++ b/kernel/cgroup/rstat.c
> @@ -24,6 +24,12 @@ static struct cgroup_rstat_cpu *cgroup_rstat_cpu(
>  	return per_cpu_ptr(css->rstat_cpu, cpu);
>  }
>  
> +static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
> +		struct cgroup_subsys_state *css, int cpu)
> +{
> +	return per_cpu_ptr(css->rstat_base_cpu, cpu);
> +}
> +
>  static inline bool is_base_css(struct cgroup_subsys_state *css)
>  {
>  	return css->ss == NULL;
> @@ -438,17 +444,31 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css)
>  
>  	/* the root cgrp's self css has rstat_cpu preallocated */
>  	if (!css->rstat_cpu) {
> -		css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
> -		if (!css->rstat_cpu)
> -			return -ENOMEM;
> -	}
> +		if (is_base_css(css)) {
> +			css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
> +			if (!css->rstat_base_cpu)
> +				return -ENOMEM;
>  
> -	/* ->updated_children list is self terminated */
> -	for_each_possible_cpu(cpu) {
> -		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(css, cpu);
> +			for_each_possible_cpu(cpu) {
> +				struct cgroup_rstat_base_cpu *rstatc;

We should use different variable names for cgroup_rstat_base_cpu and
cgroup_rstat_cpu throughout. Maybe 'brstatc' or 'rstatbc' for the
latter?

> +
> +				rstatc = cgroup_rstat_base_cpu(css, cpu);
> +				rstatc->self.updated_children = css;
> +				u64_stats_init(&rstatc->bsync);
> +			}
> +		} else {
> +			css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
> +			if (!css->rstat_cpu)
> +				return -ENOMEM;
> +
> +			for_each_possible_cpu(cpu) {
> +				struct cgroup_rstat_cpu *rstatc;
> +
> +				rstatc = cgroup_rstat_cpu(css, cpu);
> +				rstatc->updated_children = css;
> +			}
> +		}
>  
> -		rstatc->updated_children = css;
> -		u64_stats_init(&rstatc->bsync);

I think there's too much replication here. We can probably do something
like this (untested):

diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index de057c992f824..1750a69887a2e 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -443,34 +443,30 @@ int cgroup_rstat_init(struct cgroup_subsys_state *css)
 	int cpu;
 
 	/* the root cgrp's self css has rstat_cpu preallocated */
-	if (!css->rstat_cpu) {
-		if (is_base_css(css)) {
-			css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
-			if (!css->rstat_base_cpu)
-				return -ENOMEM;
-
-			for_each_possible_cpu(cpu) {
-				struct cgroup_rstat_base_cpu *rstatc;
+	if (css->rstat_cpu)
+		return 0;
 
-				rstatc = cgroup_rstat_base_cpu(css, cpu);
-				rstatc->self.updated_children = css;
-				u64_stats_init(&rstatc->bsync);
-			}
-		} else {
-			css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
-			if (!css->rstat_cpu)
-				return -ENOMEM;
+	if (is_base_css(css)) {
+		css->rstat_base_cpu = alloc_percpu(struct cgroup_rstat_base_cpu);
+		if (!css->rstat_base_cpu)
+			return -ENOMEM;
+	} else {
+		css->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+		if (!css->rstat_cpu)
+			return -ENOMEM;
+	}
 
-			for_each_possible_cpu(cpu) {
-				struct cgroup_rstat_cpu *rstatc;
+	for_each_possible_cpu(cpu) {
+		struct cgroup_rstat_base_cpu *brstatc = NULL;
+		struct cgroup_rstat_cpu *rstatc;
 
-				rstatc = cgroup_rstat_cpu(css, cpu);
-				rstatc->updated_children = css;
-			}
+		if (is_base_css(css)) {
+			brstatc = cgroup_rstat_base_cpu(css, cpu);
+			u64_stats_init(&brstatc->bsync);
+			rstatc = brstatc->self;
 		}
-
+		rstatc->updated_children = css;
 	}
-
 	return 0;
 }
 
>  	}
>  
>  	return 0;
> @@ -522,9 +542,10 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
>  
>  static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
>  {
> -	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(&cgrp->self, cpu);
> +	struct cgroup_rstat_base_cpu *rstatc = cgroup_rstat_base_cpu(
> +			&cgrp->self, cpu);
>  	struct cgroup *parent = cgroup_parent(cgrp);
> -	struct cgroup_rstat_cpu *prstatc;
> +	struct cgroup_rstat_base_cpu *prstatc;

Same here, we should use different names than rstatc and prstatc. Same
applies for the rest of the diff.

>  	struct cgroup_base_stat delta;
>  	unsigned seq;
>  
> @@ -552,25 +573,25 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
>  		cgroup_base_stat_add(&cgrp->last_bstat, &delta);
>  
>  		delta = rstatc->subtree_bstat;
> -		prstatc = cgroup_rstat_cpu(&parent->self, cpu);
> +		prstatc = cgroup_rstat_base_cpu(&parent->self, cpu);
>  		cgroup_base_stat_sub(&delta, &rstatc->last_subtree_bstat);
>  		cgroup_base_stat_add(&prstatc->subtree_bstat, &delta);
>  		cgroup_base_stat_add(&rstatc->last_subtree_bstat, &delta);
>  	}
>  }
>  
> -static struct cgroup_rstat_cpu *
> +static struct cgroup_rstat_base_cpu *
>  cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  
> -	rstatc = get_cpu_ptr(cgrp->self.rstat_cpu);
> +	rstatc = get_cpu_ptr(cgrp->self.rstat_base_cpu);
>  	*flags = u64_stats_update_begin_irqsave(&rstatc->bsync);
>  	return rstatc;
>  }
>  
>  static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
> -						 struct cgroup_rstat_cpu *rstatc,
> +						 struct cgroup_rstat_base_cpu *rstatc,
>  						 unsigned long flags)
>  {
>  	u64_stats_update_end_irqrestore(&rstatc->bsync, flags);
> @@ -580,7 +601,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
>  
>  void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  	unsigned long flags;
>  
>  	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
> @@ -591,7 +612,7 @@ void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
>  void __cgroup_account_cputime_field(struct cgroup *cgrp,
>  				    enum cpu_usage_stat index, u64 delta_exec)
>  {
> -	struct cgroup_rstat_cpu *rstatc;
> +	struct cgroup_rstat_base_cpu *rstatc;
>  	unsigned long flags;
>  
>  	rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags);
> -- 
> 2.43.5
> 
> 




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux