Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch

On 10/18/23 20:08, Roman Gushchin wrote:
> From 14f998a9235fdfa88a4ebfad5802bdde6195bfae Mon Sep 17 00:00:00 2001
> From: Roman Gushchin <roman.gushchin@xxxxxxxxx>
> Date: Mon, 19 Dec 2022 15:46:18 -0800
> Subject: [PATCH v4 2/5] mm: kmem: add direct objcg pointer to task_struct
> 
> To charge a freshly allocated kernel object to a memory cgroup, the
> kernel needs to obtain an objcg pointer. Currently it does this
> indirectly: it obtains the memcg pointer first and then calls
> __get_obj_cgroup_from_memcg().
> 
> Usually tasks spend their entire life belonging to the same object
> cgroup, so it makes sense to cache the objcg pointer directly in
> task_struct, where it can be obtained faster. This requires some
> work on the fork, exit and cgroup migration paths, but those paths
> are far colder.
> 
> To avoid any costly synchronization the following rules are applied:
> 1) A task sets its objcg pointer itself.
> 
> 2) If a task is being migrated to another cgroup, the least
>    significant bit of the objcg pointer is set atomically.
> 
> 3) On the allocation path the objcg pointer is obtained locklessly
>    using the READ_ONCE() macro and the least significant bit is
>    checked. If it's set, the following procedure is used to update
>    it locklessly:
>        - task->objcg is zeroed using xchg()
>        - a new objcg pointer is obtained
>        - task->objcg is updated using try_cmpxchg()
>        - the operation is repeated if try_cmpxchg() fails
>    This guarantees that no updates are lost if a task migration
>    races against an objcg pointer update, and it keeps both the
>    read and write paths fully lockless.
> 
> Because the task holds a reference to the objcg, the objcg can't go
> away while the task is alive.
> 
> This commit doesn't change the way remote memcg charging works.
> 
> Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@xxxxxxxxx>
> Tested-by: Naresh Kamboju <naresh.kamboju@xxxxxxxxxx>
> Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx>
> ---
>  include/linux/sched.h |   4 ++
>  mm/memcontrol.c       | 138 +++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 133 insertions(+), 9 deletions(-)
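
As an aside, for anyone trying to map points 1-3 of the commit message
onto the code below, the protocol condenses to roughly this (just a
sketch; lookup_objcg() is a made-up stand-in for the rcu-protected
parent walk in current_objcg_update()):

	/* migration path (any task may do this to any other task): */
	set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);

	/* allocation path (only ever executed by current): */
	objcg = READ_ONCE(current->objcg);
	if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG)) {
		do {
			/* xchg() also clears the update bit */
			old = xchg(&current->objcg, NULL);
			/* ...drop references held via old and via a
			 * previously failed pass... */
			objcg = lookup_objcg(current);	/* takes a reference */
			old = NULL;
		} while (!try_cmpxchg(&current->objcg, &old, objcg));
	}

If a migration sets the bit between the xchg() and the try_cmpxchg(),
the expected value (NULL) no longer matches, the install fails and the
lookup is redone, so no concurrent migration is ever missed.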
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 77f01ac385f7..60de42715b56 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1443,6 +1443,10 @@ struct task_struct {
>  	struct mem_cgroup		*active_memcg;
>  #endif
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +	struct obj_cgroup		*objcg;
> +#endif
> +
>  #ifdef CONFIG_BLK_CGROUP
>  	struct gendisk			*throttle_disk;
>  #endif
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 16ac2a5838fb..d51b87cc8d97 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -249,6 +249,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
>  	return container_of(vmpr, struct mem_cgroup, vmpressure);
>  }
>  
> +#define CURRENT_OBJCG_UPDATE_BIT 0
> +#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)
> +
>  #ifdef CONFIG_MEMCG_KMEM
>  static DEFINE_SPINLOCK(objcg_lock);
>  
> @@ -3001,6 +3004,57 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>  	return objcg;
>  }
>  
> +static struct obj_cgroup *current_objcg_update(void)
> +{
> +	struct mem_cgroup *memcg;
> +	struct obj_cgroup *old, *objcg = NULL;
> +
> +	do {
> +		/* Atomically drop the update bit. */
> +		old = xchg(&current->objcg, NULL);
> +		if (old) {
> +			old = (struct obj_cgroup *)
> +				((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
> +			if (old)
> +				obj_cgroup_put(old);
> +
> +			old = NULL;
> +		}
> +
> +		/*
> +		 * Release the objcg pointer from the previous iteration,
> +		 * if try_cmpxchg() below fails.
> +		 */
> +		if (unlikely(objcg))
> +			obj_cgroup_put(objcg);
> +
> +		/* Obtain the new objcg pointer. */
> +		rcu_read_lock();
> +		memcg = mem_cgroup_from_task(current);

Btw, can this return root_mem_cgroup? If so, the for loop below doesn't
run even a single iteration, and objcg may still hold a stale pointer
from the previous pass, whose reference was already dropped above.
Should we set objcg to NULL right after dropping that reference?
(Even before this series, the similar loop in
__get_obj_cgroup_from_memcg() meant that no objcg was obtained in this
case; I guess that's just part of the design, the root memcg doesn't
have an objcg.)
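
I.e. something like this (untested):

	if (unlikely(objcg)) {
		obj_cgroup_put(objcg);
		objcg = NULL;
	}

so that an iteration that starts directly in root_mem_cgroup installs
NULL instead of the already-released pointer.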

> +		/*
> +		 * The current task can be asynchronously moved to another
> +		 * memcg and the previous memcg can be offlined. So let's
> +		 * get the memcg pointer and try to get a reference to the
> +		 * objcg under the rcu read lock.
> +		 */
> +		for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
> +			objcg = rcu_dereference(memcg->objcg);
> +			if (likely(objcg && obj_cgroup_tryget(objcg)))
> +				break;
> +			objcg = NULL;
> +		}
> +		rcu_read_unlock();
> +
> +		/*
> +		 * Try to set up a new objcg pointer atomically. If it
> +		 * fails, it means the update flag was set concurrently, so
> +		 * the whole procedure should be repeated.
> +		 */
> +	} while (!try_cmpxchg(&current->objcg, &old, objcg));
> +
> +	return objcg;
> +}
> +
>  __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>  {
>  	struct mem_cgroup *memcg;
> @@ -3008,19 +3062,26 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>  
>  	if (in_task()) {
>  		memcg = current->active_memcg;
> +		if (unlikely(memcg))
> +			goto from_memcg;
>  
> -		/* Memcg to charge can't be determined. */
> -		if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
> -			return NULL;
> +		objcg = READ_ONCE(current->objcg);
> +		if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
> +			objcg = current_objcg_update();
> +
> +		if (objcg) {
> +			obj_cgroup_get(objcg);
> +			return objcg;
> +		}
>  	} else {
>  		memcg = this_cpu_read(int_active_memcg);
> -		if (likely(!memcg))
> -			return NULL;
> +		if (unlikely(memcg))
> +			goto from_memcg;
>  	}
> +	return NULL;
>  
> +from_memcg:
>  	rcu_read_lock();
> -	if (!memcg)
> -		memcg = mem_cgroup_from_task(current);
>  	objcg = __get_obj_cgroup_from_memcg(memcg);
>  	rcu_read_unlock();
>  	return objcg;
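
For context on why the fast path matters: a typical caller pattern
(simplified here, with error handling elided) looks roughly like

	objcg = get_obj_cgroup_from_current();
	if (objcg) {
		if (!obj_cgroup_charge(objcg, gfp, size)) {
			/* charged; attribute the new object to objcg */
		}
		obj_cgroup_put(objcg);
	}

and after this change the common case in get_obj_cgroup_from_current()
is a READ_ONCE() plus one bit test before taking the reference.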
> @@ -6345,6 +6406,7 @@ static void mem_cgroup_move_task(void)
>  		mem_cgroup_clear_mc();
>  	}
>  }
> +
>  #else	/* !CONFIG_MMU */
>  static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
>  {
> @@ -6358,8 +6420,39 @@ static void mem_cgroup_move_task(void)
>  }
>  #endif
>  
> +#ifdef CONFIG_MEMCG_KMEM
> +static void mem_cgroup_fork(struct task_struct *task)
> +{
> +	/*
> +	 * Set the update flag to cause task->objcg to be initialized lazily
> +	 * on the first allocation. It can be done without any synchronization
> +	 * because it's always performed on the current task, just like
> +	 * current_objcg_update().
> +	 */
> +	task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
> +}
> +
> +static void mem_cgroup_exit(struct task_struct *task)
> +{
> +	struct obj_cgroup *objcg = task->objcg;
> +
> +	objcg = (struct obj_cgroup *)
> +		((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
> +	if (objcg)
> +		obj_cgroup_put(objcg);
> +
> +	/*
> +	 * Some kernel allocations can happen after this point,
> +	 * but let's ignore them. It can be done without any synchronization
> +	 * because it's always performed on the current task, just like
> +	 * current_objcg_update().
> +	 */
> +	task->objcg = NULL;
> +}
> +#endif
> +
>  #ifdef CONFIG_LRU_GEN
> -static void mem_cgroup_attach(struct cgroup_taskset *tset)
> +static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
>  {
>  	struct task_struct *task;
>  	struct cgroup_subsys_state *css;
> @@ -6377,10 +6470,31 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
>  	task_unlock(task);
>  }
>  #else
> +static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
> +#endif /* CONFIG_LRU_GEN */
> +
> +#ifdef CONFIG_MEMCG_KMEM
> +static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
> +{
> +	struct task_struct *task;
> +	struct cgroup_subsys_state *css;
> +
> +	cgroup_taskset_for_each(task, css, tset) {
> +		/* atomically set the update bit */
> +		set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
> +	}
> +}
> +#else
> +static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
> +#endif /* CONFIG_MEMCG_KMEM */
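
A note for readers rather than an objection: set_bit() on bit 0 of the
pointer works because struct obj_cgroup is at least word-aligned, so
bit 0 of any valid pointer is zero and is free to carry the "needs
update" flag. A standalone illustration of the tagging scheme (not
kernel code, just the idea):

	#define UPDATE_FLAG	1UL

	/* any task may flag another task's slot as stale */
	static void mark_stale(unsigned long *slot)
	{
		__atomic_fetch_or(slot, UPDATE_FLAG, __ATOMIC_RELAXED);
	}

	/* only the owning task strips the flag and dereferences */
	static struct obj_cgroup *untag(unsigned long val)
	{
		return (struct obj_cgroup *)(val & ~UPDATE_FLAG);
	}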
> +
> +#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
>  static void mem_cgroup_attach(struct cgroup_taskset *tset)
>  {
> +	mem_cgroup_lru_gen_attach(tset);
> +	mem_cgroup_kmem_attach(tset);
>  }
> -#endif /* CONFIG_LRU_GEN */
> +#endif
>  
>  static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
>  {
> @@ -6824,9 +6938,15 @@ struct cgroup_subsys memory_cgrp_subsys = {
>  	.css_reset = mem_cgroup_css_reset,
>  	.css_rstat_flush = mem_cgroup_css_rstat_flush,
>  	.can_attach = mem_cgroup_can_attach,
> +#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
>  	.attach = mem_cgroup_attach,
> +#endif
>  	.cancel_attach = mem_cgroup_cancel_attach,
>  	.post_attach = mem_cgroup_move_task,
> +#ifdef CONFIG_MEMCG_KMEM
> +	.fork = mem_cgroup_fork,
> +	.exit = mem_cgroup_exit,
> +#endif
>  	.dfl_cftypes = memory_files,
>  	.legacy_cftypes = mem_cgroup_legacy_files,
>  	.early_init = 0,



