On Wed, Mar 03, 2021 at 01:59:16PM +0800, Muchun Song wrote: > The remote memcg charing APIs is a mechanism to charge pages to a given > memcg. Since all kernel memory are charged by using obj_cgroup APIs. > Actually, we want to charge kernel memory to the remote object cgroup > instead of memory cgroup. So introduce remote objcg charging APIs to > charge the kmem pages by using objcg_cgroup APIs. And if remote memcg > and objcg are both set, objcg takes precedence over memcg to charge > the kmem pages. > > In the later patch, we will use those API to charge kernel memory to > the remote objcg. I'd abandon/postpone the rest of the patchset (patches 4 and 5) for now. They add a lot of new code to solve a theoretical problem (please correct me if I'm wrong), which is not a panic or data corruption, but a sub-optimal garbage collection behavior. I think we need a better motivation and/or an implementation which makes the code simpler and smaller. > > Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx> > --- > include/linux/sched.h | 4 ++++ > include/linux/sched/mm.h | 38 ++++++++++++++++++++++++++++++++++++++ > kernel/fork.c | 3 +++ > mm/memcontrol.c | 44 ++++++++++++++++++++++++++++++++++++++++---- > 4 files changed, 85 insertions(+), 4 deletions(-) > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index ee46f5cab95b..8edcc71a0a1d 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1318,6 +1318,10 @@ struct task_struct { > /* Used by memcontrol for targeted memcg charge: */ > struct mem_cgroup *active_memcg; > #endif > +#ifdef CONFIG_MEMCG_KMEM > + /* Used by memcontrol for targeted objcg charge: */ > + struct obj_cgroup *active_objcg; > +#endif > > #ifdef CONFIG_BLK_CGROUP > struct request_queue *throttle_queue; > diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h > index 1ae08b8462a4..be1189598b09 100644 > --- a/include/linux/sched/mm.h > +++ b/include/linux/sched/mm.h > @@ -330,6 +330,44 @@ set_active_memcg(struct 
mem_cgroup *memcg) > } > #endif > > +#ifdef CONFIG_MEMCG_KMEM > +DECLARE_PER_CPU(struct obj_cgroup *, int_active_objcg); > + > +/** > + * set_active_objcg - Starts the remote objcg kmem pages charging scope. > + * @objcg: objcg to charge. > + * > + * This function marks the beginning of the remote objcg charging scope. All the > + * __GFP_ACCOUNT kmem page allocations till the end of the scope will be charged > + * to the given objcg. > + * > + * NOTE: This function can nest. Users must save the return value and > + * reset the previous value after their own charging scope is over. > + * > + * If remote memcg and objcg are both set, objcg takes precedence over memcg > + * to charge the kmem pages. > + */ > +static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg) > +{ > + struct obj_cgroup *old; > + > + if (in_interrupt()) { > + old = this_cpu_read(int_active_objcg); > + this_cpu_write(int_active_objcg, objcg); > + } else { > + old = current->active_objcg; > + current->active_objcg = objcg; > + } > + > + return old; > +} > +#else > +static inline struct obj_cgroup *set_active_objcg(struct obj_cgroup *objcg) > +{ > + return NULL; > +} > +#endif > + > #ifdef CONFIG_MEMBARRIER > enum { > MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), > diff --git a/kernel/fork.c b/kernel/fork.c > index d66cd1014211..b4b9dd5d122f 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -945,6 +945,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) > #ifdef CONFIG_MEMCG > tsk->active_memcg = NULL; > #endif > +#ifdef CONFIG_MEMCG_KMEM > + tsk->active_objcg = NULL; > +#endif > return tsk; > > free_stack: > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 0cf342d22547..e48d4ab0af76 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -79,6 +79,11 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; > /* Active memory cgroup to use from an interrupt context */ > DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg); > > 
+#ifdef CONFIG_MEMCG_KMEM > +/* Active object cgroup to use from an interrupt context */ > +DEFINE_PER_CPU(struct obj_cgroup *, int_active_objcg); > +#endif > + > /* Socket memory accounting disabled? */ > static bool cgroup_memory_nosocket; > > @@ -1076,7 +1081,7 @@ static __always_inline struct mem_cgroup *get_active_memcg(void) > return memcg; > } > > -static __always_inline bool memcg_kmem_bypass(void) > +static __always_inline bool memcg_charge_bypass(void) > { > /* Allow remote memcg charging from any context. */ > if (unlikely(active_memcg())) > @@ -1094,7 +1099,7 @@ static __always_inline bool memcg_kmem_bypass(void) > */ > static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void) > { > - if (memcg_kmem_bypass()) > + if (memcg_charge_bypass()) > return NULL; > > if (unlikely(active_memcg())) > @@ -1103,6 +1108,29 @@ static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void) > return get_mem_cgroup_from_mm(current->mm); > } > > +#ifdef CONFIG_MEMCG_KMEM > +static __always_inline struct obj_cgroup *active_objcg(void) > +{ > + if (in_interrupt()) > + return this_cpu_read(int_active_objcg); > + else > + return current->active_objcg; > +} > + > +static __always_inline bool kmem_charge_bypass(void) > +{ > + /* Allow remote charging from any context. */ > + if (unlikely(active_objcg() || active_memcg())) > + return false; > + > + /* Memcg to charge can't be determined. 
*/ > + if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD)) > + return true; > + > + return false; > +} > +#endif > + > /** > * mem_cgroup_iter - iterate over memory cgroup hierarchy > * @root: hierarchy root > @@ -2997,13 +3025,20 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) > > __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) > { > - struct obj_cgroup *objcg = NULL; > + struct obj_cgroup *objcg; > struct mem_cgroup *memcg; > > - if (memcg_kmem_bypass()) > + if (kmem_charge_bypass()) > return NULL; > > rcu_read_lock(); > + objcg = active_objcg(); > + if (unlikely(objcg)) { > + /* remote object cgroup must hold a reference. */ > + obj_cgroup_get(objcg); > + goto out; > + } > + > if (unlikely(active_memcg())) > memcg = active_memcg(); > else > @@ -3015,6 +3050,7 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) > break; > objcg = NULL; > } > +out: > rcu_read_unlock(); > > return objcg; > -- > 2.11.0 >