On Mon, Mar 29, 2021 at 10:49 PM Dan Schatzberg <schatzberg.dan@xxxxxxxxx> wrote: > > set_active_memcg() worked for kernel allocations but was silently > ignored for user pages. > > This patch establishes a precedence order for who gets charged: > > 1. If there is a memcg associated with the page already, that memcg is > charged. This happens during swapin. > > 2. If an explicit mm is passed, mm->memcg is charged. This happens > during page faults, which can be triggered in remote VMs (eg gup). > > 3. Otherwise consult the current process context. If there is an > active_memcg, use that. Otherwise, current->mm->memcg. > > Previously, if a NULL mm was passed to mem_cgroup_charge (case 3) it > would always charge the root cgroup. Now it looks up the active_memcg > first (falling back to charging the root cgroup if not set). > > Signed-off-by: Dan Schatzberg <schatzberg.dan@xxxxxxxxx> > Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx> > Acked-by: Tejun Heo <tj@xxxxxxxxxx> > Acked-by: Chris Down <chris@xxxxxxxxxxxxxx> > Reviewed-by: Shakeel Butt <shakeelb@xxxxxxxxxx> > --- > mm/filemap.c | 2 +- > mm/memcontrol.c | 72 ++++++++++++++++++++++++++++--------------------- > mm/shmem.c | 4 +-- > 3 files changed, 44 insertions(+), 34 deletions(-) > > diff --git a/mm/filemap.c b/mm/filemap.c > index eeeb8e2cc36a..63fd980e863a 100644 > --- a/mm/filemap.c > +++ b/mm/filemap.c > @@ -872,7 +872,7 @@ noinline int __add_to_page_cache_locked(struct page *page, > page->index = offset; > > if (!huge) { > - error = mem_cgroup_charge(page, current->mm, gfp); > + error = mem_cgroup_charge(page, NULL, gfp); > if (error) > goto error; > charged = true; > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 668d1d7c2645..adc618814fd2 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -884,13 +884,38 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) > } > EXPORT_SYMBOL(mem_cgroup_from_task); > > +static __always_inline struct mem_cgroup *active_memcg(void) > +{ > + if (in_interrupt()) > + return this_cpu_read(int_active_memcg); > + else > + return current->active_memcg; > +} > + > +static __always_inline struct mem_cgroup *get_active_memcg(void) > +{ > + struct mem_cgroup *memcg; > + > + rcu_read_lock(); > + memcg = active_memcg(); > + /* remote memcg must hold a ref. */ > + if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css))) > + memcg = root_mem_cgroup; > + rcu_read_unlock(); > + > + return memcg; > +} This function is already removed since the patchset below. Use obj_cgroup APIs to charge kmem pages https://lore.kernel.org/patchwork/cover/1399132/ I also suggest not reintroducing get_active_memcg. There is only one user of it, just inline it into get_mem_cgroup_from_mm(). Actually we don’t need get_active_memcg() either. > + > /** > * get_mem_cgroup_from_mm: Obtain a reference on given mm_struct's memcg. > * @mm: mm from which memcg should be extracted. It can be NULL. > * > - * Obtain a reference on mm->memcg and returns it if successful. Otherwise > - * root_mem_cgroup is returned. However if mem_cgroup is disabled, NULL is > - * returned. > + * Obtain a reference on mm->memcg and returns it if successful. If mm > + * is NULL, then the memcg is chosen as follows: > + * 1) The active memcg, if set. > + * 2) current->mm->memcg, if available > + * 3) root memcg > + * If mem_cgroup is disabled, NULL is returned. > */ > struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) > { > @@ -899,13 +924,19 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) > if (mem_cgroup_disabled()) > return NULL; > > + /* > + * Page cache insertions can happen without an > + * actual mm context, e.g. during disk probing > + * on boot, loopback IO, acct() writes etc. > + */ > + if (unlikely(!mm)) { > + if (unlikely(active_memcg())) > + return get_active_memcg(); Since remote memcg must hold a reference, we do not need to do something like get_active_memcg() does. Just use css_get to obtain a ref, it is simpler. Just Like below. + if (unlikely(!mm)) { + memcg = active_memcg(); + if (unlikely(memcg)) { + /* remote memcg must hold a ref. */ + css_get(memcg); + return memcg; + } Thanks. > + mm = current->mm; > + } > + > rcu_read_lock(); > do { > - /* > - * Page cache insertions can happen withou an > - * actual mm context, e.g. during disk probing > - * on boot, loopback IO, acct() writes etc. > - */ > if (unlikely(!mm)) > memcg = root_mem_cgroup; > else { > @@ -919,28 +950,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) > } > EXPORT_SYMBOL(get_mem_cgroup_from_mm); > > -static __always_inline struct mem_cgroup *active_memcg(void) > -{ > - if (in_interrupt()) > - return this_cpu_read(int_active_memcg); > - else > - return current->active_memcg; > -} > - > -static __always_inline struct mem_cgroup *get_active_memcg(void) > -{ > - struct mem_cgroup *memcg; > - > - rcu_read_lock(); > - memcg = active_memcg(); > - /* remote memcg must hold a ref. */ > - if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css))) > - memcg = root_mem_cgroup; > - rcu_read_unlock(); > - > - return memcg; > -} > - > static __always_inline bool memcg_kmem_bypass(void) > { > /* Allow remote memcg charging from any context. */ > @@ -6549,7 +6558,8 @@ static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, > * @gfp_mask: reclaim mode > * > * Try to charge @page to the memcg that @mm belongs to, reclaiming > - * pages according to @gfp_mask if necessary. > + * pages according to @gfp_mask if necessary. if @mm is NULL, try to > + * charge to the active memcg. > * > * Do not use this for pages allocated for swapin. > * > diff --git a/mm/shmem.c b/mm/shmem.c > index 78ab81a62b29..7c09276125d5 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1694,7 +1694,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, > { > struct address_space *mapping = inode->i_mapping; > struct shmem_inode_info *info = SHMEM_I(inode); > - struct mm_struct *charge_mm = vma ? vma->vm_mm : current->mm; > + struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL; > struct page *page; > swp_entry_t swap; > int error; > @@ -1815,7 +1815,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, > } > > sbinfo = SHMEM_SB(inode->i_sb); > - charge_mm = vma ? vma->vm_mm : current->mm; > + charge_mm = vma ? vma->vm_mm : NULL; > > page = pagecache_get_page(mapping, index, > FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0); > -- > 2.30.2 >