On Fri, Nov 08, 2024 at 01:29:45PM -0800, Joshua Hahn wrote:
> This patch introduces mem_cgroup_charge_hugetlb, which combines the
> logic of mem_cgroup{try,commit}_hugetlb. This reduces the footprint of
> memcg in hugetlb code, and also consolidates the error path that memcg
> can take into just one point.
>
> Signed-off-by: Joshua Hahn <joshua.hahnjy@xxxxxxxxx>
>
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/hugetlb.c               | 34 ++++++++++++----------------------
>  mm/memcontrol.c            | 19 +++++++++++++++++++
>  3 files changed, 33 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index bb49e0d4b377..d90c1ac791f1 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -678,6 +678,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
>  int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
>                                    long nr_pages);
>
> +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
> +
>  int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
>                                     gfp_t gfp, swp_entry_t entry);
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index fbb10e52d7ea..6f6841483039 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -2967,21 +2967,13 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	struct hugepage_subpool *spool = subpool_vma(vma);
>  	struct hstate *h = hstate_vma(vma);
>  	struct folio *folio;
> -	long map_chg, map_commit, nr_pages = pages_per_huge_page(h);
> +	long map_chg, map_commit;
>  	long gbl_chg;
> -	int memcg_charge_ret, ret, idx;
> +	int ret, idx;
>  	struct hugetlb_cgroup *h_cg = NULL;
> -	struct mem_cgroup *memcg;
>  	bool deferred_reserve;
>  	gfp_t gfp = htlb_alloc_mask(h) | __GFP_RETRY_MAYFAIL;
>
> -	memcg = get_mem_cgroup_from_current();
> -	memcg_charge_ret = mem_cgroup_hugetlb_try_charge(memcg, gfp, nr_pages);
> -	if (memcg_charge_ret == -ENOMEM) {
> -		mem_cgroup_put(memcg);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -
>  	idx = hstate_index(h);
>  	/*
>  	 * Examine the region/reserve map to determine if the process
> @@ -2989,12 +2981,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	 * code of zero indicates a reservation exists (no change).
>  	 */
>  	map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
> -	if (map_chg < 0) {
> -		if (!memcg_charge_ret)
> -			mem_cgroup_cancel_charge(memcg, nr_pages);
> -		mem_cgroup_put(memcg);
> +	if (map_chg < 0)
>  		return ERR_PTR(-ENOMEM);
> -	}
>
>  	/*
>  	 * Processes that did not create the mapping will have no
> @@ -3092,10 +3080,15 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  		}
>  	}
>
> -	if (!memcg_charge_ret)
> -		mem_cgroup_commit_charge(folio, memcg);
> -	lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
> -	mem_cgroup_put(memcg);
> +	ret = mem_cgroup_charge_hugetlb(folio, gfp);
> +	if (ret == -ENOMEM) {
> +		spin_unlock_irq(&hugetlb_lock);

spin_unlock_irq?? hugetlb_lock is not held at this point.

> +		free_huge_folio(folio);

free_huge_folio() will call lruvec_stat_mod_folio() unconditionally but
you are only calling it on success. This may underflow the metric.
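An untested sketch of one way to avoid that (just to illustrate, not a
definitive fix): bump NR_HUGETLB before the potential free, so that
free_huge_folio()'s unconditional decrement stays balanced either way,
and drop the unlock since hugetlb_lock has already been released above:

	ret = mem_cgroup_charge_hugetlb(folio, gfp);
	/*
	 * Increment NR_HUGETLB unconditionally: if the charge failed,
	 * free_huge_folio() below decrements it again, so the counter
	 * stays balanced on both paths.
	 */
	lruvec_stat_mod_folio(folio, NR_HUGETLB, pages_per_huge_page(h));
	if (ret == -ENOMEM) {
		/* hugetlb_lock is not held here, no unlock needed */
		free_huge_folio(folio);
		return ERR_PTR(-ENOMEM);
	}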
> +		return ERR_PTR(-ENOMEM);
> +	}
> +	else if (!ret)
> +		lruvec_stat_mod_folio(folio, NR_HUGETLB,
> +				      pages_per_huge_page(h));
>
>  	return folio;
>
> @@ -3110,9 +3103,6 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
>  	hugepage_subpool_put_pages(spool, 1);
>  out_end_reservation:
>  	vma_end_reservation(h, vma, addr);
> -	if (!memcg_charge_ret)
> -		mem_cgroup_cancel_charge(memcg, nr_pages);
> -	mem_cgroup_put(memcg);
>  	return ERR_PTR(-ENOSPC);
>  }
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 97f63ec9c9fb..95ee77fe27af 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4529,6 +4529,25 @@ int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp,
>  	return 0;
>  }
>
> +int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)
> +{
> +	struct mem_cgroup *memcg = get_mem_cgroup_from_current();
> +	int ret = 0;
> +
> +	if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
> +	    !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
> +		ret = -EOPNOTSUPP;

why EOPNOTSUPP? You need to return 0 here. We do want
lruvec_stat_mod_folio() to be called. (Rough sketch at the end of this
mail.)

> +		goto out;
> +	}
> +
> +	if (charge_memcg(folio, memcg, gfp))
> +		ret = -ENOMEM;
> +
> +out:
> +	mem_cgroup_put(memcg);
> +	return ret;
> +}
> +
>  /**
>   * mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
>   * @folio: folio to charge.
> --
> 2.43.5
>
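To expand on the EOPNOTSUPP comment above, an untested sketch of what I
mean: treat "memcg does not account hugetlb" as a successful no-op, so
the caller still goes on to update NR_HUGETLB:

int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp)
{
	struct mem_cgroup *memcg = get_mem_cgroup_from_current();
	int ret = 0;

	/*
	 * Charging is skipped, not failed, when memcg does not account
	 * hugetlb; return 0 so the caller still updates the NR_HUGETLB
	 * stat for this folio.
	 */
	if (mem_cgroup_disabled() || !memcg_accounts_hugetlb() ||
	    !memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
		goto out;

	if (charge_memcg(folio, memcg, gfp))
		ret = -ENOMEM;

out:
	mem_cgroup_put(memcg);
	return ret;
}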