On Tue, Sep 10, 2024 at 11:43:46PM +0000, Ackerley Tng wrote: > +static struct folio *kvm_gmem_hugetlb_alloc_folio(struct hstate *h, > + struct hugepage_subpool *spool) > +{ > + bool memcg_charge_was_prepared; > + struct mem_cgroup *memcg; > + struct mempolicy *mpol; > + nodemask_t *nodemask; > + struct folio *folio; > + gfp_t gfp_mask; > + int ret; > + int nid; > + > + gfp_mask = htlb_alloc_mask(h); > + > + memcg = get_mem_cgroup_from_current(); > + ret = mem_cgroup_hugetlb_try_charge(memcg, > + gfp_mask | __GFP_RETRY_MAYFAIL, > + pages_per_huge_page(h)); > + if (ret == -ENOMEM) > + goto err; > + > + memcg_charge_was_prepared = ret != -EOPNOTSUPP; > + > + /* Pages are only to be taken from guest_memfd subpool and nowhere else. */ > + if (hugepage_subpool_get_pages(spool, 1)) > + goto err_cancel_charge; > + > + nid = kvm_gmem_get_mpol_node_nodemask(htlb_alloc_mask(h), &mpol, > + &nodemask); > + /* > + * charge_cgroup_reservation is false because we didn't make any cgroup > + * reservations when creating the guest_memfd subpool. Hmm... isn't this exactly the reason to set charge_cgroup_reservation == true instead? IIUC, gmem hugetlb pages should participate in the hugetlb cgroup resv charge as well. They are already involved in the rest of the cgroup charges, so I wonder whether it's intended that the patch treats the resv accounting specially. Thanks, > + * > + * use_hstate_resv is true because we reserved from global hstate when > + * creating the guest_memfd subpool.
> + */ > + folio = hugetlb_alloc_folio(h, mpol, nid, nodemask, false, true); > + mpol_cond_put(mpol); > + > + if (!folio) > + goto err_put_pages; > + > + hugetlb_set_folio_subpool(folio, spool); > + > + if (memcg_charge_was_prepared) > + mem_cgroup_commit_charge(folio, memcg); > + > +out: > + mem_cgroup_put(memcg); > + > + return folio; > + > +err_put_pages: > + hugepage_subpool_put_pages(spool, 1); > + > +err_cancel_charge: > + if (memcg_charge_was_prepared) > + mem_cgroup_cancel_charge(memcg, pages_per_huge_page(h)); > + > +err: > + folio = ERR_PTR(-ENOMEM); > + goto out; > +} -- Peter Xu