On Wed, Aug 03, 2022 at 09:21:22PM +0800, Michal Hocko wrote:
> On Thu 04-08-22 05:08:34, Feng Tang wrote:
> [...]
> > Do we still need the other nodemask API I proposed earlier, which has
> > no gfp_flag parameter and is used for __nr_hugepages_store_common?
>
> I would touch as little code as possible.

OK. Please review the following patch, thanks!

- Feng
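Btw, not part of the patch: below is a rough, untested user-space sketch
for poking at the MPOL_BIND path that the new policy_mbind_nodemask()
enforces; treat it as an illustration only. It assumes a NUMA machine, a
2MB default huge page size, and a few huge pages reserved on node 0 (e.g.
via /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages).
It binds the task to node 0 and faults in one huge page; since the
reservation check feeding off allowed_mems_nr() only counts free huge
pages on the 'allowed' nodes, the mmap() is expected to fail with ENOMEM
once node 0 has no free huge pages, even if other nodes still do. Compiles
with plain gcc, no libnuma needed:

/*
 * Untested sketch: bind to node 0 with MPOL_BIND, then map and touch
 * one huge page.  The hugetlb reservation accounting should consider
 * only node 0 ('allowed' nodes), not the other nodes' free pages.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#define MPOL_BIND	2		/* from include/uapi/linux/mempolicy.h */
#define HPAGE_SIZE	(2UL << 20)	/* assumes 2MB default hugepagesz */

int main(void)
{
	unsigned long nodemask = 1UL << 0;	/* node 0 only */
	void *p;

	/* raw syscall so libnuma isn't required */
	if (syscall(SYS_set_mempolicy, MPOL_BIND, &nodemask,
		    8 * sizeof(nodemask)) < 0) {
		perror("set_mempolicy");
		return 1;
	}

	p = mmap(NULL, HPAGE_SIZE, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	memset(p, 0, HPAGE_SIZE);	/* fault the huge page in on node 0 */
	munmap(p, HPAGE_SIZE);
	return 0;
}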
---

From a2db9a57da616bb3ea21e48a4a9ceb5c2cf4f7a2 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@xxxxxxxxx>
Date: Thu, 4 Aug 2022 09:39:24 +0800
Subject: [PATCH RFC] mm/hugetlb: add dedicated func to get 'allowed' nodemask
 for current process

Muchun Song found that after the MPOL_PREFERRED_MANY policy was
introduced in commit b27abaccf8e8 ("mm/mempolicy: add
MPOL_PREFERRED_MANY for multiple preferred nodes") [1], the semantics
of policy_nodemask_current() changed for this new policy: it returns
the 'preferred' nodes instead of the 'allowed' nodes, which can break
the accounting done by its caller in hugetlb, allowed_mems_nr().

Michal found that policy_nodemask_current() is only used by hugetlb,
and suggested moving it into the hugetlb code under a more explicit
name, to enforce the 'allowed' semantics for which only the MPOL_BIND
policy matters.

One note on the new policy_mbind_nodemask(): the cross-check of
MPOL_BIND, the gfp flags and the cpuset configuration can leave no
available node. This is considered a broken configuration, and NULL
(meaning all nodes) is returned.

[1]. https://lore.kernel.org/lkml/20220801084207.39086-1-songmuchun@xxxxxxxxxxxxx/t/

Reported-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
Suggested-by: Michal Hocko <mhocko@xxxxxxxx>
Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx>
---
 include/linux/mempolicy.h | 32 ++++++++++++++++++++------------
 mm/hugetlb.c              | 24 ++++++++++++++++++++----
 mm/mempolicy.c            | 20 --------------------
 3 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 668389b4b53d..ea0168fffb4c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -151,13 +151,6 @@ extern bool mempolicy_in_oom_domain(struct task_struct *tsk,
 					const nodemask_t *mask);
 extern nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy);
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	struct mempolicy *mpol = get_task_policy(current);
-
-	return policy_nodemask(gfp, mpol);
-}
-
 extern unsigned int mempolicy_slab_node(void);
 
 extern enum zone_type policy_zone;
@@ -189,6 +182,26 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 	return (pol->mode == MPOL_PREFERRED_MANY);
 }
 
+static inline int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
+{
+	enum zone_type dynamic_policy_zone = policy_zone;
+
+	BUG_ON(dynamic_policy_zone == ZONE_MOVABLE);
+
+	/*
+	 * if policy->nodes has movable memory only,
+	 * we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only.
+	 *
+	 * policy->nodes is intersect with node_states[N_MEMORY].
+	 * so if the following test fails, it implies
+	 * policy->nodes has movable memory only.
+	 */
+	if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY]))
+		dynamic_policy_zone = ZONE_MOVABLE;
+
+	return zone >= dynamic_policy_zone;
+}
+
 #else
 
@@ -294,11 +307,6 @@ static inline void mpol_put_task_policy(struct task_struct *task)
 {
 }
 
-static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
-{
-	return NULL;
-}
-
 static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a18c071c294e..ad84bb85b6de 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4330,18 +4330,34 @@ static int __init default_hugepagesz_setup(char *s)
 }
 __setup("default_hugepagesz=", default_hugepagesz_setup);
 
+static nodemask_t *policy_mbind_nodemask(gfp_t gfp)
+{
+#ifdef CONFIG_NUMA
+	struct mempolicy *mpol = get_task_policy(current);
+
+	/*
+	 * Only enforce MPOL_BIND policy which overlaps with cpuset policy
+	 * (from policy_nodemask) specifically for hugetlb case
+	 */
+	if (mpol->mode == MPOL_BIND &&
+		(apply_policy_zone(mpol, gfp_zone(gfp)) &&
+		 cpuset_nodemask_valid_mems_allowed(&mpol->nodes)))
+		return &mpol->nodes;
+#endif
+	return NULL;
+}
+
 static unsigned int allowed_mems_nr(struct hstate *h)
 {
 	int node;
 	unsigned int nr = 0;
-	nodemask_t *mpol_allowed;
+	nodemask_t *mbind_nodemask;
 	unsigned int *array = h->free_huge_pages_node;
 	gfp_t gfp_mask = htlb_alloc_mask(h);
 
-	mpol_allowed = policy_nodemask_current(gfp_mask);
-
+	mbind_nodemask = policy_mbind_nodemask(gfp_mask);
 	for_each_node_mask(node, cpuset_current_mems_allowed) {
-		if (!mpol_allowed || node_isset(node, *mpol_allowed))
+		if (!mbind_nodemask || node_isset(node, *mbind_nodemask))
 			nr += array[node];
 	}
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d39b01fd52fe..5553bd53927f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1805,26 +1805,6 @@ bool vma_policy_mof(struct vm_area_struct *vma)
 	return pol->flags & MPOL_F_MOF;
 }
 
-static int apply_policy_zone(struct mempolicy *policy, enum zone_type zone)
-{
-	enum zone_type dynamic_policy_zone = policy_zone;
-
-	BUG_ON(dynamic_policy_zone == ZONE_MOVABLE);
-
-	/*
-	 * if policy->nodes has movable memory only,
-	 * we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only.
-	 *
-	 * policy->nodes is intersect with node_states[N_MEMORY].
-	 * so if the following test fails, it implies
-	 * policy->nodes has movable memory only.
-	 */
-	if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY]))
-		dynamic_policy_zone = ZONE_MOVABLE;
-
-	return zone >= dynamic_policy_zone;
-}
-
 /*
  * Return a nodemask representing a mempolicy for filtering nodes for
  * page allocation
-- 
2.27.0