When doing broader testing, we noticed allocation slowness in one test case that mallocs memory whose size is slightly bigger than the free memory of the targeted nodes, but much less than the total free memory of the system. The reason is that the code enters the slowpath of __alloc_pages_nodemask(), which takes quite some time. As alloc_pages_policy() will give it a 2nd try with a NULL nodemask, there is no need to enter the slowpath for the first try. Add a new gfp bit to skip the slowpath, so that use cases like this can leverage it. With it, the malloc in such a case is much faster, as it never enters the slowpath. Adding a new gfp_mask bit is generally not liked; another idea is to add a second nodemask to struct 'alloc_context', so that it has two: 'preferred-nmask' and 'fallback-nmask'. They would be tried in turn if not NULL, so that __alloc_pages_nodemask() needs to be called only once. Signed-off-by: Feng Tang <feng.tang@xxxxxxxxx> --- include/linux/gfp.h | 9 +++++++-- mm/mempolicy.c | 2 +- mm/page_alloc.c | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6e479e9..81bacbe 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,8 +39,9 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x100000u #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u +#define ___GFP_NO_SLOWPATH 0x800000u #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x800000u +#define ___GFP_NOLOCKDEP 0x1000000u #else #define ___GFP_NOLOCKDEP 0 #endif @@ -220,11 +221,15 @@ struct vm_area_struct; #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +/* Do not go into the slowpath */ +#define __GFP_NO_SLOWPATH ((__force gfp_t)___GFP_NO_SLOWPATH) + /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (24 + 
IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /** diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d66c1c0..e84b56d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2206,7 +2206,7 @@ static struct page *alloc_pages_policy(struct mempolicy *pol, gfp_t gfp, * +-------------------------------+---------------+------------+ */ if (pol->mode == MPOL_PREFERRED_MANY) - gfp_mask |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + gfp_mask |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN | __GFP_NO_SLOWPATH; page = __alloc_pages_nodemask(gfp_mask, order, policy_node(gfp, pol, preferred_nid), diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 519a60d..969e3a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4993,7 +4993,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, /* First allocation attempt */ page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); - if (likely(page)) + if (likely(page) || (gfp_mask & __GFP_NO_SLOWPATH)) goto out; /* -- 2.7.4