This is a note to let you know that I've just added the patch titled

    mm: optimize put_mems_allowed() usage

to the 3.14-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     mm-optimize-put_mems_allowed-usage.patch
and it can be found in the queue-3.14 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From d26914d11751b23ca2e8747725f2cae10c2f2c1b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@xxxxxxx>
Date: Thu, 3 Apr 2014 14:47:24 -0700
Subject: mm: optimize put_mems_allowed() usage

From: Mel Gorman <mgorman@xxxxxxx>

commit d26914d11751b23ca2e8747725f2cae10c2f2c1b upstream.

Since put_mems_allowed() is strictly optional (it is just a seqcount
retry), we don't need to evaluate the function if the allocation was in
fact successful, saving an smp_rmb(), some loads, and comparisons on
some relatively fast paths.

Since the naming get/put_mems_allowed() does suggest a mandatory
pairing, rename the interface, as suggested by Mel, to resemble the
seqcount interface.

This gives us read_mems_allowed_begin() and read_mems_allowed_retry(),
where it is important to note that the return value of the latter call
is inverted from its previous incarnation.
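To make the inverted return value concrete, here is a minimal sketch of
a caller's retry loop before and after the rename. It is illustrative
only and not part of the patch; alloc_page_somehow() is a hypothetical
stand-in for whatever allocation the caller performs under the cookie.

	static struct page *alloc_with_retry(gfp_t gfp)
	{
		unsigned int cpuset_mems_cookie;
		struct page *page;

		/*
		 * Old pattern: put_mems_allowed() returned true when no
		 * retry was needed, so its smp_rmb() and comparisons ran
		 * even when the allocation succeeded:
		 *
		 *	do {
		 *		cpuset_mems_cookie = get_mems_allowed();
		 *		page = alloc_page_somehow(gfp);
		 *	} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
		 */

		/*
		 * New pattern: read_mems_allowed_retry() returns true when
		 * the cookie is stale (the inverted sense), and the
		 * short-circuiting && skips it entirely on success:
		 */
		do {
			cpuset_mems_cookie = read_mems_allowed_begin();
			page = alloc_page_somehow(gfp);
		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));

		return page;
	}

The seqcount re-read is thus only paid when a failed allocation might
have been caused by a concurrent mems_allowed update.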
Signed-off-by: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Mel Gorman <mgorman@xxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
 include/linux/cpuset.h |   27 ++++++++++++++-------------
 kernel/cpuset.c        |    2 +-
 mm/filemap.c           |    4 ++--
 mm/hugetlb.c           |    4 ++--
 mm/mempolicy.c         |   12 ++++++------
 mm/page_alloc.c        |    8 ++++----
 mm/slab.c              |    4 ++--
 mm/slub.c              |   16 +++++++---------
 8 files changed, 38 insertions(+), 39 deletions(-)

--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -87,25 +87,26 @@ extern void rebuild_sched_domains(void);
 extern void cpuset_print_task_mems_allowed(struct task_struct *p);
 
 /*
- * get_mems_allowed is required when making decisions involving mems_allowed
- * such as during page allocation. mems_allowed can be updated in parallel
- * and depending on the new value an operation can fail potentially causing
- * process failure. A retry loop with get_mems_allowed and put_mems_allowed
- * prevents these artificial failures.
+ * read_mems_allowed_begin is required when making decisions involving
+ * mems_allowed such as during page allocation. mems_allowed can be updated in
+ * parallel and depending on the new value an operation can fail potentially
+ * causing process failure. A retry loop with read_mems_allowed_begin and
+ * read_mems_allowed_retry prevents these artificial failures.
  */
-static inline unsigned int get_mems_allowed(void)
+static inline unsigned int read_mems_allowed_begin(void)
 {
 	return read_seqcount_begin(&current->mems_allowed_seq);
 }
 
 /*
- * If this returns false, the operation that took place after get_mems_allowed
- * may have failed. It is up to the caller to retry the operation if
+ * If this returns true, the operation that took place after
+ * read_mems_allowed_begin may have failed artificially due to a concurrent
+ * update of mems_allowed. It is up to the caller to retry the operation if
  * appropriate.
  */
-static inline bool put_mems_allowed(unsigned int seq)
+static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-	return !read_seqcount_retry(&current->mems_allowed_seq, seq);
+	return read_seqcount_retry(&current->mems_allowed_seq, seq);
 }
 
 static inline void set_mems_allowed(nodemask_t nodemask)
@@ -225,14 +226,14 @@ static inline void set_mems_allowed(node
 {
 }
 
-static inline unsigned int get_mems_allowed(void)
+static inline unsigned int read_mems_allowed_begin(void)
 {
 	return 0;
 }
 
-static inline bool put_mems_allowed(unsigned int seq)
+static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-	return true;
+	return false;
 }
 
 #endif /* !CONFIG_CPUSETS */
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1022,7 +1022,7 @@ static void cpuset_change_task_nodemask(
 	task_lock(tsk);
 	/*
 	 * Determine if a loop is necessary if another thread is doing
-	 * get_mems_allowed(). If at least one node remains unchanged and
+	 * read_mems_allowed_begin(). If at least one node remains unchanged and
 	 * tsk does not have a mempolicy, then an empty nodemask will not be
 	 * possible when mems_allowed is larger than a word.
 	 */
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -520,10 +520,10 @@ struct page *__page_cache_alloc(gfp_t gf
 	if (cpuset_do_page_mem_spread()) {
 		unsigned int cpuset_mems_cookie;
 		do {
-			cpuset_mems_cookie = get_mems_allowed();
+			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
 			page = alloc_pages_exact_node(n, gfp, 0);
-		} while (!put_mems_allowed(cpuset_mems_cookie) && !page);
+		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
 
 		return page;
 	}
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -540,7 +540,7 @@ static struct page *dequeue_huge_page_vm
 		goto err;
 
 retry_cpuset:
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 	zonelist = huge_zonelist(vma, address,
 					htlb_alloc_mask(h), &mpol, &nodemask);
 
@@ -562,7 +562,7 @@ retry_cpuset:
 	}
 
 	mpol_cond_put(mpol);
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return page;
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1897,7 +1897,7 @@ int node_random(const nodemask_t *maskp)
 * If the effective policy is 'BIND, returns a pointer to the mempolicy's
 * @nodemask for filtering the zonelist.
 *
- * Must be protected by get_mems_allowed()
+ * Must be protected by read_mems_allowed_begin()
 */
 struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
				gfp_t gfp_flags, struct mempolicy **mpol,
@@ -2061,7 +2061,7 @@ alloc_pages_vma(gfp_t gfp, int order, st
 
 retry_cpuset:
 	pol = get_vma_policy(current, vma, addr);
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
 		unsigned nid;
@@ -2069,7 +2069,7 @@ retry_cpuset:
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 			goto retry_cpuset;
 
 		return page;
@@ -2079,7 +2079,7 @@ retry_cpuset:
 			policy_nodemask(gfp, pol));
 	if (unlikely(mpol_needs_cond_ref(pol)))
 		__mpol_put(pol);
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return page;
 }
@@ -2113,7 +2113,7 @@ struct page *alloc_pages_current(gfp_t g
 		pol = &default_policy;
 
 retry_cpuset:
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	/*
 	 * No reference counting needed for current->mempolicy
@@ -2126,7 +2126,7 @@ retry_cpuset:
 			policy_zonelist(gfp, pol, numa_node_id()),
 			policy_nodemask(gfp, pol));
 
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 
 	return page;
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2736,7 +2736,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, u
 		return NULL;
 
 retry_cpuset:
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 
 	/* The preferred zone is used for statistics later */
 	first_zones_zonelist(zonelist, high_zoneidx,
@@ -2791,7 +2791,7 @@ out:
	 * the mask is being updated. If a page allocation is about to fail,
	 * check if the cpuset changed during allocation and if so, retry.
	 */
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 
 	memcg_kmem_commit_charge(page, memcg, order);
 
@@ -3059,9 +3059,9 @@ bool skip_free_areas_node(unsigned int f
 		goto out;
 
 	do {
-		cpuset_mems_cookie = get_mems_allowed();
+		cpuset_mems_cookie = read_mems_allowed_begin();
 		ret = !node_isset(nid, cpuset_current_mems_allowed);
-	} while (!put_mems_allowed(cpuset_mems_cookie));
+	} while (read_mems_allowed_retry(cpuset_mems_cookie));
 out:
 	return ret;
 }
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3122,7 +3122,7 @@ static void *fallback_alloc(struct kmem_
 	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
 retry_cpuset:
-	cpuset_mems_cookie = get_mems_allowed();
+	cpuset_mems_cookie = read_mems_allowed_begin();
 	zonelist = node_zonelist(slab_node(), flags);
 
 retry:
@@ -3180,7 +3180,7 @@ retry:
 		}
 	}
 
-	if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !obj))
+	if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 	return obj;
 }
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1684,7 +1684,7 @@ static void *get_any_partial(struct kmem
 		return NULL;
 
 	do {
-		cpuset_mems_cookie = get_mems_allowed();
+		cpuset_mems_cookie = read_mems_allowed_begin();
 		zonelist = node_zonelist(slab_node(), flags);
 		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 			struct kmem_cache_node *n;
@@ -1696,19 +1696,17 @@ static void *get_any_partial(struct kmem
 			object = get_partial_node(s, n, c, flags);
 			if (object) {
 				/*
-				 * Return the object even if
-				 * put_mems_allowed indicated that
-				 * the cpuset mems_allowed was
-				 * updated in parallel. It's a
-				 * harmless race between the alloc
-				 * and the cpuset update.
+				 * Don't check read_mems_allowed_retry()
+				 * here - if mems_allowed was updated in
+				 * parallel, that was a harmless race
+				 * between allocation and the cpuset
+				 * update
				 */
-				put_mems_allowed(cpuset_mems_cookie);
 				return object;
 			}
 		}
-	} while (!put_mems_allowed(cpuset_mems_cookie));
+	} while (read_mems_allowed_retry(cpuset_mems_cookie));
 #endif
 	return NULL;
 }


Patches currently in stable-queue which might be from mgorman@xxxxxxx are

queue-3.14/mm-vmscan-shrink_slab-rename-max_pass-freeable.patch
queue-3.14/mm-thp-move-invariant-bug-check-out-of-loop-in-__split_huge_page_map.patch
queue-3.14/swap-change-swap_list_head-to-plist-add-swap_avail_head.patch
queue-3.14/lib-plist-add-helper-functions.patch
queue-3.14/swap-change-swap_info-singly-linked-list-to-list_head.patch
queue-3.14/mm-compaction-avoid-isolating-pinned-pages.patch
queue-3.14/mm-readahead.c-fix-readahead-failure-for-memoryless-numa-nodes-and-limit-readahead-pages.patch
queue-3.14/lib-plist-add-plist_requeue.patch
queue-3.14/mm-exclude-memoryless-nodes-from-zone_reclaim.patch
queue-3.14/mm-numa-do-not-mark-ptes-pte_numa-when-splitting-huge-pages.patch
queue-3.14/mm-filemap.c-avoid-always-dirtying-mapping-flags-on-o_direct.patch
queue-3.14/mm-compaction-ignore-pageblock-skip-when-manually-invoking-compaction.patch
queue-3.14/vmscan-reclaim_clean_pages_from_list-must-use-mod_zone_page_state.patch
queue-3.14/mm-compaction-do-not-call-suitable_migration_target-on-every-page.patch
queue-3.14/mm-compaction-determine-isolation-mode-only-once.patch
queue-3.14/mm-compaction-check-pageblock-suitability-once-per-pageblock.patch
queue-3.14/mm-optimize-put_mems_allowed-usage.patch
queue-3.14/mm-vmscan-respect-numa-policy-mask-when-shrinking-slab-on-direct-reclaim.patch
queue-3.14/mm-compaction-clean-up-code-on-success-of-ballon-isolation.patch
queue-3.14/hugetlb-ensure-hugepage-access-is-denied-if-hugepages-are-not-supported.patch
queue-3.14/mm-migrate-close-race-between-migration-completion-and-mprotect.patch
queue-3.14/mm-compaction-disallow-high-order-page-for-migration-target.patch
queue-3.14/mm-compaction-change-the-timing-to-check-to-drop-the-spinlock.patch
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html