+ cpuset-use-static-key-better-and-convert-to-new-api.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: cpuset: use static key better and convert to new API
has been added to the -mm tree.  Its filename is
     cpuset-use-static-key-better-and-convert-to-new-api.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/cpuset-use-static-key-better-and-convert-to-new-api.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/cpuset-use-static-key-better-and-convert-to-new-api.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Vlastimil Babka <vbabka@xxxxxxx>
Subject: cpuset: use static key better and convert to new API

An important function for cpusets is cpuset_node_allowed(), which
optimizes on the fact if there's a single root CPU set, it must be
trivially allowed.  But the check "nr_cpusets() <= 1" doesn't use the
cpusets_enabled_key static key the right way where static keys eliminate
branching overhead with jump labels.

This patch converts it so that static key is used properly. It's also switched
to the new static key API and the checking functions are converted to return
bool instead of int. We also provide a new variant __cpuset_zone_allowed()
which expects that the static key check was already done and they key was
enabled. This is needed for get_page_from_freelist() where we want to also
avoid the relatively slower check when ALLOC_CPUSET is not set in alloc_flags.

The impact on the page allocator microbenchmark is less than expected but
the cleanup in itself is worthwhile.

                                           4.6.0-rc2                  4.6.0-rc2
                                     multcheck-v1r20               cpuset-v1r20
Min      alloc-odr0-1               348.00 (  0.00%)           348.00 (  0.00%)
Min      alloc-odr0-2               254.00 (  0.00%)           254.00 (  0.00%)
Min      alloc-odr0-4               213.00 (  0.00%)           213.00 (  0.00%)
Min      alloc-odr0-8               186.00 (  0.00%)           183.00 (  1.61%)
Min      alloc-odr0-16              173.00 (  0.00%)           171.00 (  1.16%)
Min      alloc-odr0-32              166.00 (  0.00%)           163.00 (  1.81%)
Min      alloc-odr0-64              162.00 (  0.00%)           159.00 (  1.85%)
Min      alloc-odr0-128             160.00 (  0.00%)           157.00 (  1.88%)
Min      alloc-odr0-256             169.00 (  0.00%)           166.00 (  1.78%)
Min      alloc-odr0-512             180.00 (  0.00%)           180.00 (  0.00%)
Min      alloc-odr0-1024            188.00 (  0.00%)           187.00 (  0.53%)
Min      alloc-odr0-2048            194.00 (  0.00%)           193.00 (  0.52%)
Min      alloc-odr0-4096            199.00 (  0.00%)           198.00 (  0.50%)
Min      alloc-odr0-8192            202.00 (  0.00%)           201.00 (  0.50%)
Min      alloc-odr0-16384           203.00 (  0.00%)           202.00 (  0.49%)

Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
Signed-off-by: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Jesper Dangaard Brouer <brouer@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/cpuset.h |   42 ++++++++++++++++++++++++++-------------
 kernel/cpuset.c        |   14 ++++++-------
 mm/page_alloc.c        |    2 -
 3 files changed, 36 insertions(+), 22 deletions(-)

diff -puN include/linux/cpuset.h~cpuset-use-static-key-better-and-convert-to-new-api include/linux/cpuset.h
--- a/include/linux/cpuset.h~cpuset-use-static-key-better-and-convert-to-new-api
+++ a/include/linux/cpuset.h
@@ -16,26 +16,26 @@
 
 #ifdef CONFIG_CPUSETS
 
-extern struct static_key cpusets_enabled_key;
+extern struct static_key_false cpusets_enabled_key;
 static inline bool cpusets_enabled(void)
 {
-	return static_key_false(&cpusets_enabled_key);
+	return static_branch_unlikely(&cpusets_enabled_key);
 }
 
 static inline int nr_cpusets(void)
 {
 	/* jump label reference count + the top-level cpuset */
-	return static_key_count(&cpusets_enabled_key) + 1;
+	return static_key_count(&cpusets_enabled_key.key) + 1;
 }
 
 static inline void cpuset_inc(void)
 {
-	static_key_slow_inc(&cpusets_enabled_key);
+	static_branch_inc(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
-	static_key_slow_dec(&cpusets_enabled_key);
+	static_branch_dec(&cpusets_enabled_key);
 }
 
 extern int cpuset_init(void);
@@ -48,16 +48,25 @@ extern nodemask_t cpuset_mems_allowed(st
 void cpuset_init_current_mems_allowed(void);
 int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
 
-extern int __cpuset_node_allowed(int node, gfp_t gfp_mask);
+extern bool __cpuset_node_allowed(int node, gfp_t gfp_mask);
 
-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
-	return nr_cpusets() <= 1 || __cpuset_node_allowed(node, gfp_mask);
+	if (cpusets_enabled())
+		return __cpuset_node_allowed(node, gfp_mask);
+	return true;
 }
 
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
-	return cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+	return __cpuset_node_allowed(zone_to_nid(z), gfp_mask);
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+	if (cpusets_enabled())
+		return __cpuset_zone_allowed(z, gfp_mask);
+	return true;
 }
 
 extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
@@ -174,14 +183,19 @@ static inline int cpuset_nodemask_valid_
 	return 1;
 }
 
-static inline int cpuset_node_allowed(int node, gfp_t gfp_mask)
+static inline bool cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
-	return 1;
+	return true;
 }
 
-static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
-	return 1;
+	return true;
+}
+
+static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+	return true;
 }
 
 static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
diff -puN kernel/cpuset.c~cpuset-use-static-key-better-and-convert-to-new-api kernel/cpuset.c
--- a/kernel/cpuset.c~cpuset-use-static-key-better-and-convert-to-new-api
+++ a/kernel/cpuset.c
@@ -62,7 +62,7 @@
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
-struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
 /* See "Frequency meter" comments, below. */
 
@@ -2528,27 +2528,27 @@ static struct cpuset *nearest_hardwall_a
  *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
  */
-int __cpuset_node_allowed(int node, gfp_t gfp_mask)
+bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
 {
 	struct cpuset *cs;		/* current cpuset ancestors */
 	int allowed;			/* is allocation in zone z allowed? */
 	unsigned long flags;
 
 	if (in_interrupt())
-		return 1;
+		return true;
 	if (node_isset(node, current->mems_allowed))
-		return 1;
+		return true;
 	/*
 	 * Allow tasks that have access to memory reserves because they have
 	 * been OOM killed to get memory anywhere.
 	 */
 	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		return 1;
+		return true;
 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
-		return 0;
+		return false;
 
 	if (current->flags & PF_EXITING) /* Let dying task have memory */
-		return 1;
+		return true;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
 	spin_lock_irqsave(&callback_lock, flags);
diff -puN mm/page_alloc.c~cpuset-use-static-key-better-and-convert-to-new-api mm/page_alloc.c
--- a/mm/page_alloc.c~cpuset-use-static-key-better-and-convert-to-new-api
+++ a/mm/page_alloc.c
@@ -2877,7 +2877,7 @@ zonelist_scan:
 
 		if (cpusets_enabled() &&
 			(alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed(zone, gfp_mask))
+			!__cpuset_zone_allowed(zone, gfp_mask))
 				continue;
 		/*
 		 * Distribute pages in proportion to the individual
_

Patches currently in -mm which might be from vbabka@xxxxxxx are

mm-compaction-wrap-calculating-first-and-last-pfn-of-pageblock.patch
mm-compaction-reduce-spurious-pcplist-drains.patch
mm-compaction-skip-blocks-where-isolation-fails-in-async-direct-compaction.patch
mm-compaction-direct-freepage-allocation-for-async-direct-compaction.patch
cpuset-use-static-key-better-and-convert-to-new-api.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux