+ mm-move-pcp-and-lru-pcp-drainging-into-single-wq.patch added to -mm tree

akpm@xxxxxxxxxxxxxxxxxxxx · Tue, 07 Mar 2017 15:12:13 -0800

The patch titled
     Subject: mm: move pcp and lru-pcp draining into single wq
has been added to the -mm tree.  Its filename is
     mm-move-pcp-and-lru-pcp-drainging-into-single-wq.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-move-pcp-and-lru-pcp-drainging-into-single-wq.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-move-pcp-and-lru-pcp-drainging-into-single-wq.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Michal Hocko <mhocko@xxxxxxxx>
Subject: mm: move pcp and lru-pcp draining into single wq

We currently have 2 specific WQ_MEM_RECLAIM workqueues in the mm code:
vmstat_wq for updating pcp stats and lru_add_drain_wq dedicated to draining
per cpu lru caches.  This seems more than necessary because both can run
on a single WQ.  Neither blocks on locks requiring a memory allocation
nor performs any allocations itself.  We will save one rescuer thread
this way.

On the other hand drain_all_pages() queues work on the system wq which
doesn't have a rescuer and so this depends on memory allocation (when all
workers are stuck allocating and new ones cannot be created).  This is not
critical as there should be somebody invoking the OOM killer (e.g.  the
forking worker) to get the situation unstuck and eventually perform the
draining.  Quite annoying though.  This worker should be using
WQ_MEM_RECLAIM as well.  We can reuse the same one as for lru draining and
vmstat.

Link: http://lkml.kernel.org/r/20170307131751.24936-1-mhocko@xxxxxxxxxx
Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
Suggested-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Acked-by: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/internal.h   |    7 +++++++
 mm/page_alloc.c |    9 ++++++++-
 mm/swap.c       |   27 ++++++++-------------------
 mm/vmstat.c     |   14 ++++++++------
 4 files changed, 31 insertions(+), 26 deletions(-)

diff -puN mm/internal.h~mm-move-pcp-and-lru-pcp-drainging-into-single-wq mm/internal.h

--- a/mm/internal.h~mm-move-pcp-and-lru-pcp-drainging-into-single-wq
+++ a/mm/internal.h
@@ -486,6 +486,13 @@ unsigned long reclaim_clean_pages_from_l
 enum ttu_flags;
 struct tlbflush_unmap_batch;
 
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
diff -puN mm/page_alloc.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq mm/page_alloc.c
--- a/mm/page_alloc.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq
+++ a/mm/page_alloc.c
@@ -2363,6 +2363,13 @@ void drain_all_pages(struct zone *zone)
 	 */
 	static cpumask_t cpus_with_pcps;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON_ONCE(!mm_percpu_wq))
+		return;
+
 	/* Workqueues cannot recurse */
 	if (current->flags & PF_WQ_WORKER)
 		return;
@@ -2412,7 +2419,7 @@ void drain_all_pages(struct zone *zone)
 	for_each_cpu(cpu, &cpus_with_pcps) {
 		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
 		INIT_WORK(work, drain_local_pages_wq);
-		schedule_work_on(cpu, work);
+		queue_work_on(cpu, mm_percpu_wq, work);
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
 		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
diff -puN mm/swap.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq mm/swap.c
--- a/mm/swap.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq
+++ a/mm/swap.c
@@ -677,30 +677,19 @@ static void lru_add_drain_per_cpu(struct
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
-	lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
-	if (WARN(!lru_add_drain_wq,
-		"Failed to create workqueue lru_add_drain_wq"))
-		return -ENOMEM;
-
-	return 0;
-}
-early_initcall(lru_init);
-
 void lru_add_drain_all(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
 	int cpu;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON(!mm_percpu_wq))
+		return;
+
 	mutex_lock(&lock);
 	get_online_cpus();
 	cpumask_clear(&has_work);
@@ -714,7 +703,7 @@ void lru_add_drain_all(void)
 		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
-			queue_work_on(cpu, lru_add_drain_wq, work);
+			queue_work_on(cpu, mm_percpu_wq, work);
 			cpumask_set_cpu(cpu, &has_work);
 		}
 	}
diff -puN mm/vmstat.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq mm/vmstat.c
--- a/mm/vmstat.c~mm-move-pcp-and-lru-pcp-drainging-into-single-wq
+++ a/mm/vmstat.c
@@ -1563,7 +1563,6 @@ static const struct file_operations proc
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
 
@@ -1621,7 +1620,7 @@ static void vmstat_update(struct work_st
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
 		 */
-		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
 				this_cpu_ptr(&vmstat_work),
 				round_jiffies_relative(sysctl_stat_interval));
 	}
@@ -1700,7 +1699,7 @@ static void vmstat_shepherd(struct work_
 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
 		if (!delayed_work_pending(dw) && need_update(cpu))
-			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
 	}
 	put_online_cpus();
 
@@ -1716,7 +1715,6 @@ static void __init start_shepherd_timer(
 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
 			vmstat_update);
 
-	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
 }
@@ -1762,11 +1760,15 @@ static int vmstat_cpu_dead(unsigned int
 
 #endif
 
+struct workqueue_struct *mm_percpu_wq;
+
 static int __init setup_vmstat(void)
 {
-#ifdef CONFIG_SMP
-	int ret;
+	int ret __maybe_unused;
 
+	mm_percpu_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+
+#ifdef CONFIG_SMP
 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
 					NULL, vmstat_cpu_dead);
 	if (ret < 0)
_

Patches currently in -mm which might be from mhocko@xxxxxxxx are

lockdep-allow-to-disable-reclaim-lockup-detection.patch
xfs-abstract-pf_fstrans-to-pf_memalloc_nofs.patch
mm-introduce-memalloc_nofs_saverestore-api.patch
xfs-use-memalloc_nofs_saverestore-instead-of-memalloc_noio.patch
jbd2-mark-the-transaction-context-with-the-scope-gfp_nofs-context.patch
jbd2-make-the-whole-kjournald2-kthread-nofs-safe.patch
mm-move-pcp-and-lru-pcp-drainging-into-single-wq.patch
mm-introduce-kvalloc-helpers.patch
mm-support-__gfp_repeat-in-kvmalloc_node-for-32kb.patch
rhashtable-simplify-a-strange-allocation-pattern.patch
ila-simplify-a-strange-allocation-pattern.patch
xattr-zero-out-memory-copied-to-userspace-in-getxattr.patch
treewide-use-kvalloc-rather-than-opencoded-variants.patch
net-use-kvmalloc-with-__gfp_repeat-rather-than-open-coded-variant.patch
md-use-kvmalloc-rather-than-opencoded-variant.patch
bcache-use-kvmalloc.patch
mm-vmalloc-use-__gfp_highmem-implicitly.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html