+ mm-disable-interrupts-while-initializing-deferred-pages.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm: disable interrupts while initializing deferred pages
has been added to the -mm tree.  Its filename is
     mm-disable-interrupts-while-initializing-deferred-pages.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-disable-interrupts-while-initializing-deferred-pages.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-disable-interrupts-while-initializing-deferred-pages.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Subject: mm: disable interrupts while initializing deferred pages

Vlastimil Babka reported about a window issue during which when deferred
pages are initialized, and the current version of on-demand initialization
is finished, allocations may fail.  While this is highly unlikely
scenario, since this kind of allocation request must be large, and must
come from interrupt handler, we still want to cover it.

We solve this by initializing deferred pages with interrupts disabled, and
holding node_size_lock spin lock while pages in the node are being
initialized.  The on-demand deferred page initialization that comes later
will use the same lock, and thus synchronize with deferred_init_memmap().

It is unlikely for threads that initialize deferred pages to be
interrupted.  They run soon after smp_init(), but before modules are
initialized, and long before user space programs.  This is why there is no
adverse effect of having these threads running with interrupts disabled.

Link: http://lkml.kernel.org/r/20180309220807.24961-2-pasha.tatashin@xxxxxxxxxx
Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Cc: Steven Sistare <steven.sistare@xxxxxxxxxx>
Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx>
Cc: Masayoshi Mizuma <m.mizuma@xxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: AKASHI Takahiro <takahiro.akashi@xxxxxxxxxx>
Cc: Gioh Kim <gi-oh.kim@xxxxxxxxxxxxxxxx>
Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
Cc: Yaowei Bai <baiyaowei@xxxxxxxxxxxxxxxxxxxx>
Cc: Wei Yang <richard.weiyang@xxxxxxxxx>
Cc: Paul Burton <paul.burton@xxxxxxxx>
Cc: Miles Chen <miles.chen@xxxxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/memory_hotplug.h |   73 ++++++++++++++++++++-----------
 include/linux/mmzone.h         |    5 +-
 mm/page_alloc.c                |   22 ++++-----
 3 files changed, 62 insertions(+), 38 deletions(-)

diff -puN include/linux/memory_hotplug.h~mm-disable-interrupts-while-initializing-deferred-pages include/linux/memory_hotplug.h
--- a/include/linux/memory_hotplug.h~mm-disable-interrupts-while-initializing-deferred-pages
+++ a/include/linux/memory_hotplug.h
@@ -52,24 +52,6 @@ enum {
 };
 
 /*
- * pgdat resizing functions
- */
-static inline
-void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
-{
-	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
-}
-static inline
-void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
-{
-	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
-}
-static inline
-void pgdat_resize_init(struct pglist_data *pgdat)
-{
-	spin_lock_init(&pgdat->node_size_lock);
-}
-/*
  * Zone resizing functions
  *
  * Note: any attempt to resize a zone should has pgdat_resize_lock()
@@ -246,13 +228,6 @@ extern void clear_zone_contiguous(struct
 	___page;				\
  })
 
-/*
- * Stub functions for when hotplug is off
- */
-static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
-static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
-static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
-
 static inline unsigned zone_span_seqbegin(struct zone *zone)
 {
 	return 0;
@@ -293,6 +268,54 @@ static inline bool movable_node_is_enabl
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
+#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+	spin_lock_init(&pgdat->node_size_lock);
+}
+
+/* Disable interrupts and save previous IRQ state in flags before locking */
+static inline
+void pgdat_resize_lock_irq(struct pglist_data *pgdat, unsigned long *flags)
+{
+	unsigned long tmp_flags;
+
+	local_irq_save(*flags);
+	local_irq_disable();
+	pgdat_resize_lock(pgdat, &tmp_flags);
+}
+
+static inline
+void pgdat_resize_unlock_irq(struct pglist_data *pgdat, unsigned long *flags)
+{
+	pgdat_resize_unlock(pgdat, flags);
+}
+
+#else /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_lock_irq(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock_irq(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+#endif /* !(CONFIG_MEMORY_HOTPLUG || CONFIG_DEFERRED_STRUCT_PAGE_INIT) */
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 
 extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
diff -puN include/linux/mmzone.h~mm-disable-interrupts-while-initializing-deferred-pages include/linux/mmzone.h
--- a/include/linux/mmzone.h~mm-disable-interrupts-while-initializing-deferred-pages
+++ a/include/linux/mmzone.h
@@ -633,14 +633,15 @@ typedef struct pglist_data {
 #ifndef CONFIG_NO_BOOTMEM
 	struct bootmem_data *bdata;
 #endif
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 	/*
 	 * Must be held any time you expect node_start_pfn, node_present_pages
 	 * or node_spanned_pages stay constant.  Holding this will also
 	 * guarantee that any pfn_valid() stays that way.
 	 *
 	 * pgdat_resize_lock() and pgdat_resize_unlock() are provided to
-	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG.
+	 * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG
+	 * or CONFIG_DEFERRED_STRUCT_PAGE_INIT.
 	 *
 	 * Nests above zone->lock and zone->span_seqlock
 	 */
diff -puN mm/page_alloc.c~mm-disable-interrupts-while-initializing-deferred-pages mm/page_alloc.c
--- a/mm/page_alloc.c~mm-disable-interrupts-while-initializing-deferred-pages
+++ a/mm/page_alloc.c
@@ -1506,7 +1506,6 @@ static void __init deferred_free_pages(i
 		} else if (!(pfn & nr_pgmask)) {
 			deferred_free_range(pfn - nr_free, nr_free);
 			nr_free = 1;
-			cond_resched();
 		} else {
 			nr_free++;
 		}
@@ -1533,12 +1532,10 @@ static unsigned long  __init deferred_in
 		if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) {
 			page = NULL;
 			continue;
-		} else if (!page || !(pfn & nr_pgmask)) {
+		} else if (!page || !(pfn & nr_pgmask))
 			page = pfn_to_page(pfn);
-			cond_resched();
-		} else {
+		else
 			page++;
-		}
 		__init_single_page(page, pfn, zid, nid, true);
 		nr_pages++;
 	}
@@ -1552,23 +1549,25 @@ static int __init deferred_init_memmap(v
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
-	unsigned long spfn, epfn;
+	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
-	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
 
+	/* Bind memory initialisation thread to a local node if possible */
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(current, cpumask);
+
+	pgdat_resize_lock_irq(pgdat, &flags);
+	first_init_pfn = pgdat->first_deferred_pfn;
 	if (first_init_pfn == ULONG_MAX) {
+		pgdat_resize_unlock_irq(pgdat, &flags);
 		pgdat_init_report_one_done();
 		return 0;
 	}
 
-	/* Bind memory initialisation thread to a local node if possible */
-	if (!cpumask_empty(cpumask))
-		set_cpus_allowed_ptr(current, cpumask);
-
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1597,7 @@ static int __init deferred_init_memmap(v
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
 		deferred_free_pages(nid, zid, spfn, epfn);
 	}
+	pgdat_resize_unlock_irq(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
_

Patches currently in -mm which might be from pasha.tatashin@xxxxxxxxxx are

mm-disable-interrupts-while-initializing-deferred-pages.patch
mm-initialize-pages-on-demand-during-boot.patch
mm-initialize-pages-on-demand-during-boot-fix-3.patch
mm-initialize-pages-on-demand-during-boot-fix-4.patch
mm-initialize-pages-on-demand-during-boot-v5.patch
mm-memory_hotplug-enforce-block-size-aligned-range-check.patch
x86-mm-memory_hotplug-determine-block-size-based-on-the-end-of-boot-memory.patch
x86-mm-memory_hotplug-determine-block-size-based-on-the-end-of-boot-memory-v4.patch
mm-uninitialized-struct-page-poisoning-sanity-checking.patch
mm-uninitialized-struct-page-poisoning-sanity-checking-v4.patch
mm-memory_hotplug-optimize-probe-routine.patch
mm-memory_hotplug-dont-read-nid-from-struct-page-during-hotplug.patch
mm-memory_hotplug-dont-read-nid-from-struct-page-during-hotplug-v5.patch
mm-memory_hotplug-optimize-memory-hotplug.patch
mm-memory_hotplug-optimize-memory-hotplug-v5.patch
xen-mm-allow-deferred-page-initialization-for-xen-pv-domains.patch
sparc64-ng4-memset-32-bits-overflow.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux