+ mm-soft-offline-close-the-race-against-page-allocation.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm: soft-offline: close the race against page allocation
has been added to the -mm tree.  Its filename is
     mm-soft-offline-close-the-race-against-page-allocation.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-soft-offline-close-the-race-against-page-allocation.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-soft-offline-close-the-race-against-page-allocation.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Subject: mm: soft-offline: close the race against page allocation

A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed on the way of soft-offline.  This is
undesirable because soft-offline (which is about corrected error) is less
aggressive than hard-offline (which is about uncorrected error), and we
can make soft-offline fail and keep using the page for good reason like
"system is busy."

Two main changes of this patch are:

- setting migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes kernel bypass pcplist when
  freeing the page. So we can assume that the page is in freelist just
  after put_page() returns,

- setting PG_hwpoison on free page under zone->lock which protects
  freelists, so this allows us to avoid setting PG_hwpoison on a page
  that is decided to be allocated soon.

Link: http://lkml.kernel.org/r/1531452366-11661-3-git-send-email-n-horiguchi@xxxxxxxxxxxxx
Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx>
Reported-by: Xishi Qiu <xishi.qiuxishi@xxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: <zy.zhengyi@xxxxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/page-flags.h |    5 +++++
 include/linux/swapops.h    |   10 ----------
 mm/memory-failure.c        |   26 +++++++++++++++++++++-----
 mm/migrate.c               |    2 +-
 mm/page_alloc.c            |   29 +++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 16 deletions(-)

diff -puN include/linux/page-flags.h~mm-soft-offline-close-the-race-against-page-allocation include/linux/page-flags.h
--- a/include/linux/page-flags.h~mm-soft-offline-close-the-race-against-page-allocation
+++ a/include/linux/page-flags.h
@@ -372,8 +372,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff -puN include/linux/swapops.h~mm-soft-offline-close-the-race-against-page-allocation include/linux/swapops.h
--- a/include/linux/swapops.h~mm-soft-offline-close-the-race-against-page-allocation
+++ a/include/linux/swapops.h
@@ -340,11 +340,6 @@ static inline int is_hwpoison_entry(swp_
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 	atomic_long_inc(&num_poisoned_pages);
@@ -367,11 +362,6 @@ static inline int is_hwpoison_entry(swp_
 	return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-	return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff -puN mm/memory-failure.c~mm-soft-offline-close-the-race-against-page-allocation mm/memory-failure.c
--- a/mm/memory-failure.c~mm-soft-offline-close-the-race-against-page-allocation
+++ a/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include <linux/mm_inline.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct pa
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
 	int ret;
+	int mt;
 	struct page *hpage = compound_head(page);
 
 	if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(stru
 		put_hwpoison_page(hpage);
 	}
 
+	/*
+	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
+	 * to free list immediately (not via pcplist) when released after
+	 * successful page migration. Otherwise we can't guarantee that the
+	 * page is really free after put_page() returns, so
+	 * set_hwpoison_free_buddy_page() highly likely fails.
+	 */
+	mt = get_pageblock_migratetype(page);
+	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
 	if (PageHuge(page))
 		ret = soft_offline_huge_page(page, flags);
 	else
 		ret = __soft_offline_page(page, flags);
-
+	set_pageblock_migratetype(page, mt);
 	return ret;
 }
 
-static void soft_offline_free_page(struct page *page)
+static int soft_offline_free_page(struct page *page)
 {
 	int rc = 0;
 	struct page *head = compound_head(page);
 
 	if (PageHuge(head))
 		rc = dissolve_free_huge_page(page);
-	if (!rc && !TestSetPageHWPoison(page))
-		num_poisoned_pages_inc();
+	if (!rc) {
+		if (set_hwpoison_free_buddy_page(page))
+			num_poisoned_pages_inc();
+		else
+			rc = -EBUSY;
+	}
+	return rc;
 }
 
 /**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page,
 	if (ret > 0)
 		ret = soft_offline_in_use_page(page, flags);
 	else if (ret == 0)
-		soft_offline_free_page(page);
+		ret = soft_offline_free_page(page);
 
 	return ret;
 }
diff -puN mm/migrate.c~mm-soft-offline-close-the-race-against-page-allocation mm/migrate.c
--- a/mm/migrate.c~mm-soft-offline-close-the-race-against-page-allocation
+++ a/mm/migrate.c
@@ -1213,7 +1213,7 @@ out:
 			 * intentionally. Although it's rather weird,
 			 * it's how HWPoison flag works at the moment.
 			 */
-			if (!test_set_page_hwpoison(page))
+			if (set_hwpoison_free_buddy_page(page))
 				num_poisoned_pages_inc();
 		}
 	} else {
diff -puN mm/page_alloc.c~mm-soft-offline-close-the-race-against-page-allocation mm/page_alloc.c
--- a/mm/page_alloc.c~mm-soft-offline-close-the-race-against-page-allocation
+++ a/mm/page_alloc.c
@@ -8036,3 +8036,32 @@ bool is_free_buddy_page(struct page *pag
 
 	return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page
+ * within zone lock, which prevents the race against page allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+	struct zone *zone = page_zone(page);
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long flags;
+	unsigned int order;
+	bool hwpoisoned = false;
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (order = 0; order < MAX_ORDER; order++) {
+		struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+		if (PageBuddy(page_head) && page_order(page_head) >= order) {
+			if (!TestSetPageHWPoison(page))
+				hwpoisoned = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	return hwpoisoned;
+}
+#endif
_

Patches currently in -mm which might be from n-horiguchi@xxxxxxxxxxxxx are

mm-fix-race-on-soft-offlining-free-huge-pages.patch
mm-soft-offline-close-the-race-against-page-allocation.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux