The race condition addressed in commit add05cecef80 ("mm: soft-offline: don't free target page in successful page migration") was not closed completely, because that can happen not only for soft-offline, but also for hard-offline. Consider that a slab page is about to be freed into the buddy pool, and then an uncorrected memory error hits the page just after entering __free_one_page(), then VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP) is triggered, despite the fact that it's not necessary because the data on the affected page is not consumed. To solve it, this patch drops __PG_HWPOISON from page flag checks at allocation/free time. I think it's justified because the __PG_HWPOISON flag is defined to prevent the page from being reused and setting it outside the page's alloc-free cycle is a designed behavior (not a bug). And the patch reverts most of the changes from commit add05cecef80 about the new refcounting rule of soft-offlined pages, which is no longer necessary. Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> --- include/linux/page-flags.h | 10 +++++++--- mm/huge_memory.c | 7 +------ mm/memory-failure.c | 6 +++++- mm/migrate.c | 9 +++------ mm/page_alloc.c | 4 ++++ 5 files changed, 20 insertions(+), 16 deletions(-) diff --git v4.2-rc2.orig/include/linux/page-flags.h v4.2-rc2/include/linux/page-flags.h index f34e040b34e9..53400f101f2d 100644 --- v4.2-rc2.orig/include/linux/page-flags.h +++ v4.2-rc2/include/linux/page-flags.h @@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ + 1 << PG_unevictable | __PG_MLOCKED | \ __PG_COMPOUND_LOCK) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have any flags set. 
It they are set, + * Pages being prepped should not have these flags set. It they are set, * there has been a kernel bug or struct page corruption. + * + * __PG_HWPOISON is exceptional because it need to be kept beyond page's + * alloc-free cycle to prevent from reusing the page. */ -#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) +#define PAGE_FLAGS_CHECK_AT_PREP \ + (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) #define PAGE_FLAGS_PRIVATE \ (1 << PG_private | 1 << PG_private_2) diff --git v4.2-rc2.orig/mm/huge_memory.c v4.2-rc2/mm/huge_memory.c index c107094f79ba..097c7a4bfbd9 100644 --- v4.2-rc2.orig/mm/huge_memory.c +++ v4.2-rc2/mm/huge_memory.c @@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page, /* after clearing PageTail the gup refcount can be released */ smp_mb__after_atomic(); - /* - * retain hwpoison flag of the poisoned tail page: - * fix for the unsuitable process killed on Guest Machine(KVM) - * by the memory-failure. - */ - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON; + page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (page->flags & ((1L << PG_referenced) | (1L << PG_swapbacked) | diff --git v4.2-rc2.orig/mm/memory-failure.c v4.2-rc2/mm/memory-failure.c index 421d7c9b30f4..755f87e4ec64 100644 --- v4.2-rc2.orig/mm/memory-failure.c +++ v4.2-rc2/mm/memory-failure.c @@ -1723,6 +1723,9 @@ int soft_offline_page(struct page *page, int flags) get_online_mems(); + if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + set_migratetype_isolate(page, true); + ret = get_any_page(page, pfn, flags); put_online_mems(); if (ret > 0) { /* for in-use pages */ @@ -1730,7 +1733,7 @@ int soft_offline_page(struct page *page, int flags) ret = soft_offline_huge_page(page, flags); else ret = __soft_offline_page(page, flags); - } else if (ret == 0) { /* for free pages */ + } else if (ret == 0) { if (PageHuge(page)) { set_page_hwpoison_huge_page(hpage); if (!dequeue_hwpoisoned_huge_page(hpage)) @@ 
-1741,5 +1744,6 @@ int soft_offline_page(struct page *page, int flags) atomic_long_inc(&num_poisoned_pages); } } + unset_migratetype_isolate(page, MIGRATE_MOVABLE); return ret; } diff --git v4.2-rc2.orig/mm/migrate.c v4.2-rc2/mm/migrate.c index ee401e4e5ef1..c37d5772767b 100644 --- v4.2-rc2.orig/mm/migrate.c +++ v4.2-rc2/mm/migrate.c @@ -918,8 +918,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, static ICE_noinline int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *page, - int force, enum migrate_mode mode, - enum migrate_reason reason) + int force, enum migrate_mode mode) { int rc = 0; int *result = NULL; @@ -950,8 +949,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - if (reason != MR_MEMORY_FAILURE) - putback_lru_page(page); + putback_lru_page(page); } /* @@ -1124,8 +1122,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, pass > 2, mode); else rc = unmap_and_move(get_new_page, put_new_page, - private, page, pass > 2, mode, - reason); + private, page, pass > 2, mode); switch(rc) { case -ENOMEM: diff --git v4.2-rc2.orig/mm/page_alloc.c v4.2-rc2/mm/page_alloc.c index 506eac8b38af..e32d58ce5d2f 100644 --- v4.2-rc2.orig/mm/page_alloc.c +++ v4.2-rc2/mm/page_alloc.c @@ -1287,6 +1287,10 @@ static inline int check_new_page(struct page *page) bad_reason = "non-NULL mapping"; if (unlikely(atomic_read(&page->_count) != 0)) bad_reason = "nonzero _count"; + if (unlikely(page->flags & __PG_HWPOISON)) { + bad_reason = "HWPoisoned (hardware-corrupted)"; + bad_flags = __PG_HWPOISON; + } if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; bad_flags = PAGE_FLAGS_CHECK_AT_PREP; -- 2.4.3 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. 
For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href