From: Andi Kleen <ak@xxxxxxxxxxxxxxx> Hard offlining was always able to offline most kernel pages by setting the HWPoison flag on it and waiting for the next free. This works on all pages that get eventually freed. This didn't work for soft offlining unfortunately, because it cannot safely set the HWPoison flag on a still alive page. Handle this by introducing a second page flag HWPoisonOnFree that is handled by page free and enables HWPoison then. This does not add any overhead to page free because it can be handled as a bad flag which are already checked for. Since page flags are scarce on 32bit architectures this is only done on 64bit . Signed-off-by: Andi Kleen <ak@xxxxxxxxxxxxxxx> --- include/linux/mm.h | 1 + include/linux/page-flags.h | 15 +++++++++++++- mm/Kconfig | 4 +++ mm/memory-failure.c | 46 +++++++++++++++++++++++++++++++++++++++++++- mm/page_alloc.c | 2 + 5 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 74949fb..f7da94d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1481,6 +1481,7 @@ enum mf_flags { extern void memory_failure(unsigned long pfn, int trapno); extern int __memory_failure(unsigned long pfn, int trapno, int flags); extern int unpoison_memory(unsigned long pfn); +extern void hwpoison_page_on_free(struct page *p); extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6fa3178..2d4196b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -107,6 +107,9 @@ enum pageflags { #endif #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ +#ifdef CONFIG_HWPOISON_ON_FREE + PG_hwpoison_on_free, /* hwpoison on free */ +#endif #endif __NR_PAGEFLAGS, @@ -280,6 +283,15 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif +#if defined(CONFIG_MEMORY_FAILURE) && BITS_PER_LONG == 64 +PAGEFLAG(HWPoisonOnFree, hwpoison_on_free) +TESTSCFLAG(HWPoisonOnFree, hwpoison_on_free) +#define __PG_HWPOISON_ON_FREE (1UL << PG_hwpoison_on_free) +#else +PAGEFLAG_FALSE(HWPoisonOnFree) +#define __PG_HWPOISON_ON_FREE 0 +#endif + u64 stable_page_flags(struct page *page); static inline int PageUptodate(struct page *page) @@ -406,7 +418,8 @@ static inline void __ClearPageTail(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ + __PG_HWPOISON_ON_FREE) /* * Flags checked when a page is prepped for return by the page allocator. diff --git a/mm/Kconfig b/mm/Kconfig index f0fb912..ede5444 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -269,6 +269,10 @@ config MEMORY_FAILURE even when some of its memory has uncorrected errors. This requires special hardware support and typically ECC memory. +config HWPOISON_ON_FREE + depends on MEMORY_FAILURE && 64BIT + def_bool y + config HWPOISON_INJECT tristate "HWPoison pages injector" depends on MEMORY_FAILURE && DEBUG_KERNEL && PROC_FS diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 9c26eec..34901f6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1268,6 +1268,14 @@ int soft_offline_page(struct page *page, int flags) if (ret == 0) goto done; +#ifdef CONFIG_HWPOISON_ON_FREE + /* + * When the page gets freed make sure to poison + * it immediately. + */ + SetPageHWPoisonOnFree(page); +#endif + /* * Page cache page we can handle? */ @@ -1279,7 +1287,15 @@ int soft_offline_page(struct page *page, int flags) shake_page(page, 1); /* - * Did it turn free? + * Did it get poisoned on free? + */ + if (PageHWPoison(page)) { + pr_info("soft_offline: %#lx: Page freed through shaking\n", pfn); + return 0; + } + + /* + * Try to grab it as a free page again. */ ret = get_any_page(page, pfn, 0); if (ret < 0) @@ -1287,12 +1303,23 @@ int soft_offline_page(struct page *page, int flags) if (ret == 0) goto done; } + + if (PageHWPoisonOnFree(page)) { + pr_info("soft_offline: %#lx: Delaying poision of unknown page %lx to free\n", + pfn, page->flags); + return -EIO; /* or 0? */ + } + if (!PageLRU(page)) { pr_debug("soft_offline: %#lx: unknown non LRU page type %lx\n", pfn, page->flags); return -EIO; } + /* + * Normal page cache page here. + */ + lock_page(page); wait_on_page_writeback(page); @@ -1389,3 +1416,20 @@ int is_hwpoison_address(unsigned long addr) return is_hwpoison_entry(entry); } EXPORT_SYMBOL_GPL(is_hwpoison_address); + +#ifdef CONFIG_HWPOISON_ON_FREE + +/* + * HWPoison a page after freeing. + * This is the fallback path for most pages we cannot free early. + */ +void hwpoison_page_on_free(struct page *p) +{ + get_page(p); + SetPageHWPoison(p); + ClearPageHWPoisonOnFree(p); + pr_info("MCE: %#lx: Delayed poisioning of page after free\n", + page_to_pfn(p)); + atomic_long_inc(&mce_bad_pages); +} +#endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a8cfa9c..519c24c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -564,6 +564,8 @@ static inline int free_pages_check(struct page *page) (page->mapping != NULL) | (atomic_read(&page->_count) != 0) | (page->flags & PAGE_FLAGS_CHECK_AT_FREE))) { + if (PageHWPoisonOnFree(page)) + hwpoison_page_on_free(page); bad_page(page); return 1; } -- 1.7.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxxx For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>