As we already do for soft-offline, the memory-failure handling needs to take poisoned pages off the buddy allocator. Otherwise we could hit the problem described in [1] here as well. [1] https://lore.kernel.org/linux-mm/20190826104144.GA7849@linux/T/#u Signed-off-by: Oscar Salvador <osalvador@xxxxxxx> --- mm/memory-failure.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 48eb314598e0..3d491c0d3f91 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -791,6 +791,14 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) return MF_FAILED; } +static int me_huge_free_page(struct page *p) +{ + if (page_handle_poison(p, true, false)) + return MF_RECOVERED; + else + return MF_FAILED; +} + /* * Huge pages. Needs work. * Issues: @@ -818,8 +826,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) */ if (PageAnon(hpage)) put_page(hpage); - dissolve_free_huge_page(p); - res = MF_RECOVERED; + res = me_huge_free_page(p); lock_page(hpage); } @@ -1145,8 +1152,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) } } unlock_page(head); - dissolve_free_huge_page(p); - action_result(pfn, MF_MSG_FREE_HUGE, MF_DELAYED); + res = me_huge_free_page(p); + if (res == MF_FAILED) + num_poisoned_pages_dec(); + action_result(pfn, MF_MSG_FREE_HUGE, res); return 0; } @@ -1307,6 +1316,12 @@ int memory_failure(unsigned long pfn, int flags) if (PageHuge(p)) return memory_failure_hugetlb(pfn, flags); + + if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) { + action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED); + return 0; + } + if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); @@ -1328,10 +1343,10 @@ int memory_failure(unsigned long pfn, int flags) * that may make page_ref_freeze()/page_ref_unfreeze() mismatch. 
*/ if (!get_hwpoison_page(p)) { - if (is_free_buddy_page(p)) { - action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); + if (is_free_buddy_page(p) && page_handle_poison(p, true, false)) { + action_result(pfn, MF_MSG_BUDDY, MF_RECOVERED); return 0; - } else { + } else if(!is_free_buddy_page(p)) { action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); return -EBUSY; } @@ -1354,8 +1369,8 @@ int memory_failure(unsigned long pfn, int flags) */ shake_page(p, 0); /* shake_page could have turned it free. */ - if (!PageLRU(p) && is_free_buddy_page(p)) { - action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED); + if (!PageLRU(p) && is_free_buddy_page(p) && page_handle_poison(p, true, false)) { + action_result(pfn, MF_MSG_BUDDY_2ND, MF_RECOVERED); return 0; } -- 2.12.3