From: Naoya Horiguchi <naoya.horiguchi@xxxxxxx> There is a race condition between memory_failure_hugetlb() and hugetlb free/demotion, which causes setting PageHWPoison flag on the wrong page (which was a hugetlb when memory_failrue() was called, but was removed or demoted when memory_failure_hugetlb() is called). This results in killing wrong processes. So set PageHWPoison flag with holding page lock, Signed-off-by: Naoya Horiguchi <naoya.horiguchi@xxxxxxx> --- mm/memory-failure.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ac6492e36978..fe25eee8f9d6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1494,24 +1494,11 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) int res; unsigned long page_flags; - if (TestSetPageHWPoison(head)) { - pr_err("Memory failure: %#lx: already hardware poisoned\n", - pfn); - res = -EHWPOISON; - if (flags & MF_ACTION_REQUIRED) - res = kill_accessing_process(current, page_to_pfn(head), flags); - return res; - } - - num_poisoned_pages_inc(); - if (!(flags & MF_COUNT_INCREASED)) { res = get_hwpoison_page(p, flags); if (!res) { lock_page(head); if (hwpoison_filter(p)) { - if (TestClearPageHWPoison(head)) - num_poisoned_pages_dec(); unlock_page(head); return -EOPNOTSUPP; } @@ -1544,13 +1531,16 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) page_flags = head->flags; if (hwpoison_filter(p)) { - if (TestClearPageHWPoison(head)) - num_poisoned_pages_dec(); put_page(p); res = -EOPNOTSUPP; goto out; } + if (TestSetPageHWPoison(head)) + goto already_hwpoisoned; + + num_poisoned_pages_inc(); + /* * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so * simply disable it. In order to make it work properly, we need @@ -1576,6 +1566,13 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) out: unlock_page(head); return res; +already_hwpoisoned: + unlock_page(head); + pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); + res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, page_to_pfn(head), flags); + return res; } static int memory_failure_dev_pagemap(unsigned long pfn, int flags, -- 2.25.1