On Wed, Feb 16, 2022 at 10:00:38PM -0500, luofei wrote: > After successfully obtaining the reference count of the huge > page, it is still necessary to call hwpoison_filter() to make a > filter judgement, otherwise the filter hugepage will be unmaped > and the related process may be killed. > > Also when the huge page meets the filter conditions, it should > not be regarded as successful memory_failure() processing for > mce handler, but should return a value to inform the caller, > otherwise the caller regards the error page has been identified > and isolated, which may lead to calling set_mce_nospec() to change > page attribute, etc. > > Signed-off-by: luofei <luofei@xxxxxxxxxxxx> This patch seems to do two separate things (introducing MF_MCE_HANDLE, and adding hwpoison_filter() in memory_failure_hugetlb()), so could you separate the patch into two? > --- > arch/x86/kernel/cpu/mce/core.c | 22 +++++++++++----------- > include/linux/mm.h | 1 + > mm/memory-failure.c | 25 +++++++++++++++++++++++-- > 3 files changed, 35 insertions(+), 13 deletions(-) > > diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c > index 5818b837fd4d..c2b99c60225f 100644 > --- a/arch/x86/kernel/cpu/mce/core.c > +++ b/arch/x86/kernel/cpu/mce/core.c > @@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, > return NOTIFY_DONE; > > pfn = mce->addr >> PAGE_SHIFT; > - if (!memory_failure(pfn, 0)) { > + if (!memory_failure(pfn, MF_MCE_HANDLE)) { > set_mce_nospec(pfn, whole_page(mce)); > mce->kflags |= MCE_HANDLED_UC; > } > @@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch) > static void kill_me_maybe(struct callback_head *cb) > { > struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); > - int flags = MF_ACTION_REQUIRED; > + int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE; > int ret; > > p->mce_count = 0; > @@ -1300,14 +1300,14 @@ static void kill_me_maybe(struct callback_head *cb) > set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); > sync_core(); > return; > - } > - > - /* > - * -EHWPOISON from memory_failure() means that it already sent SIGBUS > - * to the current process with the proper error info, so no need to > - * send SIGBUS here again. > - */ > - if (ret == -EHWPOISON) > + } else if (ret == -EHWPOISON || ret == 1) > + /* > + * -EHWPOISON from memory_failure() means that it already sent SIGBUS > + * to the current process with the proper error info, so no need to > + * send SIGBUS here again. > + * > + * 1 means it's a filter page, no need to deal with. > + */ The new return code 1 seems to be handled in the same manner as -EHWPOISON, so how about simply using -EHWPOISON as return code for the new case? Then, the meaning of -EHWPOISON at this context would change like below: /* - * -EHWPOISON from memory_failure() means that it already sent SIGBUS - * to the current process with the proper error info, so no need to - * send SIGBUS here again. + * -EHWPOISON from memory_failure() means that memory_failure() did + * not handle the error event for the following reason: + * - SIGBUS has already been sent to the current process with the + * proper error info, or + * - hwpoison_filter() filtered the event, + * so no need to deal with it more. */ Thanks, Naoya Horiguchi > return; > > pr_err("Memory error not recovered"); > @@ -1320,7 +1320,7 @@ static void kill_me_never(struct callback_head *cb) > > p->mce_count = 0; > pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); > - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) > + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE)) > set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); > } > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 213cc569b192..f4703f948e9a 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3188,6 +3188,7 @@ enum mf_flags { > MF_MUST_KILL = 1 << 2, > MF_SOFT_OFFLINE = 1 << 3, > MF_UNPOISON = 1 << 4, > + MF_MCE_HANDLE = 1 << 5, > }; > extern int memory_failure(unsigned long pfn, int flags); > extern void memory_failure_queue(unsigned long pfn, int flags); > diff --git a/mm/memory-failure.c b/mm/memory-failure.c > index 97a9ed8f87a9..1a0bd91a685b 100644 > --- a/mm/memory-failure.c > +++ b/mm/memory-failure.c > @@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) > if (TestClearPageHWPoison(head)) > num_poisoned_pages_dec(); > unlock_page(head); > - return 0; > + if (flags & MF_MCE_HANDLE) > + return 1; > + else > + return 0; > } > unlock_page(head); > res = MF_FAILED; > @@ -1545,6 +1548,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) > lock_page(head); > page_flags = head->flags; > > + if (hwpoison_filter(p)) { > + if (TestClearPageHWPoison(head)) > + num_poisoned_pages_dec(); > + put_page(p); > + if (flags & MF_MCE_HANDLE) > + res = 1; > + else > + res = 0; > + goto out; > + } > + > /* > * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so > * simply disable it. In order to make it work properly, we need > @@ -1613,7 +1627,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, > goto out; > > if (hwpoison_filter(page)) { > - rc = 0; > + if (flags & MF_MCE_HANDLE) > + rc = 1; > + else > + rc = 0; > goto unlock; > } > > @@ -1837,6 +1854,10 @@ int memory_failure(unsigned long pfn, int flags) > num_poisoned_pages_dec(); > unlock_page(p); > put_page(p); > + if (flags & MF_MCE_HANDLE) > + res = 1; > + else > + res = 0; > goto unlock_mutex; > } > > -- > 2.27.0