On Fri, 5 Mar 2021 15:55:25 +0000 "Luck, Tony" <tony.luck@xxxxxxxxx> wrote:

> > From the walk, it seems we have got the virtual address, can we just send a SIGBUS with it?
>
> If the walk wins the race and the pte for the poisoned page is still valid, then yes.
>
> But we could have:
>
> CPU1                         CPU2
> memory_failure sets poison
> bit for struct page
>
>
> rmap finds page in task
> on CPU2 and sets PTE
> to not-valid-poison
>
>                              memory_failure returns
>                              early because struct page
>                              already marked as poison
>
>                              walk page tables looking
>                              for mapping - don't find it
>
> -Tony

I don't think there is a race condition here, but if you really think that sending SIGBUS with the pfn is not proper, I think the following patch may be one way to address it. I copied your abandoned code and made a small modification, and it has just passed my simple test. Note that this is an RFC version, and it is only relevant if you think that sending the pfn with SIGBUS is not right.

Thanks!

From a522ab8856e3a332a2318d57bb19f3c59594d462 Mon Sep 17 00:00:00 2001
From: Aili Yao <yaoaili@xxxxxxxxxxxx>
Date: Wed, 10 Mar 2021 13:59:18 +0800
Subject: [PATCH] x86/mce: fix invalid SIGBUS address

Walk the page tables of the current process and compare each entry with the pfn.

1. Only tested with normal pages and 2M hugetlb pages.
2. 1G hugetlb pages and transparent huge pages are not supported currently.
3. There may be other failure cases that are not recognized; this is an RFC version.
---
 arch/x86/kernel/cpu/mce/core.c | 123 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index db4afc5..65d7ef7 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -28,8 +28,12 @@
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
+#include <linux/pagewalk.h>
 #include <linux/poll.h>
 #include <linux/nmi.h>
 #include <linux/cpu.h>
@@ -1235,6 +1239,121 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
 	/* mce_clear_state will clear *final, save locally for use later */
 	*m = *final;
 }
+
+/* State shared between mc_memory_failure_error() and its page-walk callbacks. */
+struct mc_walk_ctx {
+	unsigned long pfn;	/* in:  poisoned page frame to look for */
+	unsigned long addr;	/* out: user virtual address that maps @pfn */
+	short shift;		/* out: log2 of the mapping size (SIGBUS lsb) */
+};
+
+/*
+ * Record @addr and @shift in @ctx if @pte refers to the poisoned pfn.
+ *
+ * @nr_pages is the number of base pages the entry covers: 1 for a normal PTE,
+ * pages_per_huge_page() for a hugetlb entry, so an error anywhere inside a
+ * huge page is matched rather than only one in its first base page.
+ *
+ * A present entry is matched through pte_pfn(). A non-present entry matches
+ * only when it is a hwpoison swap entry for this pfn (the pfn is stored in
+ * the swap offset). Empty, swap and migration entries never match, because
+ * pte_pfn() is meaningless for them.
+ *
+ * Returns 1, which stops the page walk, on a match and 0 otherwise.
+ */
+static int mc_check_entry(pte_t pte, unsigned long addr, short shift,
+			  unsigned long nr_pages, struct mc_walk_ctx *ctx)
+{
+	unsigned long base;
+
+	if (pte_present(pte)) {
+		base = pte_pfn(pte);
+	} else {
+		swp_entry_t entry;
+
+		if (pte_none(pte))
+			return 0;
+
+		entry = pte_to_swp_entry(pte);
+		if (!is_hwpoison_entry(entry))
+			return 0;
+		base = swp_offset(entry);
+	}
+
+	/* Huge pages are naturally aligned, so this yields the head pfn. */
+	base = round_down(base, nr_pages);
+	if (ctx->pfn < base || ctx->pfn - base >= nr_pages)
+		return 0;
+
+	ctx->addr = addr;
+	ctx->shift = shift;
+	return 1;
+}
+
+/* pte_entry callback: test one base-page PTE against the poisoned pfn. */
+static int mc_pte_entry(pte_t *pte, unsigned long addr, unsigned long next,
+			struct mm_walk *walk)
+{
+	return mc_check_entry(*pte, addr, PAGE_SHIFT, 1, walk->private);
+}
+
+/* hugetlb_entry callback: test one hugetlb mapping against the poisoned pfn. */
+static int mc_hugetlb_range(pte_t *ptep, unsigned long hmask,
+			    unsigned long addr, unsigned long end,
+			    struct mm_walk *walk)
+{
+	struct hstate *h = hstate_vma(walk->vma);
+
+	/* Size comes from the hstate rather than assuming PMD_SHIFT. */
+	return mc_check_entry(huge_ptep_get(ptep), addr & hmask,
+			      huge_page_shift(h), pages_per_huge_page(h),
+			      walk->private);
+}
+
+/*
+ * NOTE(review): there is no .pmd_entry handler, so transparent huge pages are
+ * not matched directly; confirm how the generic walker treats them before
+ * relying on this for THP-backed mappings.
+ */
+static const struct mm_walk_ops mc_walk_ops = {
+	.pte_entry	= mc_pte_entry,
+	.hugetlb_entry	= mc_hugetlb_range,
+};
+
+/*
+ * Send a BUS_MCEERR_AR SIGBUS for the poisoned page frame @pfn.
+ *
+ * Walks the page tables of @p (the caller passes current) to find the user
+ * virtual address that maps @pfn and reports that address, together with the
+ * size of the mapping, in the signal. If no mapping is found, the walk fails,
+ * or @pfn has no ordinary struct page, the signal falls back to p->mce_vaddr
+ * with PAGE_SHIFT granularity, which is what kill_me_maybe() sent before.
+ */
+static void mc_memory_failure_error(struct task_struct *p, unsigned long pfn)
+{
+	struct mc_walk_ctx ctx = { .pfn = pfn };
+	int ret = 0;
+
+	/* pfn_to_page() never returns NULL; validate the pfn itself instead. */
+	if (p->mm && pfn_valid(pfn) && !is_zone_device_page(pfn_to_page(pfn))) {
+		mmap_read_lock(p->mm);
+		ret = walk_page_range(p->mm, 0, TASK_SIZE_MAX, &mc_walk_ops, &ctx);
+		mmap_read_unlock(p->mm);
+	}
+
+	/* ret > 0: a callback matched. 0 or a negative errno: nothing found. */
+	if (ret > 0) {
+		pr_err("Memory error may not recovered: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+		       ctx.addr, p->comm, p->pid);
+		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)ctx.addr, ctx.shift);
+		return;
+	}
+
+	pr_err("Memory error may not recovered, pfn: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
+	       pfn, p->comm, p->pid);
+	/* A pfn is not a user address; never put it in si_addr. */
+	force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+}
 
 static void kill_me_now(struct callback_head *ch)
 {
@@ -1259,9 +1378,7 @@ static void kill_me_maybe(struct callback_head *cb)
 	}
 
 	if (p->mce_vaddr != (void __user *)-1l) {
-		pr_err("Memory error may not recovered: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
-			p->mce_addr >> PAGE_SHIFT, p->comm, p->pid);
-		force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+		mc_memory_failure_error(current, p->mce_addr >> PAGE_SHIFT);
 	} else {
 		pr_err("Memory error not recovered");
 		kill_me_now(cb);
-- 
1.8.3.1

--
Thanks!
Aili Yao