Recovery action when get_user() triggers a machine check uses the fixup
path to make get_user() return -EFAULT. queue_task_work() also arranges
for kill_me_maybe() to be called on return to user mode to send a SIGBUS
to the current process.

But there are places in the kernel where the code assumes that this
-EFAULT return was simply because of a page fault. The code takes some
action to fix that, and then retries the access. This results in a second
machine check.

While processing this second machine check queue_task_work() is called
again. But since this uses the same callback_head structure that was used
in the first call, the net result is an entry on the current->task_works
list that points to itself. When task_work_run() is called it loops
forever in this code:

	do {
		next = work->next;
		work->func(work);
		work = next;
		cond_resched();
	} while (work);

Add a "mce_busy" flag bit to detect this situation and panic when it
happens.

Signed-off-by: Tony Luck <tony.luck@xxxxxxxxx>
---
 arch/x86/kernel/cpu/mce/core.c | 7 ++++++-
 include/linux/sched.h          | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 13d3f1cbda17..1bf11213e093 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1246,6 +1246,7 @@ static void kill_me_maybe(struct callback_head *cb)
 	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
 	int flags = MF_ACTION_REQUIRED;
 
+	p->mce_busy = 0;
 	pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
 
 	if (!p->mce_ripv)
@@ -1268,6 +1269,7 @@ static void kill_me_maybe(struct callback_head *cb)
 
 static void queue_task_work(struct mce *m, int kill_current_task)
 {
+	current->mce_busy = 1;
 	current->mce_addr = m->addr;
 	current->mce_kflags = m->kflags;
 	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
@@ -1431,8 +1433,11 @@ noinstr void do_machine_check(struct pt_regs *regs)
 				mce_panic("Failed kernel mode recovery", &m, msg);
 		}
 
-		if (m.kflags & MCE_IN_KERNEL_COPYIN)
+		if (m.kflags & MCE_IN_KERNEL_COPYIN) {
+			if (current->mce_busy)
+				mce_panic("Multiple copyin", &m, msg);
 			queue_task_work(&m, kill_current_task);
+		}
 	}
 out:
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e3a5eeec509..a763a76eac57 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1360,7 +1360,8 @@ struct task_struct {
 	u64				mce_addr;
 	__u64				mce_ripv : 1,
 					mce_whole_page : 1,
-					__mce_reserved : 62;
+					mce_busy : 1,
+					__mce_reserved : 61;
 	struct callback_head		mce_kill_me;
 #endif
 
-- 
2.21.1