On Tue, Aug 17, 2021 at 05:29:40PM -0700, Tony Luck wrote:
> @@ -1287,17 +1291,34 @@ static void kill_me_maybe(struct callback_head *cb)
>  	}
>  }
>
> -static void queue_task_work(struct mce *m, int kill_current_task)
> +static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
>  {
> -	current->mce_addr = m->addr;
> -	current->mce_kflags = m->kflags;
> -	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
> -	current->mce_whole_page = whole_page(m);
> +	int count = ++current->mce_count;
>
> -	if (kill_current_task)
> -		current->mce_kill_me.func = kill_me_now;
> -	else
> -		current->mce_kill_me.func = kill_me_maybe;
> +	/* First call, save all the details */
> +	if (count == 1) {
> +		current->mce_addr = m->addr;
> +		current->mce_kflags = m->kflags;
> +		current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
> +		current->mce_whole_page = whole_page(m);
> +
> +		if (kill_current_task)
> +			current->mce_kill_me.func = kill_me_now;
> +		else
> +			current->mce_kill_me.func = kill_me_maybe;
> +	}
> +
> +	/* Ten is likley overkill. Don't expect more than two faults before task_work() */

"likely"

> +	if (count > 10)
> +		mce_panic("Too many machine checks while accessing user data", m, msg);

Ok, aren't we too nasty here? Why should we panic the whole box even
with 10 MCEs? It is still user memory...

IOW, why not:

	if (count > 10)
		current->mce_kill_me.func = kill_me_now;

and when we return, that user process dies immediately.

> +	/* Second or later call, make sure page address matches the one from first call */
> +	if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
> +		mce_panic("Machine checks to different user pages", m, msg);

Same question here.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette