Hi all, On Tue, 26 Jun 2018 12:18:53 +1000 Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx> wrote: > > Today's linux-next merge of the nvdimm tree got a conflict in: > > arch/x86/kernel/cpu/mcheck/mce.c > > between commit: > > d3d6923cd1ae ("x86/mce: Carve out the crashing_cpu check") > > from the tip tree and commit: > > f6785eac562b ("x86/memory_failure: Introduce {set,clear}_mce_nospec()") > > from the nvdimm tree. > > I fixed it up (see below) and can carry the fix as necessary. This > is now fixed as far as linux-next is concerned, but any non trivial > conflicts should be mentioned to your upstream maintainer when your tree > is submitted for merging. You may also want to consider cooperating > with the maintainer of the conflicting tree to minimise any particularly > complex conflicts. > > -- > Cheers, > Stephen Rothwell > > diff --cc arch/x86/kernel/cpu/mcheck/mce.c > index 9a16f15f79eb,a0fbf0a8b7e6..000000000000 > --- a/arch/x86/kernel/cpu/mcheck/mce.c > +++ b/arch/x86/kernel/cpu/mcheck/mce.c > @@@ -1076,129 -1070,6 +1072,100 @@@ static int do_memory_failure(struct mc > return ret; > } > > - #ifndef mce_unmap_kpfn > - static void mce_unmap_kpfn(unsigned long pfn) > - { > - unsigned long decoy_addr; > - > - /* > - * Unmap this page from the kernel 1:1 mappings to make sure > - * we don't log more errors because of speculative access to > - * the page. > - * We would like to just call: > - * set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1); > - * but doing that would radically increase the odds of a > - * speculative access to the poison page because we'd have > - * the virtual address of the kernel 1:1 mapping sitting > - * around in registers. > - * Instead we get tricky. We create a non-canonical address > - * that looks just like the one we want, but has bit 63 flipped. > - * This relies on set_memory_np() not checking whether we passed > - * a legal address. > - */ > - > - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); > - > - if (set_memory_np(decoy_addr, 1)) > - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); > - } > - #endif > - > - > +/* > + * Cases where we avoid rendezvous handler timeout: > + * 1) If this CPU is offline. > + * > + * 2) If crashing_cpu was set, e.g. we're entering kdump and we need to > + * skip those CPUs which remain looping in the 1st kernel - see > + * crash_nmi_callback(). > + * > + * Note: there still is a small window between kexec-ing and the new, > + * kdump kernel establishing a new #MC handler where a broadcasted MCE > + * might not get handled properly. > + */ > +static bool __mc_check_crashing_cpu(int cpu) > +{ > + if (cpu_is_offline(cpu) || > + (crashing_cpu != -1 && crashing_cpu != cpu)) { > + u64 mcgstatus; > + > + mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); > + if (mcgstatus & MCG_STATUS_RIPV) { > + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); > + return true; > + } > + } > + return false; > +} > + > +static void __mc_scan_banks(struct mce *m, struct mce *final, > + unsigned long *toclear, unsigned long *valid_banks, > + int no_way_out, int *worst) > +{ > + struct mca_config *cfg = &mca_cfg; > + int severity, i; > + > + for (i = 0; i < cfg->banks; i++) { > + __clear_bit(i, toclear); > + if (!test_bit(i, valid_banks)) > + continue; > + > + if (!mce_banks[i].ctl) > + continue; > + > + m->misc = 0; > + m->addr = 0; > + m->bank = i; > + > + m->status = mce_rdmsrl(msr_ops.status(i)); > + if (!(m->status & MCI_STATUS_VAL)) > + continue; > + > + /* > + * Corrected or non-signaled errors are handled by > + * machine_check_poll(). Leave them alone, unless this panics. > + */ > + if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) && > + !no_way_out) > + continue; > + > + /* Set taint even when machine check was not enabled. */ > + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); > + > + severity = mce_severity(m, cfg->tolerant, NULL, true); > + > + /* > + * When machine check was for corrected/deferred handler don't > + * touch, unless we're panicking. > + */ > + if ((severity == MCE_KEEP_SEVERITY || > + severity == MCE_UCNA_SEVERITY) && !no_way_out) > + continue; > + > + __set_bit(i, toclear); > + > + /* Machine check event was not enabled. Clear, but ignore. */ > + if (severity == MCE_NO_SEVERITY) > + continue; > + > + mce_read_aux(m, i); > + > + /* assuming valid severity level != 0 */ > + m->severity = severity; > + > + mce_log(m); > + > + if (severity > *worst) { > + *final = *m; > + *worst = severity; > + } > + } > + > + /* mce_clear_state will clear *final, save locally for use later */ > + *m = *final; > +} > + > /* > * The actual machine check handler. This only handles real > * exceptions when something got corrupted coming in through int 18. This is now a conflict between Linus' tree and the nvdimm tree. -- Cheers, Stephen Rothwell
Attachment:
pgpU2G3CkDuUO.pgp
Description: OpenPGP digital signature