On Wednesday 20 February 2019 06:35 AM, Paul Mackerras wrote: > This makes the handling of machine check interrupts that occur inside > a guest simpler and more robust, with less done in assembler code and > in real mode. > > Now, when a machine check occurs inside a guest, we always get the > machine check event struct and put a copy in the vcpu struct for the > vcpu where the machine check occurred. We no longer call > machine_check_queue_event() from kvmppc_realmode_mc_power7(), because > on POWER8, when a vcpu is running on an offline secondary thread and > we call machine_check_queue_event(), that calls irq_work_queue(), > which doesn't work because the CPU is offline, but instead triggers > the WARN_ON(lazy_irq_pending()) in pnv_smp_cpu_kill_self() (which > fires again and again because nothing clears the condition). > > All that machine_check_queue_event() actually does is to cause the > event to be printed to the console. For a machine check occurring in > the guest, we now print the event in kvmppc_handle_exit_hv() > instead. > > The assembly code at label machine_check_realmode now just calls C > code and then continues exiting the guest. We no longer either > synthesize a machine check for the guest in assembly code or return > to the guest without a machine check. > > The code in kvmppc_handle_exit_hv() is extended to handle the case > where the guest is not FWNMI-capable. In that case we now always > synthesize a machine check interrupt for the guest. Previously, if > the host thinks it has recovered the machine check fully, it would > return to the guest without any notification that the machine check > had occurred. If the machine check was caused by some action of the > guest (such as creating duplicate SLB entries), it is much better to > tell the guest that it has caused a problem. Therefore we now always > generate a machine check interrupt for guests that are not > FWNMI-capable. > > Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx> > --- > arch/powerpc/include/asm/kvm_ppc.h | 3 +- > arch/powerpc/kvm/book3s.c | 7 +++++ > arch/powerpc/kvm/book3s_hv.c | 18 +++++++++-- > arch/powerpc/kvm/book3s_hv_ras.c | 56 +++++++++------------------------ > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 40 ++--------------------- > 5 files changed, 42 insertions(+), 82 deletions(-) > > diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h > index b3bf4f6..d283d31 100644 > --- a/arch/powerpc/include/asm/kvm_ppc.h > +++ b/arch/powerpc/include/asm/kvm_ppc.h > @@ -143,6 +143,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); > > extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); > extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); > +extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags); > extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); > extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); > extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); > @@ -646,7 +647,7 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, > unsigned int yield_count); > long kvmppc_h_random(struct kvm_vcpu *vcpu); > void kvmhv_commence_exit(int trap); > -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); > +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); > void kvmppc_subcore_enter_guest(void); > void kvmppc_subcore_exit_guest(void); > long kvmppc_realmode_hmi_handler(void); > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c > index 22a46c6..10c5579 100644 > --- a/arch/powerpc/kvm/book3s.c > +++ b/arch/powerpc/kvm/book3s.c > @@ -195,6 +195,13 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) > } > EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio); > > +void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags) > +{ > + /* might as well deliver this straight away */ > + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, flags); > +} > +EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check); > + > void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) > { > /* might as well deliver this straight away */ > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 1860c0b..d8bf05a 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -1215,6 +1215,22 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, > r = RESUME_GUEST; > break; > case BOOK3S_INTERRUPT_MACHINE_CHECK: > + /* Print the MCE event to host console. */ > + machine_check_print_event_info(&vcpu->arch.mce_evt, false); > + > + /* > + * If the guest can do FWNMI, exit to userspace so it can > + * deliver a FWNMI to the guest. > + * Otherwise we synthesize a machine check for the guest > + * so that it knows that the machine check occurred. > + */ > + if (!vcpu->kvm->arch.fwnmi_enabled) { > + ulong flags = vcpu->arch.shregs.msr & 0x083c0000; > + kvmppc_core_queue_machine_check(vcpu, flags); > + r = RESUME_GUEST; > + break; > + } > + > /* Exit to guest with KVM_EXIT_NMI as exit reason */ > run->exit_reason = KVM_EXIT_NMI; > run->hw.hardware_exit_reason = vcpu->arch.trap; > @@ -1227,8 +1243,6 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, > run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV; > > r = RESUME_HOST; > - /* Print the MCE event to host console. */ > - machine_check_print_event_info(&vcpu->arch.mce_evt, false); > break; > case BOOK3S_INTERRUPT_PROGRAM: > { > diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c > index 0787f12..9aa10b1 100644 > --- a/arch/powerpc/kvm/book3s_hv_ras.c > +++ b/arch/powerpc/kvm/book3s_hv_ras.c > @@ -69,7 +69,7 @@ static void reload_slb(struct kvm_vcpu *vcpu) > * > * Returns: 0 => exit guest, 1 => deliver machine check to guest You have to remove the above comment as the function now returns nothing. Reviewed-by: Aravinda Prasad <aravinda@xxxxxxxxxxxxxxxxxx> Regards, Aravinda > */ > -static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) > +static void kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) > { > unsigned long srr1 = vcpu->arch.shregs.msr; > struct machine_check_event mce_evt; > @@ -111,52 +111,24 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) > } > > /* > - * See if we have already handled the condition in the linux host. > - * We assume that if the condition is recovered then linux host > - * will have generated an error log event that we will pick > - * up and log later. > - * Don't release mce event now. We will queue up the event so that > - * we can log the MCE event info on host console. > + * Now get the event and stash it in the vcpu struct so it can > + * be handled by the primary thread in virtual mode. We can't > + * call machine_check_queue_event() here if we are running on > + * an offline secondary thread. > */ > - if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE)) > - goto out; > - > - if (mce_evt.version == MCE_V1 && > - (mce_evt.severity == MCE_SEV_NO_ERROR || > - mce_evt.disposition == MCE_DISPOSITION_RECOVERED)) > - handled = 1; > - > -out: > - /* > - * For guest that supports FWNMI capability, hook the MCE event into > - * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI > - * exit reason. On our way to exit we will pull this event from vcpu > - * structure and print it from thread 0 of the core/subcore. > - * > - * For guest that does not support FWNMI capability (old QEMU): > - * We are now going enter guest either through machine check > - * interrupt (for unhandled errors) or will continue from > - * current HSRR0 (for handled errors) in guest. Hence > - * queue up the event so that we can log it from host console later. > - */ > - if (vcpu->kvm->arch.fwnmi_enabled) { > - /* > - * Hook up the mce event on to vcpu structure. > - * First clear the old event. > - */ > - memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt)); > - if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { > - vcpu->arch.mce_evt = mce_evt; > - } > - } else > - machine_check_queue_event(); > + if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) { > + if (handled && mce_evt.version == MCE_V1) > + mce_evt.disposition = MCE_DISPOSITION_RECOVERED; > + } else { > + memset(&mce_evt, 0, sizeof(mce_evt)); > + } > > - return handled; > + vcpu->arch.mce_evt = mce_evt; > } > > -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) > +void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) > { > - return kvmppc_realmode_mc_power7(vcpu); > + kvmppc_realmode_mc_power7(vcpu); > } > > /* Check if dynamic split is in force and return subcore size accordingly. */ > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index 9b8d50a..f24f6a2 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -2826,49 +2826,15 @@ kvm_cede_exit: > #endif /* CONFIG_KVM_XICS */ > 3: b guest_exit_cont > > - /* Try to handle a machine check in real mode */ > + /* Try to do machine check recovery in real mode */ > machine_check_realmode: > mr r3, r9 /* get vcpu pointer */ > bl kvmppc_realmode_machine_check > nop > + /* all machine checks go to virtual mode for further handling */ > ld r9, HSTATE_KVM_VCPU(r13) > li r12, BOOK3S_INTERRUPT_MACHINE_CHECK > - /* > - * For the guest that is FWNMI capable, deliver all the MCE errors > - * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit > - * reason. This new approach injects machine check errors in guest > - * address space to guest with additional information in the form > - * of RTAS event, thus enabling guest kernel to suitably handle > - * such errors. > - * > - * For the guest that is not FWNMI capable (old QEMU) fallback > - * to old behaviour for backward compatibility: > - * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either > - * through machine check interrupt (set HSRR0 to 0x200). > - * For handled errors (no-fatal), just go back to guest execution > - * with current HSRR0. > - * if we receive machine check with MSR(RI=0) then deliver it to > - * guest as machine check causing guest to crash. > - */ > - ld r11, VCPU_MSR(r9) > - rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */ > - bne guest_exit_cont /* if so, exit to host */ > - /* Check if guest is capable of handling NMI exit */ > - ld r10, VCPU_KVM(r9) > - lbz r10, KVM_FWNMI(r10) > - cmpdi r10, 1 /* FWNMI capable? */ > - beq guest_exit_cont /* if so, exit with KVM_EXIT_NMI. */ > - > - /* if not, fall through for backward compatibility. */ > - andi. r10, r11, MSR_RI /* check for unrecoverable exception */ > - beq 1f /* Deliver a machine check to guest */ > - ld r10, VCPU_PC(r9) > - cmpdi r3, 0 /* Did we handle MCE ? */ > - bne 2f /* Continue guest execution. */ > - /* If not, deliver a machine check. SRR0/1 are already set */ > -1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK > - bl kvmppc_msr_interrupt > -2: b fast_interrupt_c_return > + b guest_exit_cont > > /* > * Call C code to handle a HMI in real mode. > -- Regards, Aravinda