Nicholas Piggin <npiggin@xxxxxxxxx> writes:

> In the interest of minimising the amount of code that is run in
> "real-mode", don't handle hcalls in real mode in the P9 path.
>
> POWER8 and earlier are much more expensive to exit from HV real mode
> and switch to host mode, because on those processors HV interrupts get
> to the hypervisor with the MMU off, and the other threads in the core
> need to be pulled out of the guest, and SLBs all need to be saved,
> ERATs invalidated, and host SLB reloaded before the MMU is re-enabled
> in host mode. Hash guests also require a lot of hcalls to run. The
> XICS interrupt controller requires hcalls to run.
>
> By contrast, POWER9 has independent thread switching, and in radix mode
> the hypervisor is already in a host virtual memory mode when the HV
> interrupt is taken. Radix + xive guests don't need hcalls to handle
> interrupts or manage translations.
>
> So it's much less important to handle hcalls in real mode in P9.
>
> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
> ---

<snip>

> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index fa7614c37e08..17739aaee3d8 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1142,12 +1142,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
>  }
>
>  /*
> - * Handle H_CEDE in the nested virtualization case where we haven't
> - * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
> + * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
> + * handlers in book3s_hv_rmhandlers.S.
> + *
>   * This has to be done early, not in kvmppc_pseries_do_hcall(), so
>   * that the cede logic in kvmppc_run_single_vcpu() works properly.
>   */
> -static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
> +static void kvmppc_cede(struct kvm_vcpu *vcpu)
>  {
>  	vcpu->arch.shregs.msr |= MSR_EE;
>  	vcpu->arch.ceded = 1;
> @@ -1403,9 +1404,15 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
>  		/* hcall - punt to userspace */
>  		int i;
>
> -		/* hypercall with MSR_PR has already been handled in rmode,
> -		 * and never reaches here.
> -		 */
> +		if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
> +			/*
> +			 * Guest userspace executed sc 1, reflect it back as a
> +			 * privileged program check interrupt.
> +			 */
> +			kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
> +			r = RESUME_GUEST;
> +			break;
> +		}

This patch bypasses sc_1_fast_return, so it breaks KVM-PR. L1 loops with
the following output:

[    9.503929][ T3443] Couldn't emulate instruction 0x4e800020 (op 19 xop 16)
[    9.503990][ T3443] kvmppc_exit_pr_progint: emulation at 48f4 failed (4e800020)
[    9.504080][ T3443] Couldn't emulate instruction 0x4e800020 (op 19 xop 16)
[    9.504170][ T3443] kvmppc_exit_pr_progint: emulation at 48f4 failed (4e800020)

0x4e800020 is a blr after an sc 1 in SLOF. For KVM-PR we need to inject
a 0xc00 at some point, either here or before branching to no_try_real in
book3s_hv_rmhandlers.S. There is a sketch of the former option at the
end of this mail.

>
>  		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
>  		for (i = 0; i < 9; ++i)
> @@ -3663,6 +3670,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
>  	return trap;
>  }
>
> +static inline bool hcall_is_xics(unsigned long req)
> +{
> +	return (req == H_EOI || req == H_CPPR || req == H_IPI ||
> +		req == H_IPOLL || req == H_XIRR || req == H_XIRR_X);
> +}
> +
>  /*
>   * Virtual-mode guest entry for POWER9 and later when the host and
>   * guest are both using the radix MMU. The LPIDR has already been set.
> @@ -3774,15 +3787,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
>  		/* H_CEDE has to be handled now, not later */
>  		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
>  		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
> -			kvmppc_nested_cede(vcpu);
> +			kvmppc_cede(vcpu);
>  			kvmppc_set_gpr(vcpu, 3, 0);
>  			trap = 0;
>  		}
>  	} else {
>  		kvmppc_xive_push_vcpu(vcpu);
>  		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
> +		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
> +		    !(vcpu->arch.shregs.msr & MSR_PR)) {
> +			unsigned long req = kvmppc_get_gpr(vcpu, 3);
> +
> +			/* H_CEDE has to be handled now, not later */
> +			if (req == H_CEDE) {
> +				kvmppc_cede(vcpu);
> +				kvmppc_xive_cede_vcpu(vcpu); /* may un-cede */
> +				kvmppc_set_gpr(vcpu, 3, 0);
> +				trap = 0;
> +
> +			/* XICS hcalls must be handled before xive is pulled */
> +			} else if (hcall_is_xics(req)) {
> +				int ret;
> +
> +				ret = kvmppc_xive_xics_hcall(vcpu, req);
> +				if (ret != H_TOO_HARD) {
> +					kvmppc_set_gpr(vcpu, 3, ret);
> +					trap = 0;
> +				}
> +			}
> +		}
>  		kvmppc_xive_pull_vcpu(vcpu);
> -
>  	}
>
>  	vcpu->arch.slb_max = 0;
> @@ -4442,8 +4476,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
>  	else
>  		r = kvmppc_run_vcpu(vcpu);
>
> -	if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
> -	    !(vcpu->arch.shregs.msr & MSR_PR)) {
> +	if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
> +		if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
> +			r = RESUME_GUEST;
> +			continue;
> +		}

Note that this hunk might need to be dropped.

>  		trace_kvm_hcall_enter(vcpu);
>  		r = kvmppc_pseries_do_hcall(vcpu);
>  		trace_kvm_hcall_exit(vcpu, r);
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index c11597f815e4..2d0d14ed1d92 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>  	mr	r4,r9
>  	bge	fast_guest_return
> 2:
> +	/* If we came in through the P9 short path, no real mode hcalls */
> +	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
> +	cmpwi	r0, 0
> +	bne	no_try_real
>  	/* See if this is an hcall we can handle in real mode */
>  	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
>  	beq	hcall_try_real_mode
> +no_try_real:
>
>  	/* Hypervisor doorbell - exit only if host IPI flag set */
>  	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
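
As for the 0xc00 injection mentioned above, here is a rough, untested
sketch of the "handle it here" option, i.e. reflecting the sc 1 back to
the L1 kernel from kvmppc_handle_exit_hv() instead of queueing the
privileged program check. The kvmppc_core_queue_syscall() helper name
is made up for illustration; it is just a thin wrapper around the
existing kvmppc_inject_interrupt():

/* Illustrative helper: deliver a 0xc00 system call interrupt to the guest */
static void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
{
	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
}

and then in the BOOK3S_INTERRUPT_SYSCALL case:

	if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
		/*
		 * sc 1 from guest userspace. A PR KVM guest kernel needs
		 * to see this as a 0xc00 system call interrupt, so that
		 * its sc 1 hcall emulation still works, rather than as a
		 * privileged program check.
		 */
		kvmppc_core_queue_syscall(vcpu);
		r = RESUME_GUEST;
		break;
	}

The alternative is to do the equivalent reflection in assembly before
the new no_try_real branch, but handling it in C seems more in the
spirit of this series.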