In order to run an HPT (hash page table) guest, the KVM entry path used must enter real mode before loading up the guest MMU state. Currently the only path which does this is kvmppc_run_vcpu(), which uses the entry path in book3s_hv_rmhandlers.S, and until now this path didn't accommodate running a nested guest.

Have the nested HPT guest entry path call kvmppc_run_vcpu(), and modify the entry path in book3s_hv_rmhandlers.S so that it can run a nested guest.

For the entry path this means loading the smaller of the guest hypervisor decrementer and the host decrementer into the hypervisor decrementer, since we want control back when either expires (a small illustrative sketch of this selection follows the patch below). Additionally the correct LPID and LPCR must be loaded, and the guest SLB entries must be restored. When checking in kvmppc_guest_entry_inject_int() whether an interrupt can be injected into the guest, return -1 if we are entering a nested guest while something is pending for the L1 guest, to indicate that the nested guest shouldn't be entered and control should be passed back to the L1 guest.

On the exit path we must save the guest SLB entries so they can be returned to the L1 guest hypervisor. Additionally, the correct vrma_slb_v entry must be loaded for kvmppc_hpte_hv_fault() if the guest was in real mode, the correct HPT must be used in kvmppc_hpte_hv_fault(), and the correct handle_exit function must be called depending on whether or not a nested guest was being run.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
---
 arch/powerpc/include/asm/kvm_asm.h | 5 ++
 arch/powerpc/include/asm/kvm_book3s.h | 3 +-
 arch/powerpc/include/asm/kvm_ppc.h | 2 +-
 arch/powerpc/kernel/asm-offsets.c | 5 ++
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 arch/powerpc/kvm/book3s_hv.c | 55 +++++++-------
 arch/powerpc/kvm/book3s_hv_builtin.c | 33 ++++++---
 arch/powerpc/kvm/book3s_hv_interrupts.S | 25 ++++++-
 arch/powerpc/kvm/book3s_hv_nested.c | 2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 80 +++++++++++++++------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 124 ++++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_xive.h | 15 ++++
 12 files changed, 252 insertions(+), 99 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 635fb154b33f..83bfd74ce67c 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -104,6 +104,11 @@ * completely in the guest. */ #define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555 +/* + * Special trap used when running a nested guest to communicate that control + * should be passed back to the L1 guest. e.g. 
Because interrupt pending + */ +#define BOOK3S_INTERRUPT_HV_NEST_EXIT 0x5556 #define BOOK3S_IRQPRIO_SYSTEM_RESET 0 #define BOOK3S_IRQPRIO_DATA_SEGMENT 1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index e1dc1872e453..f13dab096dad 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -158,7 +158,7 @@ extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long addr, unsigned long status); -extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, +extern long kvmppc_hv_find_lock_hpte(struct kvm_hpt_info *hpt, gva_t eaddr, unsigned long slb_v, unsigned long valid); extern int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store); @@ -315,6 +315,7 @@ void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu); int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); +int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_save_guest_slb(struct kvm_vcpu *vcpu, struct guest_slb *slbp); void kvmhv_restore_guest_slb(struct kvm_vcpu *vcpu, struct guest_slb *slbp); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2c4d659cf8bb..46bbdc38b2c5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -697,7 +697,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, unsigned long mfrr); int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); -void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu); +int kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu); /* * Host-side operations we want to set up while running in real diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 4ccb6b3a7fbd..7652ad430aab 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -511,9 +511,14 @@ int main(void) OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); + OFFSET(VCPU_LPCR, kvm_vcpu, arch.lpcr); OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested); + OFFSET(VCPU_NESTED_LPID, kvm_nested_guest, shadow_lpid); + OFFSET(VCPU_NESTED_RADIX, kvm_nested_guest, radix); + OFFSET(VCPU_NESTED_VRMA_SLB_V, kvm_nested_guest, vrma_slb_v); OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); + OFFSET(VCPU_HDEC_EXP, kvm_vcpu, arch.hdec_exp); #endif #ifdef CONFIG_PPC_BOOK3S OFFSET(VCPU_PURR, kvm_vcpu, arch.purr); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bbb23b3f8bb9..2b30b48dce49 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -361,7 +361,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_disable(); /* Find the HPTE in the hash table */ - index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, + index = kvmppc_hv_find_lock_hpte(&kvm->arch.hpt, eaddr, slb_v, HPTE_V_VALID | HPTE_V_ABSENT); if (index < 0) { preempt_enable(); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 
8407071d5e22..4020bb52fca7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -74,6 +74,7 @@ #include <asm/hw_breakpoint.h> #include "book3s.h" +#include "book3s_xive.h" #define CREATE_TRACE_POINTS #include "trace_hv.h" @@ -1520,7 +1521,11 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) /* We're good on these - the host merely wanted to get our attention */ case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; - r = RESUME_GUEST; + /* if the guest hdec has expired then it wants control back */ + if (mftb() >= vcpu->arch.hdec_exp) + r = RESUME_HOST; + else + r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_EXTERNAL: vcpu->stat.ext_intr_exits++; @@ -1583,6 +1588,15 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) if (!xics_on_xive()) kvmppc_xics_rm_complete(vcpu, 0); break; + case BOOK3S_INTERRUPT_HV_NEST_EXIT: + /* + * Occurs on nested guest entry path to indicate that control + * should be passed back to l1 guest hypervisor. + * e.g. because of pending interrupt + */ + vcpu->arch.trap = 0; + r = RESUME_HOST; + break; default: r = RESUME_HOST; break; @@ -2957,7 +2971,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { int still_running = 0, i; u64 now; - long ret; struct kvm_vcpu *vcpu; spin_lock(&vc->lock); @@ -2978,13 +2991,16 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) trace_kvm_guest_exit(vcpu); - ret = RESUME_GUEST; - if (vcpu->arch.trap) - ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, - vcpu->arch.run_task); - - vcpu->arch.ret = ret; - vcpu->arch.trap = 0; + vcpu->arch.ret = RESUME_GUEST; + if (vcpu->arch.trap) { + if (vcpu->arch.nested) + vcpu->arch.ret = kvmppc_handle_nested_exit( + vcpu->arch.kvm_run, vcpu); + else + vcpu->arch.ret = kvmppc_handle_exit_hv( + vcpu->arch.kvm_run, vcpu, + vcpu->arch.run_task); + } spin_lock(&vc->lock); if (is_kvmppc_resume_guest(vcpu->arch.ret)) { @@ -3297,6 +3313,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) if (!vcpu->arch.ptid) thr0_done = true; active |= 1 << (thr + vcpu->arch.ptid); + vcpu->arch.trap = 0; } /* * We need to start the first thread of each subcore @@ -3847,21 +3864,6 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns /= halt_poll_ns_shrink; } -#ifdef CONFIG_KVM_XICS -static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) -{ - if (!xics_on_xive()) - return false; - return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < - vcpu->arch.xive_saved_state.cppr; -} -#else -static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) -{ - return false; -} -#endif /* CONFIG_KVM_XICS */ - static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu) { if (vcpu->arch.pending_exceptions || vcpu->arch.prodded || @@ -4013,7 +4015,7 @@ static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu) return r; } -static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int n_ceded, i, r; struct kvmppc_vcore *vc; @@ -4082,7 +4084,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) continue; } for_each_runnable_thread(i, v, vc) { - kvmppc_core_prepare_to_enter(v); + if (!vcpu->arch.nested) + kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); v->stat.signal_exits++; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 
7c1909657b55..049c3111b530 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -759,26 +759,40 @@ void kvmhv_p9_restore_lpcr(struct kvm_split_mode *sip) * Is there a PRIV_DOORBELL pending for the guest (on POWER9)? * Can we inject a Decrementer or a External interrupt? */ -void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) +int kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) { int ext; unsigned long vec = 0; - unsigned long lpcr; + unsigned long old_lpcr, lpcr; + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Don't enter a nested guest if there is something pending for this + * vcpu for the l1 guest. Return -1 to indicate this. + */ + if (vcpu->arch.nested && (vcpu->arch.pending_exceptions || + vcpu->arch.prodded || + vcpu->arch.doorbell_request || + xive_interrupt_pending(vcpu))) + return -1; +#endif /* Insert EXTERNAL bit into LPCR at the MER bit position */ ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1; - lpcr = mfspr(SPRN_LPCR); - lpcr |= ext << LPCR_MER_SH; - mtspr(SPRN_LPCR, lpcr); - isync(); + old_lpcr = mfspr(SPRN_LPCR); + lpcr = old_lpcr | (ext << LPCR_MER_SH); + if (lpcr != old_lpcr) { + mtspr(SPRN_LPCR, lpcr); + isync(); + } if (vcpu->arch.shregs.msr & MSR_EE) { if (ext) { vec = BOOK3S_INTERRUPT_EXTERNAL; } else { - long int dec = mfspr(SPRN_DEC); + s64 dec = mfspr(SPRN_DEC); if (!(lpcr & LPCR_LD)) - dec = (int) dec; + dec = (s32) dec; if (dec < 0) vec = BOOK3S_INTERRUPT_DECREMENTER; } @@ -795,12 +809,13 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = msr; } - if (vcpu->arch.doorbell_request) { + if (cpu_has_feature(CPU_FTR_ARCH_300) && vcpu->arch.doorbell_request) { mtspr(SPRN_DPDES, 1); vcpu->arch.vcore->dpdes = 1; smp_wmb(); vcpu->arch.doorbell_request = 0; } + return 0; } static void flush_guest_tlb(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 63fd81f3039d..624f9951731d 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -58,10 +58,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * Put whatever is in the decrementer into the * hypervisor decrementer. + * If running a nested guest then put the lower of the host decrementer + * and the guest hypervisor decrementer into the hypervisor decrementer + * since we want control back from the nested guest when either expires. */ BEGIN_FTR_SECTION ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_KVM(r5) + ld r6, HSTATE_KVM_VCPU(r13) + cmpdi cr1, r6, 0 /* Do we actually have a vcpu? */ + beq cr1, 33f + ld r7, VCPU_NESTED(r6) + cmpdi cr1, r7, 0 /* Do we have a nested guest? */ + beq cr1, 33f + ld r10, VCPU_HDEC_EXP(r6) /* If so load the hdec expiry */ +33: ld r6, VCORE_KVM(r5) ld r9, KVM_HOST_LPCR(r6) andis. r9, r9, LPCR_LD@h END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -72,8 +82,17 @@ BEGIN_FTR_SECTION bne 32f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 -32: mtspr SPRN_HDEC,r8 - add r8,r8,r7 +BEGIN_FTR_SECTION +32: beq cr1, 34f /* did we load hdec expiry above? */ + subf r10, r7, r10 /* r10 = guest_hdec = hdec_exp - tb */ + cmpd r8, r10 /* host decrementer < hdec? 
*/ + ble 34f + mtspr SPRN_HDEC, r10 /* put guest_hdec into the hv decr */ + b 35f +34: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + mtspr SPRN_HDEC,r8 /* put host decr into hv decr */ +35: add r8,r8,r7 std r8,HSTATE_DECEXP(r13) /* Jump to partition switch code */ diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index f80491e9ff97..54d6ff0bee5b 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -386,7 +386,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (radix) r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu); else - r = RESUME_HOST; /* XXX TODO hpt entry path */ + r = kvmppc_run_vcpu(vcpu->arch.kvm_run, vcpu); } while (is_kvmppc_resume_guest(r)); /* save L2 state for return */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 53fe51d04d78..a939782d8a5e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -1166,8 +1166,8 @@ static struct mmio_hpte_cache_entry * * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK * can trigger deadlock issue. */ -long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, - unsigned long valid) +long kvmppc_hv_find_lock_hpte(struct kvm_hpt_info *hpt, gva_t eaddr, + unsigned long slb_v, unsigned long valid) { unsigned int i; unsigned int pshift; @@ -1195,7 +1195,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, somask = (1UL << 28) - 1; vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; } - hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); + hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(hpt); avpn = slb_v & ~(somask >> 16); /* also includes B */ avpn |= (eaddr & somask) >> 16; @@ -1206,7 +1206,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); + hpte = (__be64 *)(hpt->virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ @@ -1242,7 +1242,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, if (val & HPTE_V_SECONDARY) break; val |= HPTE_V_SECONDARY; - hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); + hash = hash ^ kvmppc_hpt_mask(hpt); } return -1; } @@ -1265,7 +1265,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long slb_v, unsigned int status, bool data, bool is_realmode) { + struct kvm_nested_guest *nested; struct kvm *kvm = vcpu->kvm; + struct kvm_hpt_info *hpt; long int index; unsigned long v, r, gr, orig_v; __be64 *hpte; @@ -1275,12 +1277,20 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct mmio_hpte_cache_entry *cache_entry = NULL; long mmio_update = 0; + hpt = &kvm->arch.hpt; + nested = vcpu->arch.nested; + if (nested) + hpt = &nested->shadow_hpt; + /* For protection fault, expect to find a valid HPTE */ valid = HPTE_V_VALID; if (status & DSISR_NOHPTE) { valid |= HPTE_V_ABSENT; - mmio_update = atomic64_read(&kvm->arch.mmio_update); - cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); + if (!nested) { + mmio_update = atomic64_read(&kvm->arch.mmio_update); + cache_entry = mmio_cache_search(vcpu, addr, slb_v, + mmio_update); + } } if (cache_entry) { index = cache_entry->pte_index; @@ -1288,20 +1298,26 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, r = cache_entry->hpte_r; gr = cache_entry->rpte; } else { 
- index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); + index = kvmppc_hv_find_lock_hpte(hpt, addr, slb_v, valid); if (index < 0) { - if (status & DSISR_NOHPTE) + if (status & DSISR_NOHPTE) { + if (nested) { + /* have to look for HPTE in L1's HPT */ + vcpu->arch.pgfault_index = index; + return -1; + } return status; /* there really was no HPTE */ + } return 0; /* for prot fault, HPTE disappeared */ } - hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); + hpte = (__be64 *)(hpt->virt + (index << 4)); v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); if (cpu_has_feature(CPU_FTR_ARCH_300)) { v = hpte_new_to_old_v(v, r); r = hpte_new_to_old_r(r); } - rev = &kvm->arch.hpt.rev[index]; + rev = &hpt->rev[index]; if (is_realmode) rev = real_vmalloc_addr(rev); gr = rev->guest_rpte; @@ -1318,17 +1334,25 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ if (!data) { - if (gr & (HPTE_R_N | HPTE_R_G)) - return status | SRR1_ISI_N_OR_G; - if (!hpte_read_permission(pp, slb_v & key)) - return status | SRR1_ISI_PROT; + if (gr & (HPTE_R_N | HPTE_R_G)) { + status |= SRR1_ISI_N_OR_G; + goto forward_to_guest; + } + if (!hpte_read_permission(pp, slb_v & key)) { + status |= SRR1_ISI_PROT; + goto forward_to_guest; + } } else if (status & DSISR_ISSTORE) { /* check write permission */ - if (!hpte_write_permission(pp, slb_v & key)) - return status | DSISR_PROTFAULT; + if (!hpte_write_permission(pp, slb_v & key)) { + status |= DSISR_PROTFAULT; + goto forward_to_guest; + } } else { - if (!hpte_read_permission(pp, slb_v & key)) - return status | DSISR_PROTFAULT; + if (!hpte_read_permission(pp, slb_v & key)) { + status |= DSISR_PROTFAULT; + goto forward_to_guest; + } } /* Check storage key, if applicable */ @@ -1343,13 +1367,14 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, /* Save HPTE info for virtual-mode handler */ vcpu->arch.pgfault_addr = addr; vcpu->arch.pgfault_index = index; + vcpu->arch.pgfault_hpte[0] = v; vcpu->arch.pgfault_hpte[1] = r; vcpu->arch.pgfault_cache = cache_entry; /* Check the storage key to see if it is possibly emulated MMIO */ - if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == - (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { + if (!nested && (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { if (!cache_entry) { unsigned int pshift = 12; unsigned int pshift_index; @@ -1373,5 +1398,18 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, } return -1; /* send fault up to host kernel mode */ + +forward_to_guest: + if (nested) { + /* + * This was technically caused by missing permissions in the L1 + * pte, go up to the virtual mode handler so we can forward + * this interrupt to L1. + */ + vcpu->arch.pgfault_index = -1; + vcpu->arch.fault_dsisr = status; + return -1; + } + return status; } EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 54e1864d4702..43cdd9f7fab5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -606,15 +606,29 @@ kvmppc_hv_entry: cmpwi r6,0 bne 10f - lwz r7,KVM_LPID(r9) + /* Load guest lpid (on P9 need to check if running a nested guest) */ BEGIN_FTR_SECTION + cmpdi r4, 0 /* do we have a vcpu? */ + beq 19f + ld r5, VCPU_NESTED(r4) /* vcpu running nested guest? 
*/ + cmpdi cr2, r5, 0 /* use cr2 as indication of nested */ + /* + * If we're using this entry path for a nested guest that nested guest + * must be hash, otherwise we'd have used __kvmhv_vcpu_entry_p9. + */ + beq cr2, 19f + ld r7, VCPU_NESTED_LPID(r5) + b 20f +19: +FTR_SECTION_ELSE ld r6,KVM_SDR1(r9) li r0,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r0 ptesync mtspr SPRN_SDR1,r6 /* switch to partition page table */ -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - mtspr SPRN_LPID,r7 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + lwz r7,KVM_LPID(r9) +20: mtspr SPRN_LPID,r7 isync /* See if we need to flush the TLB. */ @@ -892,7 +906,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) HMT_MEDIUM 21: /* Set LPCR. */ - ld r8,VCORE_LPCR(r5) + ld r8,VCPU_LPCR(r4) mtspr SPRN_LPCR,r8 isync @@ -915,10 +929,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) blt hdec_soon /* For hash guest, clear out and reload the SLB */ +BEGIN_FTR_SECTION + bne cr2, 10f /* cr2 indicates nested -> hash */ ld r6, VCPU_KVM(r4) lbz r0, KVM_RADIX(r6) cmpwi r0, 0 bne 9f +10: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) li r6, 0 slbmte r6, r6 slbia @@ -1018,19 +1036,18 @@ no_xive: stw r0, STACK_SLOT_SHORT_PATH(r1) deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */ - /* Check if we can deliver an external or decrementer interrupt now */ - ld r0, VCPU_PENDING_EXC(r4) -BEGIN_FTR_SECTION - /* On POWER9, also check for emulated doorbell interrupt */ - lbz r3, VCPU_DBELL_REQ(r4) - or r0, r0, r3 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - cmpdi r0, 0 - beq 71f + /* Check if we can deliver external/decrementer/dbell interrupt now */ mr r3, r4 bl kvmppc_guest_entry_inject_int - ld r4, HSTATE_KVM_VCPU(r13) + cmpdi r3, 0 + beq 71f + /* kvmppc_guest_entry_inject_int returned -1 don't enter nested guest */ + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_HV_NEST_EXIT + b guest_exit_cont + 71: + ld r4, HSTATE_KVM_VCPU(r13) ld r6, VCPU_SRR0(r4) ld r7, VCPU_SRR1(r4) mtspr SPRN_SRR0, r6 @@ -1462,11 +1479,17 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ bne guest_exit_short_path /* For hash guest, read the guest SLB and save it away */ - ld r5, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r5) li r5, 0 +BEGIN_FTR_SECTION + ld r6, VCPU_NESTED(r9) /* vcpu running nested guest? */ + cmpdi r6, 0 + bne 4f /* must be hash if we're nested */ + ld r7, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r7) cmpwi r0, 0 bne 3f /* for radix, save 0 entries */ +4: +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1517,7 +1540,7 @@ guest_bypass: mftb r6 /* On P9, if the guest has large decr enabled, don't sign extend */ BEGIN_FTR_SECTION - ld r4, VCORE_LPCR(r3) + ld r4, VCPU_LPCR(r9) andis. r4, r4, LPCR_LD@h bne 16f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) @@ -1749,6 +1772,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* * Are we running hash or radix ? */ + ld r6, VCPU_NESTED(r9) /* vcpu running nested guest? */ + cmpdi r6, 0 + bne 2f /* must be hash if we're nested */ ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) cmpwi cr2, r0, 0 @@ -2036,22 +2062,38 @@ kvmppc_tm_emul: * reflect the HDSI to the guest as a DSI. */ kvmppc_hdsi: - ld r3, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r3) mfspr r4, SPRN_HDAR mfspr r6, SPRN_HDSISR BEGIN_FTR_SECTION /* Look for DSISR canary. If we find it, retry instruction */ cmpdi r6, 0x7fff beq 6f + /* Are we hash or radix? 
*/ + ld r3, VCPU_NESTED(r9) + cmpdi cr2, r3, 0 + beq cr2, 10f + lbz r0, VCPU_NESTED_RADIX(r3) /* nested check nested->radix */ + b 11f +10: ld r5, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r5) /* !nested check kvm->arch.radix */ +11: cmpwi r0, 0 + bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - cmpwi r0, 0 - bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ andi. r0, r11, MSR_DR /* data relocation enabled? */ - beq 3f + bne 3f + /* not relocated, load the VRMA_SLB_V for kvmppc_hpte_hv_fault() */ +BEGIN_FTR_SECTION + beq cr2, 12f /* cr2 indicates nested */ + ld r5, VCPU_NESTED_VRMA_SLB_V(r3) /* r3 = nested (loaded above) */ + b 4f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r5, VCPU_KVM(r9) +12: ld r5, KVM_VRMA_SLB_V(r5) + b 4f +3: BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f @@ -2097,10 +2139,6 @@ fast_interrupt_c_return: mr r4, r9 b fast_guest_return -3: ld r5, VCPU_KVM(r9) /* not relocated, use VRMA */ - ld r5, KVM_VRMA_SLB_V(r5) - b 4b - /* If this is for emulated MMIO, load the instruction word */ 2: li r8, KVM_INST_FETCH_FAILED /* In case lwz faults */ @@ -2137,14 +2175,32 @@ fast_interrupt_c_return: * it is an HPTE not found fault for a page that we have paged out. */ kvmppc_hisi: - ld r3, VCPU_KVM(r9) - lbz r0, KVM_RADIX(r3) - cmpwi r0, 0 +BEGIN_FTR_SECTION + /* Are we hash or radix? */ + ld r3, VCPU_NESTED(r9) + cmpdi cr2, r3, 0 + beq cr2, 10f + lbz r0, VCPU_NESTED_RADIX(r3) /* nested check nested->radix */ + b 11f +10: ld r6, VCPU_KVM(r9) + lbz r0, KVM_RADIX(r6) /* !nested check kvm->arch.radix */ +11: cmpwi r0, 0 bne .Lradix_hisi /* for radix, just save ASDR */ +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) andis. r0, r11, SRR1_ISI_NOPT@h beq 1f andi. r0, r11, MSR_IR /* instruction relocation enabled? */ - beq 3f + bne 3f + /* not relocated, load the VRMA_SLB_V for kvmppc_hpte_hv_fault() */ +BEGIN_FTR_SECTION + beq cr2, 12f /* cr2 indicates nested */ + ld r5, VCPU_NESTED_VRMA_SLB_V(r3) /* r3 = nested (loaded above) */ + b 4f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r6, VCPU_KVM(r9) +12: ld r5, KVM_VRMA_SLB_V(r6) + b 4f +3: BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f @@ -2179,10 +2235,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) bl kvmppc_msr_interrupt b fast_interrupt_c_return -3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ - ld r5, KVM_VRMA_SLB_V(r6) - b 4b - /* * Try to handle an hcall in real mode. * Returns to the guest if we handle it, or continues on up to @@ -2624,8 +2676,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) mftb r5 BEGIN_FTR_SECTION /* On P9 check whether the guest has large decrementer mode enabled */ - ld r6, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_LPCR(r6) + ld r6, HSTATE_KVM_VCPU(r13) + ld r6, VCPU_LPCR(r6) andis. 
r6, r6, LPCR_LD@h bne 68f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 50494d0ee375..d6f10d7ec4d2 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -283,5 +283,20 @@ int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); +static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) +{ + if (!xics_on_xive()) + return false; + return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < + vcpu->arch.xive_saved_state.cppr; +} + +#else /* !CONFIG_KVM_XICS */ + +static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) +{ + return false; +} + #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ -- 2.13.6
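
Not part of the patch: as a reading aid for the entry-path changes above, here is a minimal, self-contained C sketch of the two decisions described in the commit message -- programming the hypervisor decrementer (HDEC) with the smaller of the host decrementer and the time remaining on the L1 hypervisor's decrementer, and declining to enter the nested guest when something is already pending for L1. All types and names below (fake_vcpu, hdec_to_program(), the simplified guest_entry_inject_int()) are illustrative stand-ins, not the kernel's actual structures; the real logic is the assembly in book3s_hv_interrupts.S and kvmppc_guest_entry_inject_int() in book3s_hv_builtin.c above.

/*
 * Illustrative sketch only -- NOT kernel code. Types and helpers are
 * made up to show the control flow described in the commit message.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct fake_vcpu {
	bool     nested;          /* running an L2 (nested) guest?         */
	uint64_t hdec_exp;        /* timebase value when L1's HDEC expires */
	bool     pending_for_l1;  /* exception/doorbell/IRQ pending for L1 */
};

/*
 * Pick the value to program into the hypervisor decrementer (HDEC):
 * the smaller of the host's remaining decrementer and the time left
 * until the L1 hypervisor's decrementer expires, so control comes back
 * to the host (and then to L1) when either one runs out.
 */
static int64_t hdec_to_program(const struct fake_vcpu *vcpu,
			       int64_t host_dec, uint64_t tb_now)
{
	if (vcpu->nested) {
		int64_t guest_hdec = (int64_t)(vcpu->hdec_exp - tb_now);

		if (guest_hdec < host_dec)
			return guest_hdec;
	}
	return host_dec;
}

/*
 * Mirror of the "return -1 so we bail out to L1" rule: if we are about
 * to enter a nested guest but L1 already has something pending, do not
 * enter L2. In the patch the caller then exits the guest-entry path
 * with the BOOK3S_INTERRUPT_HV_NEST_EXIT trap.
 */
static int guest_entry_inject_int(const struct fake_vcpu *vcpu)
{
	if (vcpu->nested && vcpu->pending_for_l1)
		return -1;
	/* ... normal external/decrementer/doorbell injection here ... */
	return 0;
}

int main(void)
{
	struct fake_vcpu vcpu = { .nested = true, .hdec_exp = 1500,
				  .pending_for_l1 = false };

	printf("HDEC to program: %lld\n",
	       (long long)hdec_to_program(&vcpu, 1000, 800));
	printf("enter L2? %s\n",
	       guest_entry_inject_int(&vcpu) ? "no, return to L1" : "yes");
	return 0;
}

Built with a stock C compiler, this prints an HDEC value of 700 (the nested guest's remaining time, which is less than the host's 1000) and chooses to enter L2 because nothing is pending for L1.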