On Fri, Sep 21, 2018 at 08:01:57PM +1000, Paul Mackerras wrote:
> From: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
>
> When running a nested (L2) guest, the guest (L1) hypervisor will use
> hypervisor-privileged TLB invalidation instructions (to manage the
> partition-scoped page tables), which will result in hypervisor
> emulation assistance interrupts. We emulate these instructions on
> behalf of the L1 guest.
>
> The tlbie instruction can invalidate different scopes:
>
> Invalidate TLB for a given target address:
> - This invalidates a single L2 -> L1 pte
> - We need to invalidate any L2 -> L0 shadow_pgtable ptes which map the
>   L2 address space which is being invalidated. This is because a single
>   L2 -> L1 pte may have been mapped with more than one pte in the
>   L2 -> L0 page tables.
>
> Invalidate the entire TLB for a given LPID or for all LPIDs:
> - Invalidate the entire shadow_pgtable for a given nested guest, or
>   for all nested guests.
>
> Invalidate the PWC (page walk cache) for a given LPID or for all LPIDs:
> - We don't cache the PWC, so nothing to do.
>
> Invalidate the entire TLB, PWC and partition table for a given/all LPIDs:
> - Here we free the entire nested state, since it will all need to be
>   reinitialised anyway.
>
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx>
> Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx>

Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx>
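
One aside before the diff: the field decoding done by the new get_*()
helpers (further down in the patch) is easy to sanity-check entirely in
userspace. Here is a minimal standalone sketch, not part of the patch:
the shifts and masks are copied from the helpers, the instruction word
is built from the tlbie opcode (31/306, i.e. the new OP_31_XOP_TLBIE)
plus made-up field values, and the register values in main() are made
up purely for illustration.

#include <stdio.h>

/* Same masks/shifts as the patch's get_*() helpers */
static int get_ric(unsigned int instr)  { return (instr >> 18) & 0x3; }
static int get_prs(unsigned int instr)  { return (instr >> 17) & 0x1; }
static int get_r(unsigned int instr)    { return (instr >> 16) & 0x1; }
static int get_lpid(unsigned long rs)   { return rs & 0xffffffff; }
static int get_is(unsigned long rb)     { return (rb >> 10) & 0x3; }
static int get_ap(unsigned long rb)     { return (rb >> 5) & 0x7; }
static long get_epn(unsigned long rb)   { return rb >> 12; }

int main(void)
{
        /* tlbie base (opcode 31, XO 306) with made-up ric=2, r=1 */
        unsigned int instr = 0x7c000264 | (2u << 18) | (1u << 16);
        unsigned long rb = 2UL << 10;   /* is = 2 */
        unsigned long rs = 12;          /* lpid = 12 */

        printf("ric=%d prs=%d r=%d lpid=%d\n",
               get_ric(instr), get_prs(instr), get_r(instr), get_lpid(rs));
        printf("is=%d ap=%d epn=0x%lx\n",
               get_is(rb), get_ap(rb), get_epn(rb));
        return 0;
}

For those inputs it prints "ric=2 prs=0 r=1 lpid=12" and
"is=2 ap=0 epn=0x0", matching what kvmhv_emulate_priv_tlbie() below
would decode from the same values.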
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |  12 ++
>  arch/powerpc/include/asm/ppc-opcode.h         |   1 +
>  arch/powerpc/kvm/book3s_emulate.c             |   1 -
>  arch/powerpc/kvm/book3s_hv_nested.c           | 198 +++++++++++++++++++++++++-
>  4 files changed, 209 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index b3520b5..66db23e 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -203,6 +203,18 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
>  	BUG();
>  }
>  
> +static inline unsigned int ap_to_shift(unsigned long ap)
> +{
> +	int psize;
> +
> +	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
> +		if (mmu_psize_defs[psize].ap == ap)
> +			return mmu_psize_defs[psize].shift;
> +	}
> +
> +	return -1;
> +}
> +
>  static inline unsigned long get_sllp_encoding(int psize)
>  {
>  	unsigned long sllp;
> diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
> index 665af14..6093bc8 100644
> --- a/arch/powerpc/include/asm/ppc-opcode.h
> +++ b/arch/powerpc/include/asm/ppc-opcode.h
> @@ -104,6 +104,7 @@
>  #define OP_31_XOP_LHZUX     311
>  #define OP_31_XOP_MSGSNDP   142
>  #define OP_31_XOP_MSGCLRP   174
> +#define OP_31_XOP_TLBIE     306
>  #define OP_31_XOP_MFSPR     339
>  #define OP_31_XOP_LWAX      341
>  #define OP_31_XOP_LHAX      343
> diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
> index 2654df2..8c7e933 100644
> --- a/arch/powerpc/kvm/book3s_emulate.c
> +++ b/arch/powerpc/kvm/book3s_emulate.c
> @@ -36,7 +36,6 @@
>  #define OP_31_XOP_MTSR		210
>  #define OP_31_XOP_MTSRIN	242
>  #define OP_31_XOP_TLBIEL	274
> -#define OP_31_XOP_TLBIE		306
>  /* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
>  #define OP_31_XOP_FAKE_SC1	308
>  #define OP_31_XOP_SLBMTE	402
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
> index 9a50feb..84c82a2 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -457,7 +457,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
>  }
>  
>  /* caller must hold gp->tlb_lock */
> -void kvmhv_flush_nested(struct kvm_nested_guest *gp)
> +static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
>  {
>  	struct kvm *kvm = gp->parent;
>  
> @@ -650,10 +650,204 @@ static int kvmhv_emulate_priv_mfspr(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  	return EMULATE_FAIL;
>  }
>  
> +static inline int get_ric(unsigned int instr)
> +{
> +	return (instr >> 18) & 0x3;
> +}
> +
> +static inline int get_prs(unsigned int instr)
> +{
> +	return (instr >> 17) & 0x1;
> +}
> +
> +static inline int get_r(unsigned int instr)
> +{
> +	return (instr >> 16) & 0x1;
> +}
> +
> +static inline int get_lpid(unsigned long r_val)
> +{
> +	return r_val & 0xffffffff;
> +}
> +
> +static inline int get_is(unsigned long r_val)
> +{
> +	return (r_val >> 10) & 0x3;
> +}
> +
> +static inline int get_ap(unsigned long r_val)
> +{
> +	return (r_val >> 5) & 0x7;
> +}
> +
> +static inline long get_epn(unsigned long r_val)
> +{
> +	return r_val >> 12;
> +}
> +
> +static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
> +					int ap, long epn)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	long npages;
> +	int shift;
> +	unsigned long addr;
> +
> +	shift = ap_to_shift(ap);
> +	addr = epn << 12;
> +	if (shift < 0) {
> +		/* Invalid ap encoding */
> +		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
> +		return EMULATE_AGAIN;
> +	}
> +
> +	addr &= ~((1UL << shift) - 1);
> +	npages = 1UL << (shift - PAGE_SHIFT);
> +
> +	gp = kvmhv_get_nested(kvm, lpid, false);
> +	if (!gp) /* No such guest -> nothing to do */
> +		return EMULATE_DONE;
> +	mutex_lock(&gp->tlb_lock);
> +
> +	/* There may be more than one host page backing this single guest pte */
> +	do {
> +		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shift);
> +
> +		npages -= 1UL << (shift - PAGE_SHIFT);
> +		addr += 1UL << shift;
> +	} while (npages > 0);
> +
> +	mutex_unlock(&gp->tlb_lock);
> +	kvmhv_put_nested(gp);
> +	return EMULATE_DONE;
> +}
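
The do/while loop above is the subtle part: one L2 -> L1 pte (say a 2M
page) may be shadowed by many smaller ptes in the L2 -> L0 table, and
kvmhv_invalidate_shadow_pte() reports back through &shift the page size
it actually found, so the loop advances by whatever was really mapped.
A userspace sketch of just that arithmetic, with a purely illustrative
stub standing in for kvmhv_invalidate_shadow_pte() that pretends every
shadow mapping is a 4K page:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Stand-in for kvmhv_invalidate_shadow_pte(): pretend the shadow
 * table mapped this address with a 4K page (shift 12). */
static void invalidate_shadow_pte_stub(unsigned long addr, int *shift)
{
        (void)addr;
        *shift = 12;
}

int main(void)
{
        int shift = 21;         /* the guest pte was a 2M page */
        unsigned long addr = 0;
        long npages = 1UL << (shift - PAGE_SHIFT);
        long calls = 0;

        /* Same walk as the patch's do/while loop */
        do {
                invalidate_shadow_pte_stub(addr, &shift);
                calls++;
                npages -= 1UL << (shift - PAGE_SHIFT);
                addr += 1UL << shift;
        } while (npages > 0);

        printf("%ld shadow invalidations for one guest tlbie\n", calls);
        return 0;
}

This prints 512: a single 2M-page tlbie from L1 can fan out into
hundreds of 4K shadow-pte invalidations, which is why the loop is
bounded by npages rather than doing a single lookup.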
> +
> +static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
> +				     struct kvm_nested_guest *gp, int ric)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +
> +	mutex_lock(&gp->tlb_lock);
> +	switch (ric) {
> +	case 0:
> +		/* Invalidate TLB */
> +		spin_lock(&kvm->mmu_lock);
> +		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
> +					  gp->shadow_lpid);
> +		radix__flush_tlb_lpid(gp->shadow_lpid);
> +		spin_unlock(&kvm->mmu_lock);
> +		break;
> +	case 1:
> +		/*
> +		 * Invalidate PWC
> +		 * We don't cache this -> nothing to do
> +		 */
> +		break;
> +	case 2:
> +		/* Invalidate TLB, PWC and caching of partition table entries */
> +		kvmhv_flush_nested(gp);
> +		break;
> +	default:
> +		break;
> +	}
> +	mutex_unlock(&gp->tlb_lock);
> +}
> +
> +static int kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	int i, ret = EMULATE_DONE;
> +
> +	spin_lock(&kvm->mmu_lock);
> +	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
> +		gp = kvm->arch.nested_guests[i];
> +		if (gp) {
> +			spin_unlock(&kvm->mmu_lock);
> +			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
> +			spin_lock(&kvm->mmu_lock);
> +		}
> +	}
> +	spin_unlock(&kvm->mmu_lock);
> +
> +	return ret;
> +}
> +
> +static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_nested_guest *gp;
> +	int rs, rb;
> +	int r, ric, prs, is, ap;
> +	int lpid;
> +	long epn;
> +	int ret = EMULATE_DONE;
> +
> +	rs = get_rs(instr);
> +	rb = get_rb(instr);
> +
> +	ric = get_ric(instr);
> +	prs = get_prs(instr);
> +	r = get_r(instr);
> +	lpid = get_lpid(kvmppc_get_gpr(vcpu, rs));
> +	is = get_is(kvmppc_get_gpr(vcpu, rb));
> +
> +	/*
> +	 * These cases are invalid and __should__ have caused a machine check:
> +	 * r != 1 -> Only radix supported
> +	 * prs == 1 -> Not HV privileged
> +	 * ric == 3 -> No cluster bombs for radix
> +	 * is == 1 -> Partition scoped translations not associated with pid
> +	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
> +	 */
> +	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
> +	    ((!is) && (ric == 1 || ric == 2))) {
> +		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
> +		return EMULATE_AGAIN;
> +	}
> +
> +	switch (is) {
> +	case 0:
> +		/*
> +		 * We know ric == 0
> +		 * Invalidate TLB for a given target address
> +		 */
> +		epn = get_epn(kvmppc_get_gpr(vcpu, rb));
> +		ap = get_ap(kvmppc_get_gpr(vcpu, rb));
> +		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
> +		break;
> +	case 2:
> +		/* Invalidate matching LPID */
> +		gp = kvmhv_get_nested(kvm, lpid, false);
> +		if (gp) {
> +			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
> +			kvmhv_put_nested(gp);
> +		}
> +		ret = EMULATE_DONE;
> +		break;
> +	case 3:
> +		/* Invalidate ALL LPIDs */
> +		ret = kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
> +		break;
> +	default:
> +		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
> +		ret = EMULATE_AGAIN;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
>  static int kvmhv_emulate_priv_op_31(struct kvm_run *run, struct kvm_vcpu *vcpu,
>  				    unsigned int instr)
>  {
> -	return EMULATE_FAIL;
> +	switch (get_xop(instr)) {
> +	case OP_31_XOP_TLBIE:
> +		return kvmhv_emulate_priv_tlbie(vcpu, instr);
> +	default:
> +		return EMULATE_FAIL;
> +	}
>  }
>  
>  static int kvmhv_emulate_priv_op(struct kvm_run *run, struct kvm_vcpu *vcpu,

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson