Re: [PATCH 2/2] KVM: PPC: Book3E: Get vcpu's last instruction for emulation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 28.06.2013, at 11:20, Mihai Caraman wrote:

> lwepx faults needs to be handled by KVM and this implies additional code
> in DO_KVM macro to identify the source of the exception originated from
> host context. This requires to check the Exception Syndrome Register
> (ESR[EPID]) and External PID Load Context Register (EPLC[EGS]) for DTB_MISS,
> DSI and LRAT exceptions which is too intrusive for the host.
> 
> Get rid of lwepx and acquire last instuction in kvmppc_handle_exit() by
> searching for the physical address and kmap it. This fixes an infinite loop
> caused by lwepx's data TLB miss handled in the host and the TODO for TLB
> eviction and execute-but-not-read entries.
> 
> Signed-off-by: Mihai Caraman <mihai.caraman@xxxxxxxxxxxxx>
> ---
> Resend this pacth for Alex G. he was unsubscribed from kvm-ppc mailist
> for a while.
> 
> arch/powerpc/include/asm/mmu-book3e.h |    6 ++-
> arch/powerpc/kvm/booke.c              |    6 +++
> arch/powerpc/kvm/booke.h              |    2 +
> arch/powerpc/kvm/bookehv_interrupts.S |   32 ++-------------
> arch/powerpc/kvm/e500.c               |    4 ++
> arch/powerpc/kvm/e500mc.c             |   69 +++++++++++++++++++++++++++++++++
> 6 files changed, 91 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
> index 99d43e0..32e470e 100644
> --- a/arch/powerpc/include/asm/mmu-book3e.h
> +++ b/arch/powerpc/include/asm/mmu-book3e.h
> @@ -40,7 +40,10 @@
> 
> /* MAS registers bit definitions */
> 
> -#define MAS0_TLBSEL(x)		(((x) << 28) & 0x30000000)
> +#define MAS0_TLBSEL_MASK	0x30000000
> +#define MAS0_TLBSEL_SHIFT	28
> +#define MAS0_TLBSEL(x)		(((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
> +#define MAS0_GET_TLBSEL(mas0)	(((mas0) & MAS0_TLBSEL_MASK) >> MAS0_TLBSEL_SHIFT)
> #define MAS0_ESEL_MASK		0x0FFF0000
> #define MAS0_ESEL_SHIFT		16
> #define MAS0_ESEL(x)		(((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK)
> @@ -58,6 +61,7 @@
> #define MAS1_TSIZE_MASK		0x00000f80
> #define MAS1_TSIZE_SHIFT	7
> #define MAS1_TSIZE(x)		(((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
> +#define MAS1_GET_TSIZE(mas1)	(((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT)
> 
> #define MAS2_EPN		(~0xFFFUL)
> #define MAS2_X0			0x00000040
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 1020119..6764a8e 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -836,6 +836,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	/* update before a new last_exit_type is rewritten */
> 	kvmppc_update_timing_stats(vcpu);
> 
> +	/*
> +	 * The exception type can change at this point, such as if the TLB entry
> +	 * for the emulated instruction has been evicted.
> +	 */
> +	kvmppc_prepare_for_emulation(vcpu, &exit_nr);
> +
> 	/* restart interrupts if they were meant for the host */
> 	kvmppc_restart_interrupt(vcpu, exit_nr);
> 
> diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
> index 5fd1ba6..a0d0fea 100644
> --- a/arch/powerpc/kvm/booke.h
> +++ b/arch/powerpc/kvm/booke.h
> @@ -90,6 +90,8 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu);
> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu);
> 
> +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int *exit_nr);
> +
> enum int_class {
> 	INT_CLASS_NONCRIT,
> 	INT_CLASS_CRIT,
> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
> index 20c7a54..0538ab9 100644
> --- a/arch/powerpc/kvm/bookehv_interrupts.S
> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> @@ -120,37 +120,20 @@
> 
> 	.if	\flags & NEED_EMU
> 	/*
> -	 * This assumes you have external PID support.
> -	 * To support a bookehv CPU without external PID, you'll
> -	 * need to look up the TLB entry and create a temporary mapping.
> -	 *
> -	 * FIXME: we don't currently handle if the lwepx faults.  PR-mode
> -	 * booke doesn't handle it either.  Since Linux doesn't use
> -	 * broadcast tlbivax anymore, the only way this should happen is
> -	 * if the guest maps its memory execute-but-not-read, or if we
> -	 * somehow take a TLB miss in the middle of this entry code and
> -	 * evict the relevant entry.  On e500mc, all kernel lowmem is
> -	 * bolted into TLB1 large page mappings, and we don't use
> -	 * broadcast invalidates, so we should not take a TLB miss here.
> -	 *
> -	 * Later we'll need to deal with faults here.  Disallowing guest
> -	 * mappings that are execute-but-not-read could be an option on
> -	 * e500mc, but not on chips with an LRAT if it is used.
> +	 * We don't use external PID support. lwepx faults would need to be
> +	 * handled by KVM and this implies aditional code in DO_KVM (for
> +	 * DTB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which
> +	 * is too intrusive for the host. Get last instuction in
> +	 * kvmppc_handle_exit().
> 	 */
> -
> -	mfspr	r3, SPRN_EPLC	/* will already have correct ELPID and EGS */
> 	PPC_STL	r15, VCPU_GPR(R15)(r4)
> 	PPC_STL	r16, VCPU_GPR(R16)(r4)
> 	PPC_STL	r17, VCPU_GPR(R17)(r4)
> 	PPC_STL	r18, VCPU_GPR(R18)(r4)
> 	PPC_STL	r19, VCPU_GPR(R19)(r4)
> -	mr	r8, r3
> 	PPC_STL	r20, VCPU_GPR(R20)(r4)
> -	rlwimi	r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS
> 	PPC_STL	r21, VCPU_GPR(R21)(r4)
> -	rlwimi	r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR
> 	PPC_STL	r22, VCPU_GPR(R22)(r4)
> -	rlwimi	r8, r10, EPC_EPID_SHIFT, EPC_EPID
> 	PPC_STL	r23, VCPU_GPR(R23)(r4)
> 	PPC_STL	r24, VCPU_GPR(R24)(r4)
> 	PPC_STL	r25, VCPU_GPR(R25)(r4)
> @@ -160,11 +143,6 @@
> 	PPC_STL	r29, VCPU_GPR(R29)(r4)
> 	PPC_STL	r30, VCPU_GPR(R30)(r4)
> 	PPC_STL	r31, VCPU_GPR(R31)(r4)
> -	mtspr	SPRN_EPLC, r8
> -	isync
> -	lwepx   r9, 0, r5
> -	mtspr	SPRN_EPLC, r3
> -	stw	r9, VCPU_LAST_INST(r4)
> 	.endif
> 
> 	.if	\flags & NEED_ESR
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index ce6b73c..c82a89f 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -439,6 +439,10 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
> 	return r;
> }
> 
> +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int *exit_nr)
> +{
> +}
> +
> struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500;
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index c3bdc0a..3641df7 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -271,6 +271,75 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
> 	return r;
> }
> 
> +void kvmppc_prepare_for_emulation(struct kvm_vcpu *vcpu, unsigned int *exit_nr)
> +{
> +	gva_t geaddr;
> +	hpa_t addr;
> +	u64 mas7_mas3;
> +	hva_t eaddr;
> +	u32 mas1, mas3;
> +	struct page *page;
> +	unsigned int addr_space, psize_shift;
> +	bool pr;
> +
> +	if ((*exit_nr != BOOKE_INTERRUPT_DATA_STORAGE) &&
> +	    (*exit_nr != BOOKE_INTERRUPT_DTLB_MISS) &&
> +	    (*exit_nr != BOOKE_INTERRUPT_HV_PRIV))
> +		return;
> +
> +	/* Search guest translation to find the real addressss */
> +	geaddr = vcpu->arch.pc;
> +	addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG;
> +	mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
> +	mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
> +	isync();
> +	asm volatile("tlbsx 0, %[geaddr]\n" : : [geaddr] "r" (geaddr));
> +	mtspr(SPRN_MAS5, 0);
> +	mtspr(SPRN_MAS8, 0);	
> +
> +	mas1 = mfspr(SPRN_MAS1);
> +	if (!(mas1 & MAS1_VALID)) {
> +		/*
> +	 	 * There is no translation for the emulated instruction.
> +		 * Simulate an instruction TLB miss. This should force the host
> +		 * or ultimately the guest to add the translation and then
> +		 * reexecute the instruction.
> +		 */
> +		*exit_nr = BOOKE_INTERRUPT_ITLB_MISS;
> +		return;
> +	}
> +
> +	mas3 = mfspr(SPRN_MAS3);
> +	pr = vcpu->arch.shared->msr & MSR_PR;
> +	if ((pr && (!(mas3 & MAS3_UX))) || ((!pr) && (!(mas3 & MAS3_SX)))) {
> +	 	/*
> +		 * Another thread may rewrite the TLB entry in parallel, don't
> +		 * execute from the address if the execute permission is not set
> +		 */
> +		vcpu->arch.fault_esr = 0;
> +		*exit_nr = BOOKE_INTERRUPT_INST_STORAGE;
> +		return;
> +	}
> +
> +	/* Get page size */
> +	if (MAS0_GET_TLBSEL(mfspr(SPRN_MAS0)) == 0)
> +		psize_shift = PAGE_SHIFT;
> +	else
> +		psize_shift = MAS1_GET_TSIZE(mas1) + 10;
> +
> +	mas7_mas3 = (((u64) mfspr(SPRN_MAS7)) << 32) |
> +		    mfspr(SPRN_MAS3);
> +	addr = (mas7_mas3 & (~0ULL << psize_shift)) |
> +	       (geaddr & ((1ULL << psize_shift) - 1ULL));
> +
> +	/* Map a page and get guest's instruction */
> +	page = pfn_to_page(addr >> PAGE_SHIFT);

While looking at this I just realized that you're missing a check here. What if our IP is in some PCI BAR? Or can't we execute from those?


Alex

> +	eaddr = (unsigned long)kmap_atomic(page);
> +	eaddr |= addr & ~PAGE_MASK;
> +	vcpu->arch.last_inst = *(u32 *)eaddr;
> +	kunmap_atomic((u32 *)eaddr);
> +}
> +
> struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
> {
> 	struct kvmppc_vcpu_e500 *vcpu_e500;
> -- 
> 1.7.4.1
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux