Re: [PATCH v3 2/2] KVM: PPC: e500: Save/restore SPE state

On 30.03.2011, at 01:43, Scott Wood wrote:

> This is done lazily; the SPE save will be done only if the guest has used
> SPE since the last preemption or heavyweight exit, and restore will be
> done only on demand.
> 
> For SPEFSCR, Linux already switches it on context switch (non-lazily), so
> the only remaining bit is to save it between qemu and the guest.
> 
> A vcpu field "msr_block" is introduced in order to be the subtractive
> (and dynamic) equivalent of KVMPPC_MSR_MASK -- bits that are set in
> msr_block are prevented from being set in the hardware MSR while in the
> guest.
> 
> Signed-off-by: Liu Yu <yu.liu@xxxxxxxxxxxxx>
> Signed-off-by: Scott Wood <scottwood@xxxxxxxxxxxxx>
> ---
> Changes in v3:
> - removed fsl ifdef
> - inverted sense of msr_block, so zero-initialization works
>   (more intuitive this way, too)
> - eliminated msr_block accessors
> - handled SPE exceptions when kernel doesn't have SPE enabled
> - dropped the wrapper around __kvmppc_vcpu_run(), as the state saving
>   in kvmppc_core_vcpu_put should be sufficient, and the loading
>   on heavyweight entry was a dubious optimization (we don't do it
>   on sched-in, it would only really improve things if you have
>   a tight loop of Qemu I/O combined with SPE instructions, and it
>   can cause a guest to be considered to be actively using SPE even if
>   it's just had the MSR bit set without use for a long time)
> 
> arch/powerpc/include/asm/kvm_host.h  |    7 ++++
> arch/powerpc/include/asm/reg_booke.h |    1 +
> arch/powerpc/kernel/asm-offsets.c    |    9 +++++
> arch/powerpc/kvm/booke.c             |   36 +++++++++++++++++-
> arch/powerpc/kvm/booke_interrupts.S  |   67 +++++++++++++++++++++++++++++++++-
> arch/powerpc/kvm/e500.c              |   26 +++++++++++++-
> 6 files changed, 142 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index bba3b9b..6717049 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -195,6 +195,12 @@ struct kvm_vcpu_arch {
> 	u64 fpr[32];
> 	u64 fpscr;
> 
> +#ifdef CONFIG_SPE
> +	ulong evr[32];
> +	ulong spefscr;
> +	ulong host_spefscr;
> +	u64 acc;
> +#endif
> #ifdef CONFIG_ALTIVEC
> 	vector128 vr[32];
> 	vector128 vscr;
> @@ -215,6 +221,7 @@ struct kvm_vcpu_arch {
> 	ulong lr;
> 
> 	ulong xer;
> +	ulong msr_block;
> 	u32 cr;
> #endif
> 
> diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
> index 3b1a9b7..2705f9a 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -312,6 +312,7 @@
> #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
> #define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
> #define ESR_BO		0x00020000	/* Byte Ordering */
> +#define ESR_SPV		0x00000080	/* Signal Processing operation */
> 
> /* Bit definitions related to the DBCR0. */
> #if defined(CONFIG_40x)
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 23e6a93..52400ff 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -403,6 +403,9 @@ int main(void)
> 	DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
> 	DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
> 
> +#ifdef CONFIG_BOOKE
> +	DEFINE(VCPU_MSR_BLOCK, offsetof(struct kvm_vcpu, arch.msr_block));
> +#endif
> 	/* book3s */
> #ifdef CONFIG_PPC_BOOK3S
> 	DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
> @@ -494,6 +497,12 @@ int main(void)
> 	DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
> 	DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
> #endif
> +#ifdef CONFIG_SPE
> +	DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
> +	DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
> +	DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
> +	DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
> +#endif /* CONFIG_SPE */
> 
> #ifdef CONFIG_KVM_EXIT_TIMING
> 	DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index ef76acb..ade7cf6 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -13,6 +13,7 @@
>  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>  *
>  * Copyright IBM Corp. 2007
> + * Copyright 2010-2011 Freescale Semiconductor, Inc.
>  *
>  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
>  *          Christian Ehrhardt <ehrhardt@xxxxxxxxxxxxxxxxxx>
> @@ -344,10 +345,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 		r = RESUME_GUEST;
> 		break;
> 
> -	case BOOKE_INTERRUPT_SPE_UNAVAIL:
> -		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
> +#ifdef CONFIG_SPE
> +	case BOOKE_INTERRUPT_SPE_UNAVAIL: {
> +		extern void kvmppc_vcpu_spe_load(struct kvm_vcpu *vcpu);
> +
> +		/* reload the SPE env on the guest's first SPE use since the last save */
> +		if (vcpu->arch.msr_block & MSR_SPE)
> +			kvmppc_vcpu_spe_load(vcpu);
> +
> +		if (!(vcpu->arch.shared->msr & MSR_SPE))
> +			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);

These are in the wrong order, no? If the guest doesn't do SPE, we can save ourselves the loading.
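Something like this maybe (untested, just to illustrate the ordering I have in mind; all names are from your patch):

	case BOOKE_INTERRUPT_SPE_UNAVAIL:
		if (vcpu->arch.shared->msr & MSR_SPE) {
			/* guest really uses SPE; reload its state if we dropped it */
			if (vcpu->arch.msr_block & MSR_SPE)
				kvmppc_vcpu_spe_load(vcpu);
		} else {
			/* guest hasn't enabled SPE, so just reflect the exception */
			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
		}
		r = RESUME_GUEST;
		break;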

> 		r = RESUME_GUEST;
> 		break;
> +	}
> 
> 	case BOOKE_INTERRUPT_SPE_FP_DATA:
> 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
> @@ -358,6 +368,28 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 		kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
> 		r = RESUME_GUEST;
> 		break;
> +#else
> +	case BOOKE_INTERRUPT_SPE_UNAVAIL:
> +		/*
> +		 * Guest wants SPE, but host kernel doesn't support it.  Send
> +		 * an "unimplemented operation" program check to the guest.
> +		 */
> +		kvmppc_core_queue_program(vcpu, ESR_PUO | ESR_SPV);
> +		r = RESUME_GUEST;
> +		break;
> +
> +	/*
> +	 * These really should never happen without CONFIG_SPE,
> +	 * as we should never enable the real MSR[SPE] in the guest.
> +	 */
> +	case BOOKE_INTERRUPT_SPE_FP_DATA:
> +	case BOOKE_INTERRUPT_SPE_FP_ROUND:
> +		printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
> +		       __func__, exit_nr, vcpu->arch.pc);
> +		run->hw.hardware_exit_reason = exit_nr;
> +		r = RESUME_HOST;
> +		break;
> +#endif
> 
> 	case BOOKE_INTERRUPT_DATA_STORAGE:
> 		kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
> diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
> index 1cc471f..ebe882a 100644
> --- a/arch/powerpc/kvm/booke_interrupts.S
> +++ b/arch/powerpc/kvm/booke_interrupts.S
> @@ -13,6 +13,7 @@
>  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
>  *
>  * Copyright IBM Corp. 2007
> + * Copyright 2011 Freescale Semiconductor, Inc.
>  *
>  * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
>  */
> @@ -241,6 +242,14 @@ _GLOBAL(kvmppc_resume_host)
> heavyweight_exit:
> 	/* Not returning to guest. */
> 
> +#ifdef CONFIG_SPE
> +	/* save guest SPEFSCR and load host SPEFSCR */
> +	mfspr	r9, SPRN_SPEFSCR
> +	stw	r9, VCPU_SPEFSCR(r4)
> +	lwz	r9, VCPU_HOST_SPEFSCR(r4)
> +	mtspr	SPRN_SPEFSCR, r9
> +#endif
> +
> 	/* We already saved guest volatile register state; now save the
> 	 * non-volatiles. */
> 	stw	r15, VCPU_GPR(r15)(r4)
> @@ -342,6 +351,14 @@ _GLOBAL(__kvmppc_vcpu_run)
> 	lwz	r30, VCPU_GPR(r30)(r4)
> 	lwz	r31, VCPU_GPR(r31)(r4)
> 
> +#ifdef CONFIG_SPE
> +	/* save host SPEFSCR and load guest SPEFSCR */
> +	mfspr	r3, SPRN_SPEFSCR
> +	stw	r3, VCPU_HOST_SPEFSCR(r4)
> +	lwz	r3, VCPU_SPEFSCR(r4)
> +	mtspr	SPRN_SPEFSCR, r3
> +#endif
> +
> lightweight_exit:
> 	stw	r2, HOST_R2(r1)
> 
> @@ -409,7 +426,6 @@ lightweight_exit:
> 	mtctr	r3
> 	lwz	r3, VCPU_CR(r4)
> 	mtcr	r3
> -	lwz	r5, VCPU_GPR(r5)(r4)
> 	lwz	r6, VCPU_GPR(r6)(r4)
> 	lwz	r7, VCPU_GPR(r7)(r4)
> 	lwz	r8, VCPU_GPR(r8)(r4)
> @@ -417,8 +433,11 @@ lightweight_exit:
> 	mtsrr0	r3
> 	lwz	r3, VCPU_SHARED(r4)
> 	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
> +	lwz	r5, VCPU_MSR_BLOCK(r4)
> 	oris	r3, r3, KVMPPC_MSR_MASK@h
> 	ori	r3, r3, KVMPPC_MSR_MASK@l
> +	andc	r3, r3, r5
> +	lwz	r5, VCPU_GPR(r5)(r4)

This is a lot cleaner than the last version, thank you :).

I was wondering why we need to take the guest's MSR value into account in the first place, though. Are there any bits in there that we wouldn't catch through a lazy trap? At the end of the day, the MSR will be shared between the guest and the host without traps, so we depend on trapping for events anyway.

Why can't we just keep a "guest mode MSR" field in the vcpu that contains the bits activated inside the guest? That way no bit accidentally slips through, and we save a good bunch of instructions in the lightweight exit path.
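Purely as a sketch of what I mean (the guest_msr_enable field name is invented, and this is untested):

/* arch.guest_msr_enable: MSR bits we have actually enabled for the guest.
 * lightweight_exit would simply OR this into the SRR1 value instead of
 * pulling MSR_SPE out of shared->msr and masking it with msr_block. */

void kvmppc_vcpu_spe_load(struct kvm_vcpu *vcpu)
{
	enable_kernel_spe();
	kvmppc_load_guest_spe(vcpu);
	vcpu->arch.guest_msr_enable |= MSR_SPE;		/* SPE state now in hardware */
}

void kvmppc_vcpu_spe_put(struct kvm_vcpu *vcpu)
{
	enable_kernel_spe();
	kvmppc_save_guest_spe(vcpu);
	vcpu->arch.guest_msr_enable &= ~MSR_SPE;	/* trap the guest's next SPE use */
}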

> 	mtsrr1	r3
> 
> 	/* Clear any debug events which occurred since we disabled MSR[DE].
> @@ -431,3 +450,49 @@ lightweight_exit:
> 	lwz	r3, VCPU_GPR(r3)(r4)
> 	lwz	r4, VCPU_GPR(r4)(r4)
> 	rfi
> +
> +#ifdef CONFIG_SPE
> +#define KVMPPC_SAVE_EVR(n,s,base)	evmergehi s,s,n; stw s,(4*(n))(base)

Can't we somehow combine those with the normal Linux SAVE_EVR macros? All we need to do is define a new set of macros that take a generic offset, have the normal macros use it with offset=THREAD_EVR0, and use offset=0 here. That way we could probably even specify the offset directly inside the kvm code, and maybe also in the code that currently uses SAVE_EVR.

Sorry for only mentioning those things so late.
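Roughly this (from memory, untested -- exact macro names in ppc_asm.h may differ slightly):

/* ppc_asm.h: give the EVR save/restore macros an explicit offset */
#define SAVE_EVR(n,s,base,o)	evmergehi s,s,n; stw s,o+4*(n)(base)
#define SAVE_2EVRS(n,s,base,o)	SAVE_EVR(n,s,base,o); SAVE_EVR(n+1,s,base,o)
/* ... and so on up to the 32-register variant, likewise for REST_EVR ... */

Existing users would pass THREAD_EVR0 as the offset, and kvm could then do something like SAVE_32EVRS(0, r4, r3, VCPU_EVR) directly instead of duplicating the whole macro family.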

> +#define KVMPPC_SAVE_2EVR(n,s,base)	KVMPPC_SAVE_EVR(n,s,base); \
> +					   KVMPPC_SAVE_EVR(n+1,s,base)
> +#define KVMPPC_SAVE_4EVR(n,s,base)	KVMPPC_SAVE_2EVR(n,s,base); \
> +					   KVMPPC_SAVE_2EVR(n+2,s,base)
> +#define KVMPPC_SAVE_8EVR(n,s,base)	KVMPPC_SAVE_4EVR(n,s,base); \
> +					   KVMPPC_SAVE_4EVR(n+4,s,base)
> +#define KVMPPC_SAVE_16EVR(n,s,base)	KVMPPC_SAVE_8EVR(n,s,base); \
> +					   KVMPPC_SAVE_8EVR(n+8,s,base)
> +#define KVMPPC_SAVE_32EVR(n,s,base)	KVMPPC_SAVE_16EVR(n,s,base); \
> +					   KVMPPC_SAVE_16EVR(n+16,s,base)
> +#define KVMPPC_LOAD_EVR(n,s,base)	lwz s,(4*(n))(base); evmergelo n,s,n
> +#define KVMPPC_LOAD_2EVR(n,s,base)	KVMPPC_LOAD_EVR(n,s,base); \
> +					   KVMPPC_LOAD_EVR(n+1,s,base)
> +#define KVMPPC_LOAD_4EVR(n,s,base)	KVMPPC_LOAD_2EVR(n,s,base); \
> +					   KVMPPC_LOAD_2EVR(n+2,s,base)
> +#define KVMPPC_LOAD_8EVR(n,s,base)	KVMPPC_LOAD_4EVR(n,s,base); \
> +					   KVMPPC_LOAD_4EVR(n+4,s,base)
> +#define KVMPPC_LOAD_16EVR(n,s,base)	KVMPPC_LOAD_8EVR(n,s,base); \
> +					   KVMPPC_LOAD_8EVR(n+8,s,base)
> +#define KVMPPC_LOAD_32EVR(n,s,base)	KVMPPC_LOAD_16EVR(n,s,base); \
> +					   KVMPPC_LOAD_16EVR(n+16,s,base)
> +
> +_GLOBAL(kvmppc_save_guest_spe)
> +	cmpi	0,r3,0
> +	beqlr-
> +	addi	r5,r3,VCPU_EVR
> +	KVMPPC_SAVE_32EVR(0,r4,r5)	/* save evr[32] */
> +	evxor   evr6, evr6, evr6
> +	evmwumiaa evr6, evr6, evr6
> +	li	r4,VCPU_ACC
> +	evstddx evr6, r4, r3		/* save acc */
> +	blr
> +
> +_GLOBAL(kvmppc_load_guest_spe)
> +	cmpi	0,r3,0
> +	beqlr-
> +	li      r4,VCPU_ACC
> +	evlddx  evr6,r4,r3
> +	evmra   evr6,evr6		/* load acc */
> +	addi	r5,r3,VCPU_EVR
> +	KVMPPC_LOAD_32EVR(0,r4,r5)	/* load evr[32] */
> +	blr
> +#endif
> diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
> index 0c1af12..fbd3bdc 100644
> --- a/arch/powerpc/kvm/e500.c
> +++ b/arch/powerpc/kvm/e500.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
> + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
>  *
>  * Author: Yu Liu, <yu.liu@xxxxxxxxxxxxx>
>  *
> @@ -25,6 +25,25 @@
> #include "booke.h"
> #include "e500_tlb.h"
> 
> +#ifdef CONFIG_SPE
> +extern void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu);
> +extern void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu);
> +
> +void kvmppc_vcpu_spe_put(struct kvm_vcpu *vcpu)
> +{
> +	enable_kernel_spe();
> +	kvmppc_save_guest_spe(vcpu);
> +	vcpu->arch.msr_block |= MSR_SPE;
> +}
> +
> +void kvmppc_vcpu_spe_load(struct kvm_vcpu *vcpu)
> +{
> +	enable_kernel_spe();
> +	kvmppc_load_guest_spe(vcpu);
> +	vcpu->arch.msr_block &= ~MSR_SPE;
> +}
> +#endif
> +
> void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
> {
> }
> @@ -41,6 +60,11 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
> {
> 	kvmppc_e500_tlb_put(vcpu);
> +#ifdef CONFIG_SPE
> +	/* save SPE env if guest has used SPE since last save */
> +	if (!(vcpu->arch.msr_block & MSR_SPE))
> +		kvmppc_vcpu_spe_put(vcpu);
> +#endif
> }
> 
> int kvmppc_core_check_processor_compat(void)
> -- 
> 1.7.1
> 
