[kvmarm] [PATCH v2] ARM: KVM: lazy save/restore of vfp/neon

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch applies to the v9 patch series of KVM,
i.e. commit d5321dceeaccf756755e76b38d8b5905bd99d250

In this patch, we use the Hyp Coprocessor Trap Register
(HCPTR) to trap VFP/NEON instructions, and switch the FPU
state at that point. After a guest exit, the VFP state is
returned to the host.

Initial lazy switching implementation provided by Rusty Russell.

When disabling access to floating point instructions, we also mask
FPEXC_EN in order to avoid the guest receiving Undefined instruction
exceptions before we have a chance to switch back the floating point
state. One assumption in the patch is that FPEXC_EN will be off for
the host when we are about to enter the guest. Removing this assumption
is trivial, but it looks like it is always the case (comments?).

Changes since v1:
 * Lazy switching by Rusty Russell
 * Lazy switching fix; mask FPEXC_EN so guest processes don't crash
 * Simpler lazy switching handler
 * Replaced instruction mnemonics with coprocessor operations so the
   code can be built on gcc configurations with no full VFP-d32 support
 * VFP subarchitecture handling fix; disable FPEXC_EX before switching
   cp11 registers

Signed-off-by: Antonios Motakis <a.motakis at virtualopensystems.com>
Signed-off-by: Rusty Russell <rusty.russell at linaro.org>
---
 arch/arm/include/asm/kvm_host.h |   16 ++++++
 arch/arm/kernel/asm-offsets.c   |    4 ++
 arch/arm/kvm/interrupts.S       |  111 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c7e782..2906ae9 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -97,12 +97,28 @@ enum cp15_regs {
 	nr_cp15_regs
 };
 
+enum cp10_regs {
+	FPEXC,			/* Floating Point Exception Control Register */
+	FPSCR,			/* Floating Point Status and Control Register */
+	FPINST,			/* Common VFP Subarchitecture Registers */
+	FPINST2,
+	nr_cp10_regs
+};
+
 struct kvm_vcpu_arch {
 	struct kvm_vcpu_regs regs;
 
 	/* System control coprocessor (cp15) */
 	u32 cp15[nr_cp15_regs];
 
+	/* Floating point registers (VFP and Advanced SIMD/NEON) */
+	u32 guest_cp10[nr_cp10_regs];
+	u32 guest_cp11[64];
+
+	/* Saved host vfp state. */
+	u32 host_cp10[nr_cp10_regs];
+	u32 host_cp11[64];
+
 	/* Exception Information */
 	u32 hsr;		/* Hyp Syndrom Register */
 	u32 hdfar;		/* Hyp Data Fault Address Register */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 9c76b53..18747c8 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,10 @@ int main(void)
   DEFINE(VCPU_TID_URW,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW]));
   DEFINE(VCPU_TID_URO,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO]));
   DEFINE(VCPU_TID_PRIV,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV]));
+  DEFINE(VCPU_GUEST_CP10,	offsetof(struct kvm_vcpu, arch.guest_cp10));
+  DEFINE(VCPU_GUEST_CP11,	offsetof(struct kvm_vcpu, arch.guest_cp11));
+  DEFINE(VCPU_HOST_CP10,	offsetof(struct kvm_vcpu, arch.host_cp10));
+  DEFINE(VCPU_HOST_CP11,	offsetof(struct kvm_vcpu, arch.host_cp11));
   DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index fd7331c..83d61d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -23,6 +23,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/vfp.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -89,6 +90,62 @@ ENTRY(__kvm_flush_vm_context)
 	bx	lr
 ENDPROC(__kvm_flush_vm_context)
 
+/* Clobbers {r2-r5} and {r9-r11} */
+.macro store_vfp_state cp10_off, cp11_off, vcpup
+	mrc	p10, 7, r2, cr8, cr0, 0		@ FPEXC
+	@ Make sure VFP is enabled so we can touch the registers.
+	orr	r3, r2, #FPEXC_EN
+	mcr	p10, 7, r3, cr8, cr0, 0		@ FPEXC
+
+	mrc	p10, 7, r3, cr1, cr0, 0		@ FPSCR
+	tst	r2, #FPEXC_EX			@ Check for VFP Subarchitecture
+	beq	1f
+	mrc	p10, 7, r4, cr9, cr0, 0		@ FPINST
+	tst	r2, #FPEXC_FP2V
+	beq	2f
+	mrc	p10, 7, r5, cr10, cr0, 0	@ FPINST2
+2:
+	bic	r9, r9, #FPEXC_EX
+	mcr	p10, 7, r9, cr8, cr0, 0		@ FPEXC_EX disable
+
+1:
+	add	r10, \vcpup, \cp10_off
+	stm	r10, {r2-r5}		@ Save FPEXC, FPSCR, FPINST, FPINST2
+
+
+	add	r11, \vcpup, \cp11_off
+	stc	p11, cr0, [r11],#32*4		@ VSTM r11!, {d0-d15}
+	mrc	p10, 7, r9, cr7, cr0, 0		@ MVFR0
+	and	r9, r9, #MVFR0_A_SIMD_MASK
+	cmp	r9, #2				@ Check for 32 registers
+	stceql	p11, cr0, [r11],#32*4		@ VSTMeq r11!, {d16-d31}
+
+.endm
+
+/* Assume FPEXC_EN is on and FPEXC_EX is off */
+/* Clobbers {r2-r5} and {r9-r11} */
+.macro restore_vfp_state cp10_off, cp11_off, vcpup
+	add	r11, \vcpup, \cp11_off
+	ldc	p11, cr0, [r11],#32*4		@ VLDM r11!, {d0-d15}
+	mrc	p10, 7, r9, cr7, cr0, 0		@ MVFR0
+	and	r9, r9, #MVFR0_A_SIMD_MASK
+	cmp	r9, #2				@ Check for 32 registers
+	ldceql	p11, cr0, [r11],#32*4		@ VLDMeq r11!, {d16-d31}
+
+	add	r10, \vcpup, \cp10_off
+	ldm	r10, {r2-r5}		@ Load FPEXC, FPSCR, FPINST, FPINST2
+
+	mcr	p10, 7, r3, cr1, cr0, 0		@ FPSCR
+	tst	r2, #FPEXC_EX			@ Check for VFP Subarchitecture
+	beq	1f
+	mcr	p10, 7, r4, cr9, cr0, 0		@ FPINST
+	tst	r2, #FPEXC_FP2V
+	beq	1f
+	mcr	p10, 7, r5, cr10, cr0, 0	@ FPINST2
+1:
+	mcr	p10, 7, r2, cr8, cr0, 0		@ FPEXC	(last, in case !EN)
+.endm
+
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 @  Hypervisor world-switch code
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -337,6 +394,16 @@ ENTRY(__kvm_vcpu_run)
 	set_hstr 1
 	set_hcptr 1
 
+	@ Set FPEXC_EN so the guest doesn't trap floating point instructions
+	mrc	p10, 7, r2, cr8, cr0, 0		@ FPEXC
+	orr	r2, r2, #FPEXC_EN
+	mcr	p10, 7, r2, cr8, cr0, 0		@ FPEXC
+
+	@ Trap floating point accesses so we can lazy restore
+	mrc	p15, 4, r1, c1, c1, 2
+	orr	r1, r1, #((1 << 10) | (1 << 11))	@ Trap cp10 and cp11
+	mcr	p15, 4, r1, c1, c1, 2
+
 	@ Write configured ID register into MIDR alias
 	ldr	r1, [r0, #VCPU_MIDR]
 	mcr	p15, 4, r1, c0, c0, 0
@@ -393,6 +460,27 @@ __kvm_vcpu_return:
 	store_mode_state r1, usr
 	sub	r1, r1, #(VCPU_USR_REG(13))
 
+	@ Save floating point registers if we let the guest use them.
+	mrc	p15, 4, r2, c1, c1, 2
+	tst	r2, #((1 << 10) | (1 << 11))
+	beq	switch_to_host_vfp
+
+	@ Don't trap VFP accesses for host kernel.
+	bic	r2, r2, #((1 << 10) | (1 << 11))
+	mcr	p15, 4, r2, c1, c1, 2
+	b	after_vfp_restore
+
+switch_to_host_vfp:
+	@ Switch VFP/NEON hardware state to the host's
+	store_vfp_state #VCPU_GUEST_CP10, #VCPU_GUEST_CP11, r1
+	restore_vfp_state #VCPU_HOST_CP10, #VCPU_HOST_CP11, r1
+
+after_vfp_restore:
+	@ Disable FPEXC_EN again
+	mrc	p10, 7, r2, cr8, cr0, 0		@ FPEXC
+	bic	r2, r2, #FPEXC_EN
+	mcr	p10, 7, r2, cr8, cr0, 0
+
 	@ Don't trap coprocessor accesses for host kernel
 	set_hstr 0
 	set_hcptr 0
@@ -587,6 +675,8 @@ hyp_hvc:
 	@ Check syndrome register
 	mrc	p15, 4, r0, c5, c2, 0	@ HSR
 	lsr	r1, r0, #HSR_EC_SHIFT
+	cmp	r1, #HSR_EC_CP_0_13
+	beq	switch_to_guest_vfp
 	cmp	r1, #HSR_EC_HVC
 	bne	guest_trap		@ Not HVC instr.
 
@@ -661,6 +751,27 @@ guest_trap:
 1:	mov	r0, #ARM_EXCEPTION_HVC
 	b	__kvm_vcpu_return
 
+switch_to_guest_vfp:
+	ldr	r0, [sp, #12]		@ Load VCPU pointer
+
+	@ NEON/VFP used.  Turn on VFP access.
+	mrc	p15, 4, r2, c1, c1, 2
+	bic	r2, r2, #((1 << 10) | (1 << 11))
+	mcr	p15, 4, r2, c1, c1, 2
+
+	push	{r3-r5}
+	push	{r9-r11}
+
+	@ Switch VFP/NEON hardware state to the guest's
+	store_vfp_state #VCPU_HOST_CP10, #VCPU_HOST_CP11, r0
+	restore_vfp_state #VCPU_GUEST_CP10, #VCPU_GUEST_CP11, r0
+
+	pop	{r9-r11}
+	pop	{r3-r5}
+
+	pop	{r0, r1, r2}
+	eret
+
 	.align
 hyp_irq:
 	push	{r0}
-- 
1.7.9.5



[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux