[kvmarm] [PATCH v3] ARM: KVM: lazy save/restore of vfp/neon

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch applies to the v9 patch series of KVM,
i.e. commit d5321dceeaccf756755e76b38d8b5905bd99d250

In this patch, we use the Hyp Coprocessor Trap Register
(HCPTR) to trap VFP/NEON instructions, and switch the FPU
state at that point. After a guest exit, the VFP state is
returned to the host. When disabling access to floating point
instructions, we also mask FPEXC_EN in order to avoid the guest
receiving Undefined instruction exceptions before we have a
chance to switch back the floating point state.

Initial lazy switching implementation provided by Rusty Russell.

In this version of the patch, FPEXC_EN is no longer clobbered for
the host; we save FPEXC before we mask it.

We are reusing vfp_hard_struct, so now we depend on VFPv3 being
enabled in the host kernel. For now, if the kernel has not been
built with VFPv3, we don't build the VFP switching code in KVM,
however we still trap cp10 and cp11 in order to inject an undefined
instruction exception whenever the guest tries to use VFP/NEON.

Alternatives to this would be to either require VFPv3 for KVM in
the first place, or to implement this as a feature that can be switched
on and off. If we don't mind the #ifdefs we can keep the current solution.

For now I don't reuse vfp_save_state. If we still want to do that, we
would have to change it a bit and also add a vfp_restore_state, as
already mentioned by reviewers. However I have noticed that vfp_save_state
doesn't clear FPEXC_EX before reading the rest of the registers, as
we do according to the ARM ARM p. AppxF-2388. Can someone comment on
whether this is a bug in vfp_save_state?

Changes since v2:
 * Fix clobbering the host's FPEXC_EN on entry
 * Reuse vfp_hard_struct from asm/fpstate.h
 * Reuse macros from asm/vfpmacros.h (warning: out of date mnemonics)
 * Disable VFP support if kernel hasn't been configured with VFPv3
 * Tweak and reuse set_hcptr instead of setting the HCPTR manually

Changes since v1:
 * Lazy switching by Rusty Russell
 * Lazy switching fix; mask FPEXC_EN so guest processes don't crash
 * Simpler lazy switching handler
 * Replaced instruction mnemonics with coprocessor operations so the
   code can be built on gcc configurations with no full VFP-d32 support
 * VFP subarchitecture handling fix; disable FPEXC_EX before switching
   cp11 registers

Signed-off-by: Rusty Russell <rusty.russell at linaro.org>
Signed-off-by: Antonios Motakis <a.motakis at virtualopensystems.com>
---
 arch/arm/include/asm/kvm_host.h |    6 +++
 arch/arm/kernel/asm-offsets.c   |    2 +
 arch/arm/kvm/interrupts.S       |  104 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0c7e782..63d5cc1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <asm/fpstate.h>
+
 #define KVM_MAX_VCPUS 4
 #define KVM_MEMORY_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
@@ -103,6 +105,10 @@ struct kvm_vcpu_arch {
 	/* System control coprocessor (cp15) */
 	u32 cp15[nr_cp15_regs];
 
+	/* Floating point registers (VFP and Advanced SIMD/NEON) */
+	struct vfp_hard_struct vfp_guest;
+	struct vfp_hard_struct vfp_host;
+
 	/* Exception Information */
 	u32 hsr;		/* Hyp Syndrom Register */
 	u32 hdfar;		/* Hyp Data Fault Address Register */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 9c76b53..d9ed793 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,8 @@ int main(void)
   DEFINE(VCPU_TID_URW,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW]));
   DEFINE(VCPU_TID_URO,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO]));
   DEFINE(VCPU_TID_PRIV,		offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV]));
+  DEFINE(VCPU_VFP_GUEST,	offsetof(struct kvm_vcpu, arch.vfp_guest));
+  DEFINE(VCPU_VFP_HOST,		offsetof(struct kvm_vcpu, arch.vfp_host));
   DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index fd7331c..3159ce7 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -23,6 +23,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/vfpmacros.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -89,6 +90,49 @@ ENTRY(__kvm_flush_vm_context)
 	bx	lr
 ENDPROC(__kvm_flush_vm_context)
 
+/* Clobbers {r2-r7} */
+.macro store_vfp_state vfp_off, vcpup
+	add	r6, \vcpup, \vfp_off
+
+	@ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions
+	vfpfmrx	r2, FPEXC
+	@ Make sure VFP is enabled so we can touch the registers.
+	orr	r7, r2, #FPEXC_EN
+	vfpfmxr	FPEXC, r7
+
+	vfpfmrx	r3, FPSCR
+	tst	r2, #FPEXC_EX		@ Check for VFP Subarchitecture
+	beq	1f
+	vfpfmrx r4, FPINST
+	tst	r2, #FPEXC_FP2V
+	beq	2f
+	vfpfmrx r5, FPINST2
+2:
+	bic	r7, r2, #FPEXC_EX	@ FPEXC_EX disable
+	vfpfmxr	FPEXC, r7
+1:
+	vfpfstmia r6, r7		@ Save VFP registers
+	stm	r6, {r2-r5}		@ Save FPEXC, FPSCR, FPINST, FPINST2
+.endm
+
+/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r7} */
+.macro restore_vfp_state vfp_off, vcpup
+	add	r6, \vcpup, \vfp_off
+
+	vfpfldmia r6, r7		@ Load VFP registers
+	ldm	r6, {r2-r5}		@ Load FPEXC, FPSCR, FPINST, FPINST2
+
+	vfpfmxr FPSCR, r3
+	tst	r2, #FPEXC_EX		@ Check for VFP Subarchitecture
+	beq	1f
+	vfpfmxr FPINST, r4
+	tst	r2, #FPEXC_FP2V
+	beq	1f
+	vfpfmxr FPINST2, r5
+1:
+	vfpfmxr FPEXC, r2	@ FPEXC	(last, in case !EN)
+.endm
+
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 @  Hypervisor world-switch code
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -281,16 +325,16 @@ ENDPROC(__kvm_flush_vm_context)
 .endm
 
 /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
- * (hardware reset value is 0) */
-.macro set_hcptr entry
+ * (hardware reset value is 0). Keep previous value in r2. */
+.macro set_hcptr entry, mask
 	mrc	p15, 4, r2, c1, c1, 2
-	ldr	r3, =(HCPTR_TTA)
+	ldr	r3, =\mask
 	.if \entry == 1
-	orr	r2, r2, r3		@ Trap some coproc-accesses
+	orr	r3, r2, r3		@ Trap some coproc-accesses
 	.else
-	bic	r2, r2, r3		@ Don't trap any coproc- accesses
+	bic	r3, r2, r3		@ Don't trap any coproc- accesses
 	.endif
-	mcr	p15, 4, r2, c1, c1, 2
+	mcr	p15, 4, r3, c1, c1, 2
 .endm
 
 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
@@ -328,6 +372,14 @@ ENTRY(__kvm_vcpu_run)
 	read_cp15_state
 	write_cp15_state 1, r0
 
+#ifdef CONFIG_VFPv3
+	@ Set FPEXC_EN so guest VFP/NEON use traps to Hyp instead of raising Undef
+	vfpfmrx r2, FPEXC		@ VMRS
+	push	{r2}
+	orr	r2, r2, #FPEXC_EN
+	vfpfmxr FPEXC, r2		@ VMSR
+#endif
+
 	push	{r0}			@ Push the VCPU pointer
 
 	@ Configure Hyp-role
@@ -335,7 +387,7 @@ ENTRY(__kvm_vcpu_run)
 
 	@ Trap coprocessor CRx accesses
 	set_hstr 1
-	set_hcptr 1
+	set_hcptr 1, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
 
 	@ Write configured ID register into MIDR alias
 	ldr	r1, [r0, #VCPU_MIDR]
@@ -395,7 +447,22 @@ __kvm_vcpu_return:
 
 	@ Don't trap coprocessor accesses for host kernel
 	set_hstr 0
-	set_hcptr 0
+	set_hcptr 0, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11))
+
+#ifdef CONFIG_VFPv3
+	@ Save floating point registers if we let the guest use them.
+	tst	r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
+	bne	after_vfp_restore
+
+	@ Switch VFP/NEON hardware state to the host's
+	store_vfp_state #VCPU_VFP_GUEST, r1
+	restore_vfp_state #VCPU_VFP_HOST, r1
+
+after_vfp_restore:
+	@ Restore FPEXC_EN which we clobbered on entry
+	pop	{r2}
+	vfpfmxr FPEXC, r2
+#endif
 
 	@ Reset Hyp-role
 	configure_hyp_role 0, r1
@@ -587,6 +654,10 @@ hyp_hvc:
 	@ Check syndrome register
 	mrc	p15, 4, r0, c5, c2, 0	@ HSR
 	lsr	r1, r0, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+	cmp	r1, #HSR_EC_CP_0_13
+	beq	switch_to_guest_vfp
+#endif
 	cmp	r1, #HSR_EC_HVC
 	bne	guest_trap		@ Not HVC instr.
 
@@ -661,6 +732,23 @@ guest_trap:
 1:	mov	r0, #ARM_EXCEPTION_HVC
 	b	__kvm_vcpu_return
 
+#ifdef CONFIG_VFPv3
+switch_to_guest_vfp:
+	ldr	r0, [sp, #12]		@ Load VCPU pointer
+	push	{r3-r7}
+
+	@ NEON/VFP used.  Turn on VFP access.
+	set_hcptr 0, (HCPTR_TCP(10) | HCPTR_TCP(11))
+
+	@ Switch VFP/NEON hardware state to the guest's
+	store_vfp_state #VCPU_VFP_HOST, r0
+	restore_vfp_state #VCPU_VFP_GUEST, r0
+
+	pop	{r3-r7}
+	pop	{r0-r2}
+	eret
+#endif
+
 	.align
 hyp_irq:
 	push	{r0}
-- 
1.7.9.5



[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux