This patch applies to the v9 patch series of KVM, i.e. commit d5321dceeaccf756755e76b38d8b5905bd99d250 In this patch, we use the Hyp Coprocessor Trap Register (HPCTR) to trap VFP/NEON instructions, and switch the FPU state at that point. After a guest exit, the VFP state is returned to the host. When disabling access to floating point instructions, we also mask FPEXC_EN in order to avoid the guest receiving Undefined instruction exceptions before we have a chance to switch back the floating point state. Initial lazy switching implementation provided by Rusty Russell. In this version of the patch, FPEXC_EN is no longer clobbered for the host; we save FPEXC before we mask it. We are reusing vfp_hard_struct, so now we depend on VFPv3 being enabled in the host kernel. For now, if the kernel has not been built with VFPv3, we don't build the VFP switching code in KVM, however we still trap cp10 and cp11 in order to inject an undefined instruction exception whenever the guest tries to use VFP/NEON. Alternatives to this would be to either require VFPv3 for KVM in the first place, or to implement this as a feature that can be switched on and off. If we don't mind the #ifdefs we can keep the current solution. For now I don't reuse vfp_save_state. If we still want to do that, we would have to change it a bit and also add a vfp_restore_state, as already mentioned by reviewers. However I have noticed that vfp_save_state doesn't clear FPEXC_EX before reading the rest of the registers, as we do according to the ARM ARM p. AppxF-2388. Can someone comment on whether this is a bug in vfp_save_state? Changes since v2: * Fix clobbering the host's FPEXC_EN on entry * Reuse vfp_hard_struct from asm/fpstate.h * Reuse macros from asm/vfpmacros.h (warning: out of date mnemonics) * Disable VFP support if kernel hasn't been cofigured with VFPv3 * Tweak and reuse set_hcptr instead of setting the HCPTR manually Changes since v1: * Lazy switching by Rusty Russell * Lazy switching fix; mask FPEXC_EN so guest processes don't crash * Simpler lazy switching handler * Replaced instruction mnemonics with coprocessor operations so the code can be built on gcc configurations with no full VFP-d32 support * VFP subarchitecture handling fix; disable FPEXC_EX before switching cp11 registers Signed-off-by: Rusty Russell <rusty.russell at linaro.org> Signed-off-by: Antonios Motakis <a.motakis at virtualopensystems.com> --- arch/arm/include/asm/kvm_host.h | 6 +++ arch/arm/kernel/asm-offsets.c | 2 + arch/arm/kvm/interrupts.S | 104 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 104 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c7e782..63d5cc1 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include <asm/fpstate.h> + #define KVM_MAX_VCPUS 4 #define KVM_MEMORY_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 @@ -103,6 +105,10 @@ struct kvm_vcpu_arch { /* System control coprocessor (cp15) */ u32 cp15[nr_cp15_regs]; + /* Floating point registers (VFP and Advanced SIMD/NEON) */ + struct vfp_hard_struct vfp_guest; + struct vfp_hard_struct vfp_host; + /* Exception Information */ u32 hsr; /* Hyp Syndrom Register */ u32 hdfar; /* Hyp Data Fault Address Register */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 9c76b53..d9ed793 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -168,6 +168,8 @@ int main(void) DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW])); DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO])); DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV])); + DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.vfp_host)); DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index fd7331c..3159ce7 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -23,6 +23,7 @@ #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> +#include <asm/vfpmacros.h> #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -89,6 +90,49 @@ ENTRY(__kvm_flush_vm_context) bx lr ENDPROC(__kvm_flush_vm_context) +/* Clobbers {r2-r7} */ +.macro store_vfp_state vfp_off, vcpup + add r6, \vcpup, \vfp_off + + @ The VFPFMRX and VFPFMXR macros are the VMRS and VMSR instructions + vfpfmrx r2, FPEXC + @ Make sure VFP is enabled so we can touch the registers. + orr r7, r2, #FPEXC_EN + vfpfmxr FPEXC, r7 + + vfpfmrx r3, FPSCR + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + vfpfmrx r4, FPINST + tst r2, #FPEXC_FP2V + beq 2f + vfpfmrx r5, FPINST2 +2: + bic r7, r2, #FPEXC_EX @ FPEXC_EX disable + vfpfmxr FPEXC, r7 +1: + vfpfstmia r6, r7 @ Save VFP registers + stm r6, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2 +.endm + +/* Assume FPEXC_EN is on and FPEXC_EX is off, clobbers {r2-r7} */ +.macro restore_vfp_state vfp_off, vcpup + add r6, \vcpup, \vfp_off + + vfpfldmia r6, r7 @ Load VFP registers + ldm r6, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2 + + vfpfmxr FPSCR, r3 + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + vfpfmxr FPINST, r4 + tst r2, #FPEXC_FP2V + beq 1f + vfpfmxr FPINST2, r5 +1: + vfpfmxr FPEXC, r2 @ FPEXC (last, in case !EN) +.endm + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ Hypervisor world-switch code @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -281,16 +325,16 @@ ENDPROC(__kvm_flush_vm_context) .endm /* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return - * (hardware reset value is 0) */ -.macro set_hcptr entry + * (hardware reset value is 0). Keep previous value in r2. */ +.macro set_hcptr entry, mask mrc p15, 4, r2, c1, c1, 2 - ldr r3, =(HCPTR_TTA) + ldr r3, =\mask .if \entry == 1 - orr r2, r2, r3 @ Trap some coproc-accesses + orr r3, r2, r3 @ Trap some coproc-accesses .else - bic r2, r2, r3 @ Don't trap any coproc- accesses + bic r3, r2, r3 @ Don't trap any coproc- accesses .endif - mcr p15, 4, r2, c1, c1, 2 + mcr p15, 4, r3, c1, c1, 2 .endm /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */ @@ -328,6 +372,14 @@ ENTRY(__kvm_vcpu_run) read_cp15_state write_cp15_state 1, r0 +#ifdef CONFIG_VFPv3 + @ Mask FPEXC_EN so the guest doesn't trap floating point instructions + vfpfmrx r2, FPEXC @ VMRS + push {r2} + orr r2, r2, #FPEXC_EN + vfpfmxr FPEXC, r2 @ VMSR +#endif + push {r0} @ Push the VCPU pointer @ Configure Hyp-role @@ -335,7 +387,7 @@ ENTRY(__kvm_vcpu_run) @ Trap coprocessor CRx accesses set_hstr 1 - set_hcptr 1 + set_hcptr 1, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) @ Write configured ID register into MIDR alias ldr r1, [r0, #VCPU_MIDR] @@ -395,7 +447,22 @@ __kvm_vcpu_return: @ Don't trap coprocessor accesses for host kernel set_hstr 0 - set_hcptr 0 + set_hcptr 0, (HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11)) + +#ifdef CONFIG_VFPv3 + @ Save floating point registers we if let guest use them. + tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11)) + bne after_vfp_restore + + @ Switch VFP/NEON hardware state to the host's + store_vfp_state #VCPU_VFP_GUEST, r1 + restore_vfp_state #VCPU_VFP_HOST, r1 + +after_vfp_restore: + @ Restore FPEXC_EN which we clobbered on entry + pop {r2} + vfpfmxr FPEXC, r2 +#endif @ Reset Hyp-role configure_hyp_role 0, r1 @@ -587,6 +654,10 @@ hyp_hvc: @ Check syndrome register mrc p15, 4, r0, c5, c2, 0 @ HSR lsr r1, r0, #HSR_EC_SHIFT +#ifdef CONFIG_VFPv3 + cmp r1, #HSR_EC_CP_0_13 + beq switch_to_guest_vfp +#endif cmp r1, #HSR_EC_HVC bne guest_trap @ Not HVC instr. @@ -661,6 +732,23 @@ guest_trap: 1: mov r0, #ARM_EXCEPTION_HVC b __kvm_vcpu_return +#ifdef CONFIG_VFPv3 +switch_to_guest_vfp: + ldr r0, [sp, #12] @ Load VCPU pointer + push {r3-r7} + + @ NEON/VFP used. Turn on VFP access. + set_hcptr 0, (HCPTR_TCP(10) | HCPTR_TCP(11)) + + @ Switch VFP/NEON hardware state to the guest's + store_vfp_state #VCPU_VFP_HOST, r0 + restore_vfp_state #VCPU_VFP_GUEST, r0 + + pop {r3-r7} + pop {r0-r2} + eret +#endif + .align hyp_irq: push {r0} -- 1.7.9.5