This patch applies to the v9 patch series of KVM, i.e. commit d5321dceeaccf756755e76b38d8b5905bd99d250 In this patch, we use the Hyp Coprocessor Trap Register (HCPTR) to trap VFP/NEON instructions, and switch the FPU state at that point. After a guest exit, the VFP state is returned to the host. Initial lazy switching implementation provided by Rusty Russell. When disabling access to floating point instructions, we also mask FPEXC_EN in order to avoid the guest receiving Undefined instruction exceptions before we have a chance to switch back the floating point state. One assumption in the patch is that FPEXC_EN will be off for the host when we are about to enter the guest. Removing this assumption is trivial, but it looks like it is always the case (comments?). Changes since v1: * Lazy switching by Rusty Russell * Lazy switching fix; mask FPEXC_EN so guest processes don't crash * Simpler lazy switching handler * Replaced instruction mnemonics with coprocessor operations so the code can be built on gcc configurations with no full VFP-d32 support * VFP subarchitecture handling fix; disable FPEXC_EX before switching cp11 registers Signed-off-by: Antonios Motakis <a.motakis at virtualopensystems.com> Signed-off-by: Rusty Russell <rusty.russell at linaro.org> --- arch/arm/include/asm/kvm_host.h | 16 ++++++ arch/arm/kernel/asm-offsets.c | 4 ++ arch/arm/kvm/interrupts.S | 111 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 131 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0c7e782..2906ae9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -97,12 +97,28 @@ enum cp15_regs { nr_cp15_regs }; +enum cp10_regs { + FPEXC, /* Floating Point Exception Control Register */ + FPSCR, /* Floating Point Status and Control Register */ + FPINST, /* Common VFP Subarchitecture Registers */ + FPINST2, + nr_cp10_regs +}; + struct kvm_vcpu_arch { struct kvm_vcpu_regs regs; /* System control 
coprocessor (cp15) */ u32 cp15[nr_cp15_regs]; + /* Floating point registers (VFP and Advanced SIMD/NEON) */ + u32 guest_cp10[nr_cp10_regs]; + u32 guest_cp11[64]; + + /* Saved host vfp state. */ + u32 host_cp10[nr_cp10_regs]; + u32 host_cp11[64]; + /* Exception Information */ u32 hsr; /* Hyp Syndrom Register */ u32 hdfar; /* Hyp Data Fault Address Register */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 9c76b53..18747c8 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -168,6 +168,10 @@ int main(void) DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW])); DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO])); DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV])); + DEFINE(VCPU_GUEST_CP10, offsetof(struct kvm_vcpu, arch.guest_cp10)); + DEFINE(VCPU_GUEST_CP11, offsetof(struct kvm_vcpu, arch.guest_cp11)); + DEFINE(VCPU_HOST_CP10, offsetof(struct kvm_vcpu, arch.host_cp10)); + DEFINE(VCPU_HOST_CP11, offsetof(struct kvm_vcpu, arch.host_cp11)); DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index fd7331c..83d61d9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -23,6 +23,7 @@ #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> +#include <asm/vfp.h> #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) #define VCPU_USR_SP (VCPU_USR_REG(13)) @@ -89,6 +90,62 @@ ENTRY(__kvm_flush_vm_context) bx lr ENDPROC(__kvm_flush_vm_context) +/* Clobbers {r2-r5} and {r9-r11} */ +.macro store_vfp_state cp10_off, cp11_off, vcpup + mrc p10, 7, r2, cr8, cr0, 0 @ FPEXC + @ Make sure VFP is enabled so we can touch the registers. 
+ orr r3, r2, #FPEXC_EN + mcr p10, 7, r3, cr8, cr0, 0 @ FPEXC + + mrc p10, 7, r3, cr1, cr0, 0 @ FPSCR + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + mrc p10, 7, r4, cr9, cr0, 0 @ FPINST + tst r2, #FPEXC_FP2V + beq 2f + mrc p10, 7, r5, cr10, cr0, 0 @ FPINST2 +2: + bic r9, r9, #FPEXC_EX + mcr p10, 7, r9, cr8, cr0, 0 @ FPEXC_EX disable + +1: + add r10, \vcpup, \cp10_off + stm r10, {r2-r5} @ Save FPEXC, FPSCR, FPINST, FPINST2 + + + add r11, \vcpup, \cp11_off + stc p11, cr0, [r11],#32*4 @ VSTM r11!, {d0-d15} + mrc p10, 7, r9, cr7, cr0, 0 @ MVFR0 + and r9, r9, #MVFR0_A_SIMD_MASK + cmp r9, #2 @ Check for 32 registers + stceql p11, cr0, [r11],#32*4 @ VSTMeq r11!, {d16-d31} + +.endm + +/* Assume FPEXC_EN is on and FPEXC_EX is off */ +/* Clobbers {r2-r5} and {r9-r11} */ +.macro restore_vfp_state cp10_off, cp11_off, vcpup + add r11, \vcpup, \cp11_off + ldc p11, cr0, [r11],#32*4 @ VLDM r11!, {d0-d15} + mrc p10, 7, r9, cr7, cr0, 0 @ MVFR0 + and r9, r9, #MVFR0_A_SIMD_MASK + cmp r9, #2 @ Check for 32 registers + ldceql p11, cr0, [r11],#32*4 @ VLDMeq r11!, {d16-d31} + + add r10, \vcpup, \cp10_off + ldm r10, {r2-r5} @ Load FPEXC, FPSCR, FPINST, FPINST2 + + mcr p10, 7, r3, cr1, cr0, 0 @ FPSCR + tst r2, #FPEXC_EX @ Check for VFP Subarchitecture + beq 1f + mcr p10, 7, r4, cr9, cr0, 0 @ FPINST + tst r2, #FPEXC_FP2V + beq 1f + mcr p10, 7, r5, cr10, cr0, 0 @ FPINST2 +1: + mcr p10, 7, r2, cr8, cr0, 0 @ FPEXC (last, in case !EN) +.endm + @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @ Hypervisor world-switch code @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -337,6 +394,16 @@ ENTRY(__kvm_vcpu_run) set_hstr 1 set_hcptr 1 + @ Mask FPEXC_EN so the guest doesn't trap floating point instructions + mrc p10, 7, r2, cr8, cr0, 0 @ FPEXC + orr r2, r2, #FPEXC_EN + mcr p10, 7, r2, cr8, cr0, 0 @ FPEXC + + @ Trap floating point accesses so we can lazy restore + mrc p15, 4, r1, c1, c1, 2 + orr r1, r1, #((1 << 10) | (1 << 11)) @ Trap cp10 
and cp11 + mcr p15, 4, r1, c1, c1, 2 + @ Write configured ID register into MIDR alias ldr r1, [r0, #VCPU_MIDR] mcr p15, 4, r1, c0, c0, 0 @@ -393,6 +460,27 @@ __kvm_vcpu_return: store_mode_state r1, usr sub r1, r1, #(VCPU_USR_REG(13)) + @ Save floating point registers if we let guest use them. + mrc p15, 4, r2, c1, c1, 2 + tst r2, #((1 << 10) | (1 << 11)) + beq switch_to_host_vfp + + @ Don't trap VFP accesses for host kernel. + bic r2, r2, #((1 << 10) | (1 << 11)) + mcr p15, 4, r2, c1, c1, 2 + b after_vfp_restore + +switch_to_host_vfp: + @ Switch VFP/NEON hardware state to the host's + store_vfp_state #VCPU_GUEST_CP10, #VCPU_GUEST_CP11, r1 + restore_vfp_state #VCPU_HOST_CP10, #VCPU_HOST_CP11, r1 + +after_vfp_restore: + @ Disable FPEXC_EN again + mrc p10, 7, r2, cr8, cr0, 0 @ FPEXC + bic r2, r2, #FPEXC_EN + mcr p10, 7, r2, cr8, cr0, 0 + @ Don't trap coprocessor accesses for host kernel set_hstr 0 set_hcptr 0 @@ -587,6 +675,8 @@ hyp_hvc: @ Check syndrome register mrc p15, 4, r0, c5, c2, 0 @ HSR lsr r1, r0, #HSR_EC_SHIFT + cmp r1, #HSR_EC_CP_0_13 + beq switch_to_guest_vfp cmp r1, #HSR_EC_HVC bne guest_trap @ Not HVC instr. @@ -661,6 +751,27 @@ guest_trap: 1: mov r0, #ARM_EXCEPTION_HVC b __kvm_vcpu_return +switch_to_guest_vfp: + ldr r0, [sp, #12] @ Load VCPU pointer + + @ NEON/VFP used. Turn on VFP access. + mrc p15, 4, r2, c1, c1, 2 + bic r2, r2, #((1 << 10) | (1 << 11)) + mcr p15, 4, r2, c1, c1, 2 + + push {r3-r5} + push {r9-r11} + + @ Switch VFP/NEON hardware state to the guest's + store_vfp_state #VCPU_HOST_CP10, #VCPU_HOST_CP11, r0 + restore_vfp_state #VCPU_GUEST_CP10, #VCPU_GUEST_CP11, r0 + + pop {r9-r11} + pop {r3-r5} + + pop {r0, r1, r2} + eret + .align hyp_irq: push {r0} -- 1.7.9.5