This is based on the non-lazy save/restore patch by Antonios Motakis
<a.motakis at virtualopensystems.com>.

In this patch we use the Hyp Coprocessor Trap Register (HCPTR) to trap
VFP/NEON instructions, and switch the FPU state at that point.  This has
subtleties: we save the host state on the stack (so we have to manage that
layout carefully, so that a trap which exits to the host can still pop it
correctly), and FPEXC.EN may have been switched off by either the host or
the guest.

Note that we trap every time the guest touches FPEXC, which Linux does on
every context switch (it could avoid this by keeping the value in a per-cpu
variable).  A rough C sketch of the switching logic follows the diff.

Signed-off-by: Rusty Russell <rusty.russell at linaro.org>

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 44abdc8..71b92e6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -85,12 +85,24 @@ enum cp15_regs {
         nr_cp15_regs
 };
 
+enum cp10_regs {
+        FPEXC,          /* Floating Point Exception Control Register */
+        FPSCR,          /* Floating Point Status and Control Register */
+        FPINST,         /* Common VFP Subarchitecture Registers */
+        FPINST2,
+        nr_cp10_regs
+};
+
 struct kvm_vcpu_arch {
         struct kvm_vcpu_regs regs;
 
         /* System control coprocessor (cp15) */
         u32 cp15[nr_cp15_regs];
 
+        /* Floating point registers (VFP and Advanced SIMD/NEON) */
+        u32 cp10[nr_cp10_regs];
+        u32 cp11[64];
+
         /* Exception Information */
         u32 hsr;                /* Hyp Syndrom Register */
         u32 hdfar;              /* Hyp Data Fault Address Register */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index c8c1b91..5bd1849 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -161,6 +161,8 @@ int main(void)
   DEFINE(VCPU_TID_URW,          offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW]));
   DEFINE(VCPU_TID_URO,          offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO]));
   DEFINE(VCPU_TID_PRIV,         offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV]));
+  DEFINE(VCPU_CP10,             offsetof(struct kvm_vcpu, arch.cp10));
+  DEFINE(VCPU_CP11,             offsetof(struct kvm_vcpu, arch.cp11));
   DEFINE(VCPU_REGS,             offsetof(struct kvm_vcpu, arch.regs));
   DEFINE(VCPU_USR_REGS,         offsetof(struct kvm_vcpu, arch.regs.usr_regs));
   DEFINE(VCPU_SVC_REGS,         offsetof(struct kvm_vcpu, arch.regs.svc_regs));
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0cf4965..43c2ee6 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -21,6 +21,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
+#include <asm/vfp.h>
 
 #define VCPU_USR_REG(_reg_nr)  (VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP            (VCPU_USR_REG(13))
@@ -236,6 +237,77 @@ ENTRY(__kvm_flush_vm_context)
         mcr     p15, 0, r11, c10, c2, 1 @ NMRR
 .endm
 
+.macro store_vfp_state vcpu=0, vcpup
+        mrc     p10, 7, r2, cr8, cr0, 0         @ FPEXC
+        @ Make sure VFP is enabled so we can touch the registers.
+        orr     r3, r2, #FPEXC_EN
+        mcr     p10, 7, r3, cr8, cr0, 0         @ FPEXC
+        .if \vcpu == 0
+        vpush   {d0-d15}
+        .else
+        add     r11, \vcpup, #VCPU_CP11
+        vstm    r11!, {d0-d15}
+        .endif
+        mrc     p10, 7, r9, cr7, cr0, 0         @ MVFR0
+        and     r9, r9, #MVFR0_A_SIMD_MASK
+        cmp     r9, #2                          @ Check for 32 registers
+        .if \vcpu == 0
+        vpusheq {d16-d31}
+        .else
+        vstmeq  r11!, {d16-d31}
+        .endif
+
+        mrc     p10, 7, r3, cr1, cr0, 0         @ FPSCR
+        tst     r2, #FPEXC_EX                   @ Check for VFP Subarchitecture
+        beq     1f
+        mrc     p10, 7, r4, cr9, cr0, 0         @ FPINST
+        tst     r2, #FPEXC_FP2V
+        beq     1f
+        mrc     p10, 7, r5, cr10, cr0, 0        @ FPINST2
+
+1:
+        .if \vcpu == 0
+        push    {r2-r5}
+        .else
+        add     r10, \vcpup, #VCPU_CP10
+        stm     r10, {r2-r5}            @ Save FPEXC, FPSCR, FPINST, FPINST2
+        .endif
+.endm
+
+/* Assumes FPEXC.EN is on. */
+.macro restore_vfp_state vcpu=0, vcpup
+        .if \vcpu == 0
+        pop     {r2-r5}
+        .else
+        add     r10, \vcpup, #VCPU_CP10
+        ldm     r10, {r2-r5}            @ Load FPEXC, FPSCR, FPINST, FPINST2
+        .endif
+
+        mcr     p10, 7, r3, cr1, cr0, 0         @ FPSCR
+        tst     r2, #FPEXC_EX                   @ Check for VFP Subarchitecture
+        beq     1f
+        mcr     p10, 7, r4, cr9, cr0, 0         @ FPINST
+        tst     r2, #FPEXC_FP2V
+        beq     1f
+        mcr     p10, 7, r5, cr10, cr0, 0        @ FPINST2
+
+1:
+        .if \vcpu == 1
+        add     r11, \vcpup, #VCPU_CP11
+        vldm    r11!, {d0-d15}
+        .endif
+        mrc     p10, 7, r9, cr7, cr0, 0         @ MVFR0
+        and     r9, r9, #MVFR0_A_SIMD_MASK
+        cmp     r9, #2                          @ Check for 32 registers
+        .if \vcpu == 0
+        vpopeq  {d16-d31}
+        vpop    {d0-d15}
+        .else
+        vldmeq  r11!, {d16-d31}
+        .endif
+        mcr     p10, 7, r2, cr8, cr0, 0         @ FPEXC (last, in case !EN)
+.endm
+
 /* Configures the HSTR (Hyp System Trap Register) on entry/return
  * (hardware reset value is 0) */
 .macro set_hstr entry
@@ -298,6 +370,11 @@ ENTRY(__kvm_vcpu_run)
         @ Trap coprocessor CRx for all x except 2 and 14
         set_hstr 1
 
+        @ Trap floating point accesses so we can restore lazily.
+        mrc     p15, 4, r1, c1, c1, 2
+        orr     r1, r1, #((1 << 10) | (1 << 11))        @ Trap cp10 and cp11
+        mcr     p15, 4, r1, c1, c1, 2
+
         @ Write standard A-9 CPU id in MIDR
         ldr     r1, [r0, #VCPU_MIDR]
         mcr     p15, 4, r1, c0, c0, 0
@@ -345,6 +422,22 @@ __kvm_vcpu_return:
         @ Don't trap coprocessor accesses for host kernel
         set_hstr 0
 
+        @ Save floating point registers if we let the guest use them.
+        mrc     p15, 4, r2, c1, c1, 2
+        tst     r2, #((1 << 10) | (1 << 11))
+        beq     switch_to_host_vfp
+
+        @ Don't trap VFP accesses for host kernel.
+        bic     r2, r2, #((1 << 10) | (1 << 11))
+        mcr     p15, 4, r2, c1, c1, 2
+        b       after_vfp_restore
+
+switch_to_host_vfp:
+        @ Switch VFP/NEON hardware state to the host's
+        store_vfp_state 1, r1
+        restore_vfp_state
+
+after_vfp_restore:
         @ Reset Hyp-role
         configure_hyp_role 0, r1
@@ -532,8 +625,11 @@ guest_trap:
         stmia   r1, {r3, r4, r5}
         sub     r1, r1, #VCPU_USR_REG(0)
 
-        @ Check if we need the fault information
         lsr     r2, r0, #HSR_EC_SHIFT
+        cmp     r2, #HSR_EC_CP_0_13
+        beq     switch_to_guest_vfp
+
+        @ Check if we need the fault information
         cmp     r2, #HSR_EC_IABT
         beq     2f
         cmpne   r2, #HSR_EC_DABT
@@ -558,6 +654,25 @@ guest_trap:
 1:      mov     r0, #ARM_EXCEPTION_HVC
         b       __kvm_vcpu_return
 
+switch_to_guest_vfp:
+        @ NEON/VFP used.  Turn on VFP access.
+        mrc     p15, 4, r2, c1, c1, 2
+        bic     r2, r2, #((1 << 10) | (1 << 11))
+        mcr     p15, 4, r2, c1, c1, 2
+
+        @ Push host vfp state onto stack, restore guest from VCPU ptr.
+        @ On a normal exit, we will pop host VFP state in __kvm_vcpu_return.
+        store_vfp_state
+        restore_vfp_state 1, r1
+
+        @ Last of all, push the vcpu pointer back onto the stack.
+        push    {r1}
+
+        @ We just need to restore guest regs, then return to guest.
+        add     r0, r1, #VCPU_USR_REG(0)
+        ldmia   r0, {r0-r12}
+        eret
+
         .align
 hyp_irq:
         push    {r0}
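---

For reference, a minimal C sketch of the lazy switching logic implemented by the
assembly above.  This is purely illustrative and not part of the patch: the
helper names (vfp_save(), vfp_load(), guest_vfp_trap(), vcpu_entry/exit) and the
host_vfp variable are made up, and the real host save area is the Hyp stack
rather than a static struct.  Only the HCPTR TCP10/TCP11 bit positions match the
code.

/* Illustrative stand-ins for the store_vfp_state/restore_vfp_state macros. */
#define HCPTR_TCP10_TCP11	((1 << 10) | (1 << 11))	/* trap cp10/cp11 accesses */

struct vfp_state {
	unsigned int cp10[4];	/* FPEXC, FPSCR, FPINST, FPINST2 */
	unsigned int cp11[64];	/* d0-d31 */
};

static struct vfp_state host_vfp;	/* stand-in for the Hyp stack save area */

static void vfp_save(struct vfp_state *s) { /* store_vfp_state in interrupts.S */ }
static void vfp_load(const struct vfp_state *s) { /* restore_vfp_state in interrupts.S */ }

/* Guest entry: leave the FPU alone, just arm the cp10/cp11 trap. */
static void vcpu_entry(unsigned int *hcptr)
{
	*hcptr |= HCPTR_TCP10_TCP11;
}

/* First guest VFP/NEON access traps to Hyp: switch the FPU to the guest. */
static void guest_vfp_trap(struct vfp_state *guest_vfp, unsigned int *hcptr)
{
	*hcptr &= ~HCPTR_TCP10_TCP11;	/* no more traps for this guest run */
	vfp_save(&host_vfp);		/* host state goes on the Hyp stack */
	vfp_load(guest_vfp);		/* guest state comes from the vcpu struct */
}

/* Guest exit: switch back only if the guest actually used the FPU. */
static void vcpu_exit(struct vfp_state *guest_vfp, unsigned int *hcptr)
{
	if (!(*hcptr & HCPTR_TCP10_TCP11)) {
		/* Trap bits were cleared, so the guest owns the FPU. */
		vfp_save(guest_vfp);
		vfp_load(&host_vfp);
	} else {
		/* Guest never touched the FPU; just stop trapping for the host. */
		*hcptr &= ~HCPTR_TCP10_TCP11;
	}
}

The point of doing it this way is that a guest which never touches VFP/NEON only
pays for the HCPTR update on each world switch, rather than a full save and
restore of all 32 double registers.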