Hello, I was looking into getting this patch up to date, so I had the chance to test it on the Fast Model. Unfortunately with your changes included I'm unable to boot a Linux guest; init fails to start somehow: Freeing init memory: 164K Kernel panic - not syncing: Attempted to kill init! I spent some time investigating, trying to find the cause, but nothing so far... I will investigate more, but I thought I'd let you know in case you have seen this before. KVM version is the v7 patchset and the guest is a simple busybox system. Best regards, Antonios On Wed, May 23, 2012 at 9:22 AM, Rusty Russell <rusty.russell at linaro.org> wrote: > This is based on the non-lazy save/restore patch by > Antonios Motakis <a.motakis at virtualopensystems.com>. > > In this patch, we use the Hyp Coprocessor Trap Register > (HCPTR) to trap VFP/NEON instructions, and switch the FPU > state at that point. This has subtleties, including that > we save the host state on the stack (so we have to manage > that layout carefully so a trap which exits can pop it > correctly), and that the FPEXC.EN could be switched off by > the host or guest. > > Note that we trap every time the guest touches FPEXC, which > Linux does on every context switch (but it could keep that > in a per-cpu variable). 
> > Signed-off-by: Rusty Russell <rusty.russell at linaro.org> > > diff --git a/arch/arm/include/asm/kvm_host.h > b/arch/arm/include/asm/kvm_host.h > index 44abdc8..71b92e6 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -85,12 +85,24 @@ enum cp15_regs { > nr_cp15_regs > }; > > +enum cp10_regs { > + FPEXC, /* Floating Point Exception Control > Register */ > + FPSCR, /* Floating Point Status and Control > Register */ > + FPINST, /* Common VFP Subarchitecture Registers */ > + FPINST2, > + nr_cp10_regs > +}; > + > struct kvm_vcpu_arch { > struct kvm_vcpu_regs regs; > > /* System control coprocessor (cp15) */ > u32 cp15[nr_cp15_regs]; > > + /* Floating point registers (VFP and Advanced SIMD/NEON) */ > + u32 cp10[nr_cp10_regs]; > + u32 cp11[64]; > + > /* Exception Information */ > u32 hsr; /* Hyp Syndrom Register */ > u32 hdfar; /* Hyp Data Fault Address Register */ > diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c > index c8c1b91..5bd1849 100644 > --- a/arch/arm/kernel/asm-offsets.c > +++ b/arch/arm/kernel/asm-offsets.c > @@ -161,6 +161,8 @@ int main(void) > DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, > arch.cp15[c13_TID_URW])); > DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, > arch.cp15[c13_TID_URO])); > DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, > arch.cp15[c13_TID_PRIV])); > + DEFINE(VCPU_CP10, offsetof(struct kvm_vcpu, arch.cp10)); > + DEFINE(VCPU_CP11, offsetof(struct kvm_vcpu, arch.cp11)); > DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); > DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, > arch.regs.usr_regs)); > DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, > arch.regs.svc_regs)); > diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S > index 0cf4965..43c2ee6 100644 > --- a/arch/arm/kvm/interrupts.S > +++ b/arch/arm/kvm/interrupts.S > @@ -21,6 +21,7 @@ > #include <asm/asm-offsets.h> > #include <asm/kvm_asm.h> > #include <asm/kvm_arm.h> > +#include <asm/vfp.h> 
> > #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) > #define VCPU_USR_SP (VCPU_USR_REG(13)) > @@ -236,6 +237,77 @@ ENTRY(__kvm_flush_vm_context) > mcr p15, 0, r11, c10, c2, 1 @ NMRR > .endm > > +.macro store_vfp_state vcpu=0, vcpup > + mrc p10, 7, r2, cr8, cr0, 0 @ FPEXC > + @ Make sure VFP is enabled so we can touch the registers. > + orr r3, r2, #FPEXC_EN > + mcr p10, 7, r3, cr8, cr0, 0 @ FPEXC > + .if \vcpu == 0 > + vpush {d0-d15} > + .else > + add r11, \vcpup, #VCPU_CP11 > + vstm r11!, {d0-d15} > + .endif > + mrc p10, 7, r9, cr7, cr0, 0 @ MVFR0 > + and r9, r9, #MVFR0_A_SIMD_MASK > + cmp r9, #2 @ Check for 32 registers > + .if \vcpu == 0 > + vpusheq {d16-d31} > + .else > + vstmeq r11!, {d16-d31} > + .endif > + > + mrc p10, 7, r3, cr1, cr0, 0 @ FPSCR > + tst r2, #FPEXC_EX @ Check for VFP > Subarchitecture > + beq 1f > + mrc p10, 7, r4, cr9, cr0, 0 @ FPINST > + tst r2, #FPEXC_FP2V > + beq 1f > + mrc p10, 7, r5, cr10, cr0, 0 @ FPINST2 > + > +1: > + .if \vcpu == 0 > + push {r2-r5} > + .else > + add r10, \vcpup, #VCPU_CP10 > + stm r10, {r2-r5} @ Save FPEXC, FPSCR, FPINST, > FPINST2 > + .endif > +.endm > + > +/* Assumed FPEXC.EN on. 
*/ > +.macro restore_vfp_state vcpu=0, vcpup > + .if \vcpu == 0 > + pop {r2-r5} > + .else > + add r10, \vcpup, #VCPU_CP10 > + ldm r10, {r2-r5} @ Load FPEXC, FPSCR, FPINST, > FPINST2 > + .endif > + > + mcr p10, 7, r3, cr1, cr0, 0 @ FPSCR > + tst r2, #FPEXC_EX @ Check for VFP > Subarchitecture > + beq 1f > + mcr p10, 7, r4, cr9, cr0, 0 @ FPINST > + tst r2, #FPEXC_FP2V > + beq 1f > + mcr p10, 7, r5, cr10, cr0, 0 @ FPINST2 > + > +1: > + .if \vcpu == 1 > + add r11, \vcpup, #VCPU_CP11 > + vldm r11!, {d0-d15} > + .endif > + mrc p10, 7, r9, cr7, cr0, 0 @ MVFR0 > + and r9, r9, #MVFR0_A_SIMD_MASK > + cmp r9, #2 @ Check for 32 registers > + .if \vcpu == 0 > + vpopeq {d16-d31} > + vpop {d0-d15} > + .else > + vldmeq r11!, {d16-d31} > + .endif > + mcr p10, 7, r2, cr8, cr0, 0 @ FPEXC (last, in case !EN) > +.endm > + > /* Configures the HSTR (Hyp System Trap Register) on entry/return > * (hardware reset value is 0) */ > .macro set_hstr entry > @@ -298,6 +370,11 @@ ENTRY(__kvm_vcpu_run) > @ Trap coprocessor CRx for all x except 2 and 14 > set_hstr 1 > > + @ Trap floating point accesses so we can lazy restore. > + mrc p15, 4, r1, c1, c1, 2 > + orr r1, r1, #((1 << 10) | (1 << 11)) @ Trap cp10 and > cp11 > + mcr p15, 4, r1, c1, c1, 2 > + > @ Write standard A-9 CPU id in MIDR > ldr r1, [r0, #VCPU_MIDR] > mcr p15, 4, r1, c0, c0, 0 > @@ -345,6 +422,22 @@ __kvm_vcpu_return: > @ Don't trap coprocessor accesses for host kernel > set_hstr 0 > > + @ Save floating point registers we if let guest use them. > + mrc p15, 4, r2, c1, c1, 2 > + tst r2, #((1 << 10) | (1 << 11)) > + beq switch_to_host_vfp > + > + @ Don't trap VFP accesses for host kernel. 
> + bic r2, r2, #((1 << 10) | (1 << 11)) > + mcr p15, 4, r2, c1, c1, 2 > + b after_vfp_restore > + > +switch_to_host_vfp: > + @ Switch VFP/NEON hardware state to the host's > + store_vfp_state 1, r1 > + restore_vfp_state > + > +after_vfp_restore: > @ Reset Hyp-role > configure_hyp_role 0, r1 > > @@ -532,8 +625,11 @@ guest_trap: > stmia r1, {r3, r4, r5} > sub r1, r1, #VCPU_USR_REG(0) > > - @ Check if we need the fault information > lsr r2, r0, #HSR_EC_SHIFT > + cmp r2, #HSR_EC_CP_0_13 > + beq switch_to_guest_vfp > + > + @ Check if we need the fault information > cmp r2, #HSR_EC_IABT > beq 2f > cmpne r2, #HSR_EC_DABT > @@ -558,6 +654,25 @@ guest_trap: > 1: mov r0, #ARM_EXCEPTION_HVC > b __kvm_vcpu_return > > +switch_to_guest_vfp: > + @ NEON/VFP used. Turn on VFP access. > + mrc p15, 4, r2, c1, c1, 2 > + bic r2, r2, #((1 << 10) | (1 << 11)) > + mcr p15, 4, r2, c1, c1, 2 > + > + @ Push host vfp state onto stack, restore guest from VCPU ptr. > + @ On a normal exit, we will pop host VFP state in > __kvm_vcpu_return. > + store_vfp_state > + restore_vfp_state 1, r1 > + > + @ Last of all, push vcpuptr back into the stack. > + push {r1} > + > + @ We just need to restore guest regs, then return to guest. > + add r0, r1, #VCPU_USR_REG(0) > + ldmia r0, {r0-r12} > + eret > + > .align > hyp_irq: > push {r0} > -------------- next part -------------- An HTML attachment was scrubbed... URL: https://lists.cs.columbia.edu/pipermail/android-virt/attachments/20120625/c853c420/attachment-0001.html