[Android-virt] [PATCH] ARM: KVM: lazy save/restore of vfp/neon.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,

I was looking into getting this patch up to date, so I had the chance to
test it on the Fast Model. Unfortunately with your changes included I'm
unable to boot a Linux guest; init fails to start somehow:

Freeing init memory: 164K
Kernel panic - not syncing: Attempted to kill init!

I spent some time investigating trying to find the cause, but nothing so
far... I will investigate more, but I thought I'd let you know in case you
have seen this before. KVM version is the v7 patchset and the guest is a
simple busybox system.

Best regards,
Antonios

On Wed, May 23, 2012 at 9:22 AM, Rusty Russell <rusty.russell at linaro.org> wrote:

> This is based on the non-lazy save/restore patch by
> Antonios Motakis <a.motakis at virtualopensystems.com>.
>
> In this patch, we use the Hyp Coprocessor Trap Register
> (HCPTR) to trap VFP/NEON instructions, and switch the FPU
> state at that point.  This has subtleties, including that
> we save the host state on the stack (so we have to manage
> that layout carefully so a trap which exits can pop it
> correctly), and that the FPEXC.EN could be switched off by
> the host or guest.
>
> Note that we trap every time the guest touches FPEXC, which
> Linux does on every context switch (but it could keep that
> in a per-cpu variable).
>
> Signed-off-by: Rusty Russell <rusty.russell at linaro.org>
>
> diff --git a/arch/arm/include/asm/kvm_host.h
> b/arch/arm/include/asm/kvm_host.h
> index 44abdc8..71b92e6 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -85,12 +85,24 @@ enum cp15_regs {
>        nr_cp15_regs
>  };
>
> +enum cp10_regs {
> +       FPEXC,                  /* Floating Point Exception Control
> Register */
> +       FPSCR,                  /* Floating Point Status and Control
> Register */
> +       FPINST,                 /* Common VFP Subarchitecture Registers */
> +       FPINST2,
> +       nr_cp10_regs
> +};
> +
>  struct kvm_vcpu_arch {
>        struct kvm_vcpu_regs regs;
>
>        /* System control coprocessor (cp15) */
>        u32 cp15[nr_cp15_regs];
>
> +       /* Floating point registers (VFP and Advanced SIMD/NEON) */
> +       u32 cp10[nr_cp10_regs];
> +       u32 cp11[64];
> +
>        /* Exception Information */
> +       u32 hsr;                /* Hyp Syndrome Register */
>        u32 hdfar;              /* Hyp Data Fault Address Register */
> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
> index c8c1b91..5bd1849 100644
> --- a/arch/arm/kernel/asm-offsets.c
> +++ b/arch/arm/kernel/asm-offsets.c
> @@ -161,6 +161,8 @@ int main(void)
>   DEFINE(VCPU_TID_URW,         offsetof(struct kvm_vcpu,
> arch.cp15[c13_TID_URW]));
>   DEFINE(VCPU_TID_URO,         offsetof(struct kvm_vcpu,
> arch.cp15[c13_TID_URO]));
>   DEFINE(VCPU_TID_PRIV,                offsetof(struct kvm_vcpu,
> arch.cp15[c13_TID_PRIV]));
> +  DEFINE(VCPU_CP10,            offsetof(struct kvm_vcpu, arch.cp10));
> +  DEFINE(VCPU_CP11,            offsetof(struct kvm_vcpu, arch.cp11));
>   DEFINE(VCPU_REGS,            offsetof(struct kvm_vcpu, arch.regs));
>   DEFINE(VCPU_USR_REGS,                offsetof(struct kvm_vcpu,
> arch.regs.usr_regs));
>   DEFINE(VCPU_SVC_REGS,                offsetof(struct kvm_vcpu,
> arch.regs.svc_regs));
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index 0cf4965..43c2ee6 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -21,6 +21,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_arm.h>
> +#include <asm/vfp.h>
>
>  #define VCPU_USR_REG(_reg_nr)  (VCPU_USR_REGS + (_reg_nr * 4))
>  #define VCPU_USR_SP            (VCPU_USR_REG(13))
> @@ -236,6 +237,77 @@ ENTRY(__kvm_flush_vm_context)
>        mcr     p15, 0, r11, c10, c2, 1 @ NMRR
>  .endm
>
> +.macro store_vfp_state vcpu=0, vcpup
> +       mrc     p10, 7, r2, cr8, cr0, 0         @ FPEXC
> +       @ Make sure VFP is enabled so we can touch the registers.
> +       orr     r3, r2, #FPEXC_EN
> +       mcr     p10, 7, r3, cr8, cr0, 0         @ FPEXC
> +       .if \vcpu == 0
> +       vpush   {d0-d15}
> +       .else
> +       add     r11, \vcpup, #VCPU_CP11
> +       vstm    r11!, {d0-d15}
> +       .endif
> +       mrc     p10, 7, r9, cr7, cr0, 0         @ MVFR0
> +       and     r9, r9, #MVFR0_A_SIMD_MASK
> +       cmp     r9, #2                          @ Check for 32 registers
> +       .if \vcpu == 0
> +       vpusheq {d16-d31}
> +       .else
> +       vstmeq  r11!, {d16-d31}
> +       .endif
> +
> +       mrc     p10, 7, r3, cr1, cr0, 0         @ FPSCR
> +       tst     r2, #FPEXC_EX                   @ Check for VFP
> Subarchitecture
> +       beq     1f
> +       mrc     p10, 7, r4, cr9, cr0, 0         @ FPINST
> +       tst     r2, #FPEXC_FP2V
> +       beq     1f
> +       mrc     p10, 7, r5, cr10, cr0, 0        @ FPINST2
> +
> +1:
> +       .if \vcpu == 0
> +       push    {r2-r5}
> +       .else
> +       add     r10, \vcpup, #VCPU_CP10
> +       stm     r10, {r2-r5}            @ Save FPEXC, FPSCR, FPINST,
> FPINST2
> +       .endif
> +.endm
> +
> +/* Assumed FPEXC.EN on. */
> +.macro restore_vfp_state vcpu=0, vcpup
> +       .if \vcpu == 0
> +       pop     {r2-r5}
> +       .else
> +       add     r10, \vcpup, #VCPU_CP10
> +       ldm     r10, {r2-r5}            @ Load FPEXC, FPSCR, FPINST,
> FPINST2
> +       .endif
> +
> +       mcr     p10, 7, r3, cr1, cr0, 0         @ FPSCR
> +       tst     r2, #FPEXC_EX                   @ Check for VFP
> Subarchitecture
> +       beq     1f
> +       mcr     p10, 7, r4, cr9, cr0, 0         @ FPINST
> +       tst     r2, #FPEXC_FP2V
> +       beq     1f
> +       mcr     p10, 7, r5, cr10, cr0, 0        @ FPINST2
> +
> +1:
> +       .if \vcpu == 1
> +       add     r11, \vcpup, #VCPU_CP11
> +       vldm    r11!, {d0-d15}
> +       .endif
> +       mrc     p10, 7, r9, cr7, cr0, 0         @ MVFR0
> +       and     r9, r9, #MVFR0_A_SIMD_MASK
> +       cmp     r9, #2                          @ Check for 32 registers
> +       .if \vcpu == 0
> +       vpopeq  {d16-d31}
> +       vpop    {d0-d15}
> +       .else
> +       vldmeq  r11!, {d16-d31}
> +       .endif
> +       mcr     p10, 7, r2, cr8, cr0, 0         @ FPEXC (last, in case !EN)
> +.endm
> +
>  /* Configures the HSTR (Hyp System Trap Register) on entry/return
>  * (hardware reset value is 0) */
>  .macro set_hstr entry
> @@ -298,6 +370,11 @@ ENTRY(__kvm_vcpu_run)
>        @ Trap coprocessor CRx for all x except 2 and 14
>        set_hstr 1
>
> +       @ Trap floating point accesses so we can lazy restore.
> +       mrc     p15, 4, r1, c1, c1, 2
> +       orr     r1, r1, #((1 << 10) | (1 << 11))        @ Trap cp10 and
> cp11
> +       mcr     p15, 4, r1, c1, c1, 2
> +
>        @ Write standard A-9 CPU id in MIDR
>        ldr     r1, [r0, #VCPU_MIDR]
>        mcr     p15, 4, r1, c0, c0, 0
> @@ -345,6 +422,22 @@ __kvm_vcpu_return:
>        @ Don't trap coprocessor accesses for host kernel
>        set_hstr 0
>
> +       @ Save floating point registers if we let the guest use them.
> +       mrc     p15, 4, r2, c1, c1, 2
> +       tst     r2, #((1 << 10) | (1 << 11))
> +       beq     switch_to_host_vfp
> +
> +       @ Don't trap VFP accesses for host kernel.
> +       bic     r2, r2, #((1 << 10) | (1 << 11))
> +       mcr     p15, 4, r2, c1, c1, 2
> +       b       after_vfp_restore
> +
> +switch_to_host_vfp:
> +       @ Switch VFP/NEON hardware state to the host's
> +       store_vfp_state 1, r1
> +       restore_vfp_state
> +
> +after_vfp_restore:
>        @ Reset Hyp-role
>        configure_hyp_role 0, r1
>
> @@ -532,8 +625,11 @@ guest_trap:
>        stmia   r1, {r3, r4, r5}
>        sub     r1, r1, #VCPU_USR_REG(0)
>
> -       @ Check if we need the fault information
>        lsr     r2, r0, #HSR_EC_SHIFT
> +       cmp     r2, #HSR_EC_CP_0_13
> +       beq     switch_to_guest_vfp
> +
> +       @ Check if we need the fault information
>        cmp     r2, #HSR_EC_IABT
>        beq     2f
>        cmpne   r2, #HSR_EC_DABT
> @@ -558,6 +654,25 @@ guest_trap:
>  1:     mov     r0, #ARM_EXCEPTION_HVC
>        b       __kvm_vcpu_return
>
> +switch_to_guest_vfp:
> +       @ NEON/VFP used.  Turn on VFP access.
> +       mrc     p15, 4, r2, c1, c1, 2
> +       bic     r2, r2, #((1 << 10) | (1 << 11))
> +       mcr     p15, 4, r2, c1, c1, 2
> +
> +       @ Push host vfp state onto stack, restore guest from VCPU ptr.
> +       @ On a normal exit, we will pop host VFP state in
> __kvm_vcpu_return.
> +       store_vfp_state
> +       restore_vfp_state 1, r1
> +
> +       @ Last of all, push vcpuptr back into the stack.
> +       push    {r1}
> +
> +       @ We just need to restore guest regs, then return to guest.
> +       add     r0, r1, #VCPU_USR_REG(0)
> +       ldmia   r0, {r0-r12}
> +       eret
> +
>        .align
>  hyp_irq:
>        push    {r0}
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.cs.columbia.edu/pipermail/android-virt/attachments/20120625/c853c420/attachment-0001.html


[Index of Archives]     [Linux KVM]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux