Re: [RFC PATCH 1/5] ARM: KVM: Change HYP ABI from continuation to function call

On Oct 2, 2012, at 4:54 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:

> On 01/10/12 19:39, Christoffer Dall wrote:
>> On Mon, Oct 1, 2012 at 9:42 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
>>> The current HYP ABI has the following semantics:
>>> 
>>>        <svc code>
>>>        hvc #0  ; Enter HYP mode
>>>        <hyp code>
>>>        hvc #0  ; Exit HYP mode
>>>        <svc code>
>>> 
>>> This relies on the implicit assumption that you can map both SVC and
>>> HYP code at the same virtual address so you can seamlessly carry on
>>> executing code in sequence after switching mode.
>>> 
>>> This assumption may not be true for ARMv8, if the kernel is located
>>> high enough to be out of reach of HTTBR.
>>> 
>>> This patch changes the ABI so the call into HYP becomes a function
>>> call:
>>> 
>>>        <svc code>
>>>        ldr     r0, =BSYM(my_hyp_fn)
>>>        ldr     r1, =my_param
>>>        hvc #0  ; Call my_hyp_fn(my_param) from HYP mode
>>>        <svc code>
>>> 
>>> This allows the HVC handler to offset the function address passed
>>> from SVC so it can be called. The ABI only supports up to 3 32bit
>>> parameters, and can return up to 64bits of data.
>>> 
>>> From the host point of view, any function mapped in HYP mode can
>>> be called using the kvm_call_hyp() helper:
>>> 
>>>        u64 kvm_call_hyp(void *hypfn, ...);
>>> 
>>> For example, calling __kvm_vcpu_run(vcpu) is replaced by:
>>> 
>>>        kvm_call_hyp(__kvm_vcpu_run, vcpu);
>>> 
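(As an aside, mostly to check my reading of the new ABI: the C-side pattern this produces is a thin static wrapper per HYP entry point, roughly like the sketch below. The prototypes are my paraphrase of what the series ends up with, not quoted from the patch:)

        /* HYP-mapped entry points, only ever reached through kvm_call_hyp() */
        extern void __kvm_flush_vm_context(void);
        extern u64 __kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv);

        static u64 kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv)
        {
                /* r0 = function pointer, r1-r3 = up to three 32-bit
                 * arguments; a 64-bit result comes back in r0/r1 */
                return kvm_call_hyp(__kvm_va_to_pa, vcpu, va, priv);
        }
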
>>> Tested on TC2.
>>> 
>>> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
>>> ---
>>> arch/arm/include/asm/kvm_host.h |   1 +
>>> arch/arm/kvm/arm.c              |   4 +-
>>> arch/arm/kvm/interrupts.S       | 104 +++++++++++++++++++---------------------
>>> arch/arm/kvm/mmu.c              |  20 ++++++--
>>> 4 files changed, 66 insertions(+), 63 deletions(-)
>>> 
>>> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
>>> index a1c87e3..25ab5be 100644
>>> --- a/arch/arm/include/asm/kvm_host.h
>>> +++ b/arch/arm/include/asm/kvm_host.h
>>> @@ -173,5 +173,6 @@ unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
>>> struct kvm_one_reg;
>>> int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
>>> int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
>>> +u64 kvm_call_hyp(void *hypfn, ...);
>>> 
>>> #endif /* __ARM_KVM_HOST_H__ */
>>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>>> index d64ce2a..7f2ea3d 100644
>>> --- a/arch/arm/kvm/arm.c
>>> +++ b/arch/arm/kvm/arm.c
>>> @@ -393,7 +393,7 @@ int kvm_arch_vcpu_in_guest_mode(struct kvm_vcpu *v)
>>> 
>>> static void reset_vm_context(void *info)
>>> {
>>> -       __kvm_flush_vm_context();
>>> +       kvm_call_hyp(__kvm_flush_vm_context);
>>> }
>>> 
>>> /**
>>> @@ -683,7 +683,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>>                        /* This means ignore, try again. */
>>>                        ret = ARM_EXCEPTION_IRQ;
>>>                } else {
>>> -                       ret = __kvm_vcpu_run(vcpu);
>>> +                       ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
>>>                }
>>> 
>>>                vcpu->mode = OUTSIDE_GUEST_MODE;
>>> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
>>> index c171961..849ff2a 100644
>>> --- a/arch/arm/kvm/interrupts.S
>>> +++ b/arch/arm/kvm/interrupts.S
>>> @@ -20,6 +20,7 @@
>>> #include <linux/const.h>
>>> #include <asm/unified.h>
>>> #include <asm/page.h>
>>> +#include <asm/ptrace.h>
>>> #include <asm/asm-offsets.h>
>>> #include <asm/kvm_asm.h>
>>> #include <asm/kvm_arm.h>
>>> @@ -44,7 +45,6 @@ __kvm_hyp_code_start:
>>>  * hardware assisted version.
>>>  */
>>> ENTRY(__kvm_tlb_flush_vmid)
>>> -       hvc     #0                      @ Switch to Hyp mode
>>>        push    {r2, r3}
>>> 
>>>        add     r0, r0, #KVM_VTTBR
>>> @@ -59,7 +59,6 @@ ENTRY(__kvm_tlb_flush_vmid)
>>>        mcrr    p15, 6, r2, r3, c2      @ Back to VMID #0
>>> 
>>>        pop     {r2, r3}
>>> -       hvc     #0                      @ Back to SVC
>> 
>> now perhaps we should re-instate that isb, since the function may be
>> called without taking an exception straight after.
> 
> We do have an exception return (the eret at the end of
> host_switch_to_hyp), and that is enough to ensure proper
> synchronization. See B3.15.5 ("Synchronization of changes to system
> control registers") and the definition of "Context synchronization
> operation".
> 

That wasn't my point: just because the current call path happens to be followed by an eret does not mean this will be the case for future callers - some Hyp-mode code may simply call this function directly and expect it to work. At the very least there should be a comment explaining the dependency! But I think the isb should just be there - these operations are rare and slow anyway.
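
Concretely, what I have in mind is just keeping the barrier at the tail of __kvm_tlb_flush_vmid, something along these lines (sketch only, untested):

        mcrr    p15, 6, r2, r3, c2      @ Back to VMID #0
        isb                             @ not needed if followed by eret, but
                                        @ don't make callers depend on that
        pop     {r2, r3}
        bx      lr

That way the function stays safe even for a future Hyp-side caller that never goes through an exception return.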


>>>        bx      lr
>>> ENDPROC(__kvm_tlb_flush_vmid)
>>> 
>>> @@ -69,14 +68,11 @@ ENDPROC(__kvm_tlb_flush_vmid)
>>>  * void __kvm_flush_vm_context(void);
>>>  */
>>> ENTRY(__kvm_flush_vm_context)
>>> -       hvc     #0                      @ switch to hyp-mode
>>> -
>>>        mov     r0, #0                  @ rn parameter for c15 flushes is SBZ
>>>        mcr     p15, 4, r0, c8, c7, 4   @ Invalidate Non-secure Non-Hyp TLB
>>>        mcr     p15, 0, r0, c7, c5, 0   @ Invalidate instruction caches
>>>        dsb
>> 
>> isb back here as well?
> 
> See above.
> 
>>> 
>>> -       hvc     #0                      @ switch back to svc-mode, see hyp_svc
>>>        bx      lr
>>> ENDPROC(__kvm_flush_vm_context)
>>> 
>>> @@ -89,14 +85,15 @@ ENDPROC(__kvm_flush_vm_context)
>>> @ Arguments:
>>> @  r0: pointer to vcpu struct
>>> ENTRY(__kvm_vcpu_run)
>>> -       hvc     #0                      @ switch to hyp-mode
>>> +       @ Preserve ELR_hyp to return to the HYP trampoline
>>> +       mrs     r1, ELR_hyp
>>> +       push    {r1}
>> 
>> why not preserve this in the trampoline? It would make the code
>> slightly cleaner I believe and does not hurt any critical path.
> 
> Because __kvm_vcpu_run really is the exception (it is the only one that
> clobbers ELR_hyp). ELR_hyp really is part of the host context, and
> should only be saved when we perform a world switch. This is why I later
> move it to being saved as part of the host registers.
> 
> My point is that any clobbered register beyond r0-r3 is defined as
> callee saved, and I'd like to stick to that definition.
> 

Ok, fair enough. 

>>> 
>>>        @ Save the vcpu pointer
>>>        mcr     p15, 4, r0, c13, c0, 2  @ HTPIDR
>>> 
>>> -       @ Now we're in Hyp-mode and lr_usr, spsr_hyp are on the stack
>>> -       mrs     r2, sp_usr
>>> -       push    {r2}                    @ Push r13_usr
>>> +       @ Now we're in Hyp-mode and ELR_hyp, spsr_hyp are on the stack
>>> +       store_mode_state sp, usr
>>>        push    {r4-r12}                @ Push r4-r12
>>> 
>>>        store_mode_state sp, svc
>>> @@ -235,10 +232,10 @@ after_vfp_restore:
>>>        load_mode_state sp, svc
>>> 
>>>        pop     {r4-r12}                @ Pop r4-r12
>>> -       pop     {r2}                    @ Pop r13_usr
>>> -       msr     sp_usr, r2
>>> +       load_mode_state sp, usr         @ SP_usr, LR_usr
>>> 
>>> -       hvc     #0                      @ switch back to svc-mode, see hyp_svc
>>> +       pop     {r2}
>>> +       msr     ELR_hyp, r2
>>> 
>>>        clrex                           @ Clear exclusive monitor
>>>        bx      lr                      @ return to IOCTL
>>> @@ -255,8 +252,6 @@ after_vfp_restore:
>>>  * Returns 64 bit PAR value.
>>>  */
>>> ENTRY(__kvm_va_to_pa)
>>> -       hvc     #0                      @ switch to hyp-mode
>>> -
>>>        push    {r4-r12}
>>> 
>>>        @ Fold flag into r1, easier than using stack.
>>> @@ -280,7 +275,10 @@ ENTRY(__kvm_va_to_pa)
>>> 
>>>        mrrc    p15, 0, r0, r1, c7      @ PAR
>>>        pop     {r4-r12}
>>> -       hvc     #0                      @ Back to SVC
>>> +       bx      lr
>>> +
>>> +ENTRY(kvm_call_hyp)
>>> +       hvc     #0
>>>        bx      lr
>>> 
>>> 
>>> @@ -302,20 +300,11 @@ ENTRY(__kvm_va_to_pa)
>>>  * Hyp-ABI: Switching from host kernel to Hyp-mode:
>> 
>> this should be rewritten now to be phrased something like:
>> Hyp-ABI: Calling functions in Hyp mode, and the text below should be
>> adapted accordingly.
> 
> Sure.
> 
>>>  *    Switching to Hyp mode is done through a simple HVC instructions. The
>>>  *    exception vector code will check that the HVC comes from VMID==0 and if
>>> - *    so will store the necessary state on the Hyp stack, which will look like
>>> - *    this (growing downwards, see the hyp_hvc handler):
>>> - *      ...
>>> - *      stack_page + 4: spsr (Host-SVC cpsr)
>>> - *      stack_page    : lr_usr
>>> - *      --------------: stack bottom
>>> - *
>>> - * Hyp-ABI: Switching from Hyp-mode to host kernel SVC mode:
>>> - *    When returning from Hyp mode to SVC mode, another HVC instruction is
>>> - *    executed from Hyp mode, which is taken in the hyp_svc handler. The
>>> - *    bottom of the Hyp is derived from the Hyp stack pointer (only a single
>>> - *    page aligned stack is used per CPU) and the initial SVC registers are
>>> - *    used to restore the host state.
>>> - *
>>> + *    so will push the necessary state (SPSR, lr_usr) on the Hyp stack.
>>> + *    - r0 contains a pointer to a HYP function
>>> + *    - r1, r2, and r3 contain arguments to the above function.
>>> + *    - The HYP function will be called with its arguments in r0, r1 and r2.
>>> + *    On HYP function return, we return directly to SVC.
>>>  *
>>>  * Note that the above is used to execute code in Hyp-mode from a host-kernel
>>>  * point of view, and is a different concept from performing a world-switch and
>>> @@ -351,12 +340,18 @@ ENTRY(__kvm_va_to_pa)
>>>        str     r2, [r1, #VCPU_HYP_PC]
>>>        b       __kvm_vcpu_return
>>> 
>>> -       @ We were in the host already
>>> -99:    hvc     #0      @ switch to SVC mode
>>> -       ldr     r0, \panic_str
>>> +       @ We were in the host already. Let's craft a panic-ing return to SVC.
>>> +99:    mrs     r2, cpsr
>>> +       bic     r2, r2, #MODE_MASK
>>> +       orr     r2, r2, #SVC_MODE
>>> +THUMB( orr     r2, r2, #PSR_T_BIT      )
>>> +       msr     spsr_cxsf, r2
>>>        mrs     r1, ELR_hyp
>>> -       b       panic
>>> -
>>> +THUMB( ldr     r2, =BSYM(panic) + 4)
>>> +ARM(   ldr     r2, =BSYM(panic) + 8)
>> 
>> forgive my ignorance, why do we need the + 4 or + 8 offsets here?
> 
> Pure bogosity on my side, please ignore. This code path is obviously not
> tested enough... ;-)
> 

It was once;)

>>> +       msr     ELR_hyp, r2
>>> +       ldr     r0, \panic_str
>>> +       eret
>>> .endm
>>> 
>>>        .text
>>> @@ -385,17 +380,7 @@ hyp_undef:
>>> 
>>>        .align
>>> hyp_svc:
>>> -       @ Can only get here if HVC or SVC is called from Hyp, mode which means
>>> -       @ we want to change mode back to SVC mode.
>>> -       push    {r12}
>>> -       mov     r12, sp
>>> -       bic     r12, r12, #0x0ff
>>> -       bic     r12, r12, #0xf00
>>> -       ldr     lr, [r12, #4]
>>> -       msr     SPSR_csxf, lr
>>> -       ldr     lr, [r12]
>>> -       pop     {r12}
>>> -       eret
>>> +       bad_exception ARM_EXCEPTION_HVC, svc_die_str
>>> 
>>>        .align
>>> hyp_pabt:
>>> @@ -434,19 +419,24 @@ hyp_hvc:
>>>        bne     guest_trap              @ Guest called HVC
>>> 
>>> host_switch_to_hyp:
>>> -       @ Store lr_usr,spsr (svc cpsr) on bottom of stack
>>> -       mov     r1, sp
>>> -       bic     r1, r1, #0x0ff
>>> -       bic     r1, r1, #0xf00
>>> -       str     lr, [r1]
>>> -       mrs     lr, spsr
>>> -       str     lr, [r1, #4]
>>> -
>>>        pop     {r0, r1, r2}
>>> 
>>> -       @ Return to caller in Hyp mode
>>> -       mrs     lr, ELR_hyp
>>> -       mov     pc, lr
>>> +       push    {lr}
>>> +       mrs     lr, SPSR
>>> +       push    {lr}
>>> +
>>> +       mov     lr, r0
>>> +       mov     r0, r1
>>> +       mov     r1, r2
>>> +       mov     r2, r3
>>> +
>>> +THUMB( orr     lr, #1)
>>> +       blx     lr                      @ Call the HYP function
>>> +
>>> +       pop     {lr}
>>> +       msr     SPSR_csxf, lr
>>> +       pop     {lr}
>>> +       eret
>>> 
>>> guest_trap:
>>>        load_vcpu       r1              @ Load VCPU pointer
>>> @@ -525,6 +515,8 @@ pabt_die_str:
>>>        .ascii  "unexpected prefetch abort in Hyp mode at: %#08x"
>>> dabt_die_str:
>>>        .ascii  "unexpected data abort in Hyp mode at: %#08x"
>>> +svc_die_str:
>>> +       .ascii  "unexpected HVC/SVC trap in Hyp mode at: %#08x"
>>> 
>>> /*
>>>  * The below lines makes sure the HYP mode code fits in a single page (the
>>> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
>>> index a35a8a9..8f6761c 100644
>>> --- a/arch/arm/kvm/mmu.c
>>> +++ b/arch/arm/kvm/mmu.c
>>> @@ -36,6 +36,11 @@
>>> static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
>>> static pgd_t *hyp_pgd;
>>> 
>>> +static void kvm_tlb_flush_vmid(struct kvm *kvm)
>>> +{
>>> +       kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
>>> +}
>>> +
>>> static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
>>>                                  int min, int max)
>>> {
>>> @@ -393,7 +398,7 @@ static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
>>>        page = virt_to_page(pte);
>>>        put_page(page);
>>>        if (page_count(page) != 1) {
>>> -               __kvm_tlb_flush_vmid(kvm);
>>> +               kvm_tlb_flush_vmid(kvm);
>>>                return;
>>>        }
>>> 
>>> @@ -404,7 +409,7 @@ static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
>>>        page = virt_to_page(pmd);
>>>        put_page(page);
>>>        if (page_count(page) != 1) {
>>> -               __kvm_tlb_flush_vmid(kvm);
>>> +               kvm_tlb_flush_vmid(kvm);
>>>                return;
>>>        }
>>> 
>>> @@ -413,7 +418,7 @@ static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr)
>>> 
>>>        page = virt_to_page(pud);
>>>        put_page(page);
>>> -       __kvm_tlb_flush_vmid(kvm);
>>> +       kvm_tlb_flush_vmid(kvm);
>>> }
>>> 
>>> static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
>>> @@ -453,7 +458,7 @@ static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
>>>        old_pte = *pte;
>>>        set_pte_ext(pte, *new_pte, 0);
>>>        if (pte_present(old_pte))
>>> -               __kvm_tlb_flush_vmid(kvm);
>>> +               kvm_tlb_flush_vmid(kvm);
>>>        else
>>>                get_page(virt_to_page(pte));
>>> }
>>> @@ -611,6 +616,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>>        return 0;
>>> }
>>> 
>>> +static u64 kvm_va_to_pa(struct kvm_vcpu *vcpu, u32 va, bool priv)
>>> +{
>>> +       return kvm_call_hyp(__kvm_va_to_pa, vcpu, va, priv);
>>> +}
>>> +
>>> /**
>>>  * copy_from_guest_va - copy memory from guest (very slow!)
>>>  * @vcpu:      vcpu pointer
>>> @@ -631,7 +641,7 @@ static bool copy_from_guest_va(struct kvm_vcpu *vcpu,
>>>        int err;
>>> 
>>>        BUG_ON((gva & PAGE_MASK) != ((gva + len) & PAGE_MASK));
>>> -       par = __kvm_va_to_pa(vcpu, gva & PAGE_MASK, priv);
>>> +       par = kvm_va_to_pa(vcpu, gva & PAGE_MASK, priv);
>>>        if (par & 1) {
>>>                kvm_err("IO abort from invalid instruction address"
>>>                        " %#lx!\n", gva);
>>> --
>>> 1.7.12
>> 
>> this is definitely much nicer,
>> 
>> thanks.
> 
> 
> -- 
> Jazz is not dead. It just smells funny...
> 

_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm

