On 07/03/2012 12:01 PM, Christoffer Dall wrote:
> Provides complete world-switch implementation to switch to other guests
> running in non-secure modes. Includes Hyp exception handlers that
> capture necessary exception information and store the information on
> the VCPU and KVM structures.
>
> The following Hyp-ABI is also documented in the code:
>
> Hyp-ABI: Switching from host kernel to Hyp-mode:
>    Switching to Hyp mode is done through a simple HVC instruction. The
>    exception vector code will check that the HVC comes from VMID==0 and
>    if so will store the necessary state on the Hyp stack, which will
>    look like this (growing downwards, see the hyp_hvc handler):
>      ...
>      stack_page + 4: spsr (Host-SVC cpsr)
>      stack_page    : lr_usr
>      --------------: stack bottom
>
> Hyp-ABI: Switching from Hyp-mode to host kernel SVC mode:
>    When returning from Hyp mode to SVC mode, another HVC instruction is
>    executed from Hyp mode, which is taken in the hyp_svc handler. The
>    bottom of the Hyp stack is derived from the Hyp stack pointer (only
>    a single page-aligned stack is used per CPU) and the initial SVC
>    registers are used to restore the host state.
>
> Otherwise, the world-switch is pretty straightforward. All state that
> can be modified by the guest is first backed up on the Hyp stack and
> the VCPU values are loaded onto the hardware. State which is not
> loaded, but theoretically modifiable by the guest, is protected through
> the virtualization features to generate a trap and cause software
> emulation. Upon guest return, all state is restored from hardware onto
> the VCPU struct and the original state is restored from the Hyp stack
> onto the hardware.
>
> One controversy may be the back-door call to __irq_svc (the host
> kernel's own physical IRQ handler) which is called when a physical IRQ
> exception is taken in Hyp mode while running in the guest.
>
> SMP support, using the VMPIDR calculated on the basis of the host MPIDR
> and overriding the low bits with the KVM vcpu_id, was contributed by
> Marc Zyngier.

He should sign off on this patch then.

> Reuse of VMIDs has been implemented by Antonios Motakis and adapted
> from a separate patch into the appropriate patches introducing the
> functionality. Note that the VMIDs are stored per VM as required by
> the ARM architecture reference manual.

Ditto.
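One aside on the Hyp-ABI above, for other reviewers: the host side of
this ABI is nothing more than a bare HVC. A minimal sketch, assuming
the virtualization extensions are available and Hyp was set up by the
init code -- the wrapper name and the #0 immediate are made up here,
the real entry point is whatever traps to hyp_hvc:

	/*
	 * Sketch only: entering Hyp mode from host SVC per the Hyp-ABI
	 * described above. hyp_hvc saves/restores the state shown in
	 * the stack layout. Needs an assembler that knows the
	 * virtualization extensions.
	 */
	static inline void host_enter_hyp(void)
	{
		asm volatile(".arch_extension virt\n\t"
			     "hvc #0"	/* traps to the hyp_hvc vector */
			     : : : "memory");
	}
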
> diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
> index 220f241..232117c 100644
> --- a/arch/arm/include/asm/kvm_arm.h
> +++ b/arch/arm/include/asm/kvm_arm.h
> @@ -105,6 +105,17 @@
>  #define TTBCR_T0SZ	3
>  #define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
>
> +/* Hyp System Trap Register */
> +#define HSTR_T(x)	(1 << x)
> +#define HSTR_TTEE	(1 << 16)
> +#define HSTR_TJDBX	(1 << 17)
> +
> +/* Hyp Coprocessor Trap Register */
> +#define HCPTR_TCP(x)	(1 << x)
> +#define HCPTR_TCP_MASK	(0x3fff)
> +#define HCPTR_TASE	(1 << 15)
> +#define HCPTR_TTA	(1 << 20)
> +#define HCPTR_TCPAC	(1 << 31)
>
>  /* Virtualization Translation Control Register (VTCR) bits */
>  #define VTCR_SH0	(3 << 12)
> @@ -126,5 +137,31 @@
>  #define VTTBR_X		(5 - VTCR_GUEST_T0SZ)
>  #endif
>
> +/* Hyp Syndrome Register (HSR) bits */
> +#define HSR_EC_SHIFT	(26)
> +#define HSR_EC		(0x3fU << HSR_EC_SHIFT)
> +#define HSR_IL		(1U << 25)
> +#define HSR_ISS		(HSR_IL - 1)
> +#define HSR_ISV_SHIFT	(24)
> +#define HSR_ISV		(1U << HSR_ISV_SHIFT)
> +
> +#define HSR_EC_UNKNOWN	(0x00)
> +#define HSR_EC_WFI	(0x01)
> +#define HSR_EC_CP15_32	(0x03)
> +#define HSR_EC_CP15_64	(0x04)
> +#define HSR_EC_CP14_MR	(0x05)
> +#define HSR_EC_CP14_LS	(0x06)
> +#define HSR_EC_CP_0_13	(0x07)
> +#define HSR_EC_CP10_ID	(0x08)
> +#define HSR_EC_JAZELLE	(0x09)
> +#define HSR_EC_BXJ	(0x0A)
> +#define HSR_EC_CP14_64	(0x0C)
> +#define HSR_EC_SVC_HYP	(0x11)
> +#define HSR_EC_HVC	(0x12)
> +#define HSR_EC_SMC	(0x13)
> +#define HSR_EC_IABT	(0x20)
> +#define HSR_EC_IABT_HYP	(0x21)
> +#define HSR_EC_DABT	(0x24)
> +#define HSR_EC_DABT_HYP	(0x25)
>
>  #endif /* __ARM_KVM_ARM_H__ */
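Not a problem with the patch, but to make the encoding concrete for
other reviewers: the EC field is what the exit path will dispatch on.
A sketch (mine, not from the patch):

	/*
	 * Sketch only: extract the exception class from a saved HSR
	 * value using the defines above; the saved copy lives in
	 * vcpu->arch.hsr (see the asm-offsets below).
	 */
	static inline u32 hsr_exception_class(u32 hsr)
	{
		return (hsr & HSR_EC) >> HSR_EC_SHIFT; /* an HSR_EC_* value */
	}

e.g. a guest HVC shows up as HSR_EC_HVC and a stage-2 data abort as
HSR_EC_DABT.
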
> diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
> index b57c75e..38d3a12 100644
> --- a/arch/arm/kernel/armksyms.c
> +++ b/arch/arm/kernel/armksyms.c
> @@ -48,6 +48,13 @@ extern void __aeabi_ulcmp(void);
>
>  extern void fpundefinstr(void);
>
> +#ifdef CONFIG_KVM_ARM_HOST
> +/* This is needed for KVM */
> +extern void __irq_svc(void);
> +
> +EXPORT_SYMBOL_GPL(__irq_svc);
> +#endif
> +
>  /* platform dependent support */
>  EXPORT_SYMBOL(__udelay);
>  EXPORT_SYMBOL(__const_udelay);
> diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
> index 1429d89..9c76b53 100644
> --- a/arch/arm/kernel/asm-offsets.c
> +++ b/arch/arm/kernel/asm-offsets.c
> @@ -13,6 +13,7 @@
>  #include <linux/sched.h>
>  #include <linux/mm.h>
>  #include <linux/dma-mapping.h>
> +#include <linux/kvm_host.h>
>  #include <asm/cacheflush.h>
>  #include <asm/glue-df.h>
>  #include <asm/glue-pf.h>
> @@ -144,5 +145,47 @@ int main(void)
>    DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
>    DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
>    DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
> +#ifdef CONFIG_KVM_ARM_HOST
> +  DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
> +  DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.cp15[c0_MIDR]));
> +  DEFINE(VCPU_MPIDR, offsetof(struct kvm_vcpu, arch.cp15[c0_MPIDR]));
> +  DEFINE(VCPU_SCTLR, offsetof(struct kvm_vcpu, arch.cp15[c1_SCTLR]));
> +  DEFINE(VCPU_CPACR, offsetof(struct kvm_vcpu, arch.cp15[c1_CPACR]));
> +  DEFINE(VCPU_TTBR0, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR0]));
> +  DEFINE(VCPU_TTBR1, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR1]));
> +  DEFINE(VCPU_TTBCR, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBCR]));
> +  DEFINE(VCPU_DACR, offsetof(struct kvm_vcpu, arch.cp15[c3_DACR]));
> +  DEFINE(VCPU_DFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_DFSR]));
> +  DEFINE(VCPU_IFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_IFSR]));
> +  DEFINE(VCPU_ADFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_ADFSR]));
> +  DEFINE(VCPU_AIFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_AIFSR]));
> +  DEFINE(VCPU_DFAR, offsetof(struct kvm_vcpu, arch.cp15[c6_DFAR]));
> +  DEFINE(VCPU_IFAR, offsetof(struct kvm_vcpu, arch.cp15[c6_IFAR]));
> +  DEFINE(VCPU_PRRR, offsetof(struct kvm_vcpu, arch.cp15[c10_PRRR]));
> +  DEFINE(VCPU_NMRR, offsetof(struct kvm_vcpu, arch.cp15[c10_NMRR]));
> +  DEFINE(VCPU_VBAR, offsetof(struct kvm_vcpu, arch.cp15[c12_VBAR]));
> +  DEFINE(VCPU_CID, offsetof(struct kvm_vcpu, arch.cp15[c13_CID]));
> +  DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW]));
> +  DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO]));
> +  DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV]));
> +  DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
> +  DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
> +  DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
> +  DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
> +  DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
> +  DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
> +  DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
> +  DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc));
> +  DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr));
> +  DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
> +  DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr));
> +  DEFINE(VCPU_HDFAR, offsetof(struct kvm_vcpu, arch.hdfar));
> +  DEFINE(VCPU_HIFAR, offsetof(struct kvm_vcpu, arch.hifar));
> +  DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar));
> +  DEFINE(VCPU_PC_IPA, offsetof(struct kvm_vcpu, arch.pc_ipa));
> +  DEFINE(VCPU_PC_IPA2, offsetof(struct kvm_vcpu, arch.pc_ipa2));
> +  DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.hyp_pc));
> +  DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
> +#endif
>    return 0;
>  }
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 437f0c4..db029bb 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -209,6 +209,7 @@ __dabt_svc:
>  ENDPROC(__dabt_svc)
>
>  	.align 5
> +	.globl __irq_svc
>  __irq_svc:
>  	svc_entry
>  	irq_handler
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 8b024ee..4687690 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -37,12 +37,19 @@
>  #include <asm/mman.h>
>  #include <asm/idmap.h>
>  #include <asm/tlbflush.h>
> +#include <asm/cputype.h>
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/kvm_emulate.h>
>
>  static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
>
> +/* The VMID used in the VTTBR */
> +static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
> +static u8 kvm_next_vmid;
> +DEFINE_SPINLOCK(kvm_vmid_lock);

static, too.

> +
> +
> +/**
> + * check_new_vmid_gen - check that the VMID is still valid
> + * @kvm:	The VM's VMID to check
> + *
> + * return true if there is a new generation of VMIDs being used
> + *
> + * The hardware supports only 256 values with the value zero reserved for the
> + * host, so we check if an assigned value belongs to a previous generation,
> + * which requires us to assign a new value. If we're the first to use a
> + * VMID for the new generation, we must flush necessary caches and TLBs on all
> + * CPUs.
> + */
> +static bool check_new_vmid_gen(struct kvm *kvm)
> +{
> +	return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
> +}

Better have the name indicate what a true return value means, like
'need_new_vmid_gen()'.

> +
> +/**
> + * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
> + * @kvm:	The guest that we are about to run
> + *
> + * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
> + * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
> + * caches and TLBs.
> + */
> +static void update_vttbr(struct kvm *kvm)
> +{
> +	phys_addr_t pgd_phys;
> +
> +	if (!check_new_vmid_gen(kvm))
> +		return;
> +
> +	spin_lock(&kvm_vmid_lock);
> +
> +	/* First user of a new VMID generation? */
> +	if (unlikely(kvm_next_vmid == 0)) {
> +		atomic64_inc(&kvm_vmid_gen);
> +		kvm_next_vmid = 1;
> +
> +		/* This does nothing on UP */
> +		smp_call_function(reset_vm_context, NULL, 1);
> +
> +		/*
> +		 * On SMP we know no other CPUs can use this CPU's or
> +		 * each other's VMID since the kvm_vmid_lock blocks
> +		 * them from reentry to the guest.
> +		 */
> +
> +		reset_vm_context(NULL);

on_each_cpu(reset_vm_context, NULL, 1) will combine the two calls
above.

> +	}
> +
> +	kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
> +	kvm->arch.vmid = kvm_next_vmid;
> +	kvm_next_vmid++;
> +
> +	/* update vttbr to be used with the new vmid */
> +	pgd_phys = virt_to_phys(kvm->arch.pgd);
> +	kvm->arch.vttbr = pgd_phys & ((1LLU << 40) - 1)
> +			  & ~((2 << VTTBR_X) - 1);
> +	kvm->arch.vttbr |= (u64)(kvm->arch.vmid) << 48;
> +
> +	spin_unlock(&kvm_vmid_lock);
> +}
> +
> +/*
> + * Return 0 to return to guest, < 0 on error, exit_reason (> 0) on proper
> + * exit to QEMU.
> + */
> +static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
> +		       int exception_index)
> +{
> +	return -EINVAL;

x86 returns KVM_EXIT_INTERNAL_ERROR when it encounters an unhandlable
exit. -EINVAL indicates that the user has done something wrong, which
isn't the case here.

> +}
> +
> +/*
> + * Return 0 to proceed with guest entry
> + */
> +static int vcpu_pre_guest_enter(struct kvm_vcpu *vcpu, int *exit_reason)
> +{
> +	if (signal_pending(current)) {
> +		*exit_reason = KVM_EXIT_INTR;
> +		return -EINTR;
> +	}
> +
> +	if (check_new_vmid_gen(vcpu->kvm))
> +		return 1;
> +
> +	BUG_ON(__vcpu_mode(*vcpu_cpsr(vcpu)) == 0xf);
> +
>  	return 0;
>  }
>
> +/**
> + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
> + * @vcpu:	The VCPU pointer
> + * @run:	The kvm_run structure pointer used for userspace state exchange
> + *
> + * This function is called through the VCPU_RUN ioctl called from user space.
> + * It will execute VM code in a loop until the time slice for the process is
> + * used or some emulation is needed from user space in which case the
> + * function will return with return value 0 and with the kvm_run structure
> + * filled in with the required data for the requested emulation.
> + */
>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  {
> -	return -EINVAL;
> +	int ret = 0;
> +	int exit_reason;
> +	sigset_t sigsaved;
> +
> +	if (vcpu->sigset_active)
> +		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
> +

We should move this to common code. But I don't mind if this is done
post merge.

> +	exit_reason = KVM_EXIT_UNKNOWN;
> +	while (exit_reason == KVM_EXIT_UNKNOWN) {

Looping over 'ret' is more in line with x86 and clearer IMO. x86 uses
the convention: < 0 -> return to userspace with error, 0 -> return to
userspace, 1 -> loop.
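Something along these lines (untested sketch, mine, reusing the
patch's names; tracing, guest-time accounting and the BUG_ON elided,
and it assumes handle_exit() is reworked to set run->exit_reason
itself and return < 0 / 0 / 1 as above):

	int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
	{
		int ret = 1;
		sigset_t sigsaved;

		if (vcpu->sigset_active)
			sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

		while (ret > 0) {
			cond_resched();
			update_vttbr(vcpu->kvm);

			local_irq_disable();

			if (signal_pending(current)) {
				/* ret < 0 ends the loop, no 'continue' dance */
				local_irq_enable();
				run->exit_reason = KVM_EXIT_INTR;
				ret = -EINTR;
				break;
			}

			if (check_new_vmid_gen(vcpu->kvm)) {
				/* retry with a VMID from the new generation */
				local_irq_enable();
				continue;
			}

			ret = __kvm_vcpu_run(vcpu);	/* enter the guest */
			local_irq_enable();

			/* < 0: error, 0: exit to userspace, 1: loop */
			ret = handle_exit(vcpu, run, ret);
		}

		if (vcpu->sigset_active)
			sigprocmask(SIG_SETMASK, &sigsaved, NULL);

		return ret;
	}
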
> +		/*
> +		 * Check conditions before entering the guest
> +		 */
> +		cond_resched();
> +
> +		update_vttbr(vcpu->kvm);
> +
> +		local_irq_disable();
> +
> +		/* Re-check atomic conditions */
> +		ret = vcpu_pre_guest_enter(vcpu, &exit_reason);
> +		if (ret != 0) {
> +			local_irq_enable();
> +			preempt_enable();
> +			continue;

See - you continue, only to break out of the loop due to a side effect
on exit_reason.

> +		}
> +
> +		/**************************************************************
> +		 * Enter the guest
> +		 */
> +		trace_kvm_entry(vcpu->arch.regs.pc);
> +		kvm_guest_enter();
> +		vcpu->mode = IN_GUEST_MODE;
> +
> +		ret = __kvm_vcpu_run(vcpu);
> +
> +		vcpu->mode = OUTSIDE_GUEST_MODE;
> +		vcpu->stat.exits++;

The tracepoint above should be sufficient for statistics.

> +		kvm_guest_exit();
> +		trace_kvm_exit(vcpu->arch.regs.pc);
> +		local_irq_enable();
> +
> +		/*
> +		 * Back from guest
> +		 *************************************************************/
> +
> +		ret = handle_exit(vcpu, run, ret);
> +		if (ret < 0) {
> +			kvm_err("Error in handle_exit\n");
> +			break;
> +		} else {
> +			exit_reason = ret;	/* 0 == KVM_EXIT_UNKNOWN */
> +		}
> +	}
> +
> +	if (vcpu->sigset_active)
> +		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> +
> +	run->exit_reason = exit_reason;
> +	return ret;
>  }

-- 
error compiling committee.c: too many arguments to function