In order to support KVM on an SMP host, it is necessary to initialize
the hypervisor on all CPUs, mostly by making sure each CPU gets its own
hypervisor stack and runs the HYP init code.

We also add the previously missing locking around modifications to the
hypervisor page tables and ensure consistency between the virtual IRQ
masks and the wait_for_interrupts flags on the VCPUs.

Note that this code doesn't handle CPU hotplug yet.
Note that this code doesn't support SMP guests.

WARNING: This code is in development and guests do not fully boot on
SMP hosts yet.

Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
---
 arch/arm/include/asm/kvm_host.h |    4 -
 arch/arm/include/asm/kvm_mmu.h  |    1
 arch/arm/kvm/arm.c              |  175 +++++++++++++++++++++++----------------
 arch/arm/kvm/emulate.c          |    2
 arch/arm/kvm/mmu.c              |    9 ++
 5 files changed, 114 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 86f6cf1..a0ffbe8 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -78,8 +78,6 @@ struct kvm_vcpu_arch {
 		u32 c13_TID_PRIV;	/* Thread ID, Priveleged */
 	} cp15;

-	u32 virt_irq;		/* HCR exception mask */
-
 	/* Exception Information */
 	u32 hsr;		/* Hyp Syndrom Register */
 	u32 hdfar;		/* Hyp Data Fault Address Register */
@@ -92,6 +90,8 @@ struct kvm_vcpu_arch {
 	u32 mmio_rd;

 	/* Misc. fields */
+	spinlock_t irq_lock;
+	u32 virt_irq;		/* HCR exception mask */
 	u32 wait_for_interrupts;
 };

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index e82eae9..917edd7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,7 @@
 #define PGD2_ORDER	get_order(PTRS_PER_PGD2 * sizeof(pgd_t))

 extern pgd_t *kvm_hyp_pgd;
+extern struct mutex kvm_hyp_pgd_mutex;

 int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to);
 void free_hyp_pmds(pgd_t *hyp_pgd);
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 00215a1..6e384e2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,7 +61,7 @@ void __kvm_print_msg(char *fmt, ...)
 	spin_unlock(&__tmp_log_lock);
 }

-static void *kvm_arm_hyp_stack_page;
+static DEFINE_PER_CPU(void *, kvm_arm_hyp_stack_page);

 /* The VMID used in the VTTBR */
 #define VMID_SIZE (1<<8)
@@ -257,6 +257,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	unsigned long cpsr;
 	unsigned long sctlr;

+	spin_lock_init(&vcpu->arch.irq_lock);
+
 	/* Init execution CPSR */
 	asm volatile ("mrs %[cpsr], cpsr" :
 			[cpsr] "=r" (cpsr));
@@ -464,13 +466,27 @@ static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,

 	trace_kvm_irq_line(irq_level->irq % 2, irq_level->level, vcpu_idx);

+	spin_lock(&vcpu->arch.irq_lock);
 	if (irq_level->level) {
 		vcpu->arch.virt_irq |= mask;
+
+		/*
+		 * Note that we grab the wq.lock before clearing the wfi flag
+		 * since this ensures that a concurrent call to kvm_vcpu_block
+		 * will either sleep before we grab the lock, in which case we
+		 * wake it up, or will never sleep due to
+		 * kvm_arch_vcpu_runnable being true (iow. this avoids having
+		 * to grab the irq_lock in kvm_arch_vcpu_runnable).
+		 */
+		spin_lock(&vcpu->wq.lock);
 		vcpu->arch.wait_for_interrupts = 0;
+
 		if (waitqueue_active(&vcpu->wq))
-			wake_up_interruptible(&vcpu->wq);
+			__wake_up_locked(&vcpu->wq, TASK_INTERRUPTIBLE);
+		spin_unlock(&vcpu->wq.lock);
 	} else
 		vcpu->arch.virt_irq &= ~mask;
+	spin_unlock(&vcpu->arch.irq_lock);

 	return 0;
 }
@@ -505,14 +521,49 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }

+static void cpu_set_vector(void *vector)
+{
+	/*
+	 * Set the HVBAR
+	 */
+	asm volatile (
+		"mov r0, %[vector_ptr]\n\t"
+		"ldr r7, =SMCHYP_HVBAR_W\n\t"
+		"smc #0\n\t" : :
+		[vector_ptr] "r" (vector) :
+		"r0", "r7");
+}
+
+static void cpu_init_hyp_mode(void *vector)
+{
+	unsigned long hyp_stack_ptr;
+	void *stack_page;
+
+	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
+	hyp_stack_ptr = (unsigned long)stack_page + PAGE_SIZE;
+
+	cpu_set_vector(vector);
+
+	/*
+	 * Call initialization code
+	 */
+	asm volatile (
+		"mov r0, %[pgd_ptr]\n\t"
+		"mov r1, %[stack_ptr]\n\t"
+		"hvc #0\n\t" : :
+		[pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
+		[stack_ptr] "r" (hyp_stack_ptr) :
+		"r0", "r1");
+}
+
 /**
- * Inits Hyp-mode on a single CPU
+ * Inits Hyp-mode on all online CPUs
  */
 static int init_hyp_mode(void)
 {
 	phys_addr_t init_phys_addr, init_end_phys_addr;
-	unsigned long hyp_stack_ptr;
 	int err = 0;
+	int cpu;

 	/*
 	 * Allocate Hyp level-1 page table
@@ -522,47 +573,42 @@ static int init_hyp_mode(void)
 		return -ENOMEM;

 	/*
-	 * Allocate stack page for Hypervisor-mode
+	 * Allocate stack pages for Hypervisor-mode
 	 */
-	kvm_arm_hyp_stack_page = (void *)__get_free_page(GFP_KERNEL);
-	if (!kvm_arm_hyp_stack_page) {
-		err = -ENOMEM;
-		goto out_free_pgd;
-	}
+	for_each_possible_cpu(cpu) {
+		void *stack_page;

-	hyp_stack_ptr = (unsigned long)kvm_arm_hyp_stack_page + PAGE_SIZE;
+		stack_page = (void *)__get_free_page(GFP_KERNEL);
+		if (!stack_page) {
+			err = -ENOMEM;
+			goto out_free_pgd;
+		}
+
+		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+	}

 	init_phys_addr = virt_to_phys(__kvm_hyp_init);
 	init_end_phys_addr = virt_to_phys(__kvm_hyp_init_end);
+	BUG_ON(init_phys_addr & 0x1f);

 	/*
-	 * Create identity mapping
+	 * Create identity mapping for the init code.
 	 */
 	hyp_identity_mapping_add(kvm_hyp_pgd,
				 (unsigned long)init_phys_addr,
				 (unsigned long)init_end_phys_addr);

 	/*
-	 * Set the HVBAR
-	 */
-	BUG_ON(init_phys_addr & 0x1f);
-	asm volatile (
-		"mov r0, %[vector_ptr]\n\t"
-		"ldr r7, =SMCHYP_HVBAR_W\n\t"
-		"smc #0\n\t" : :
-		[vector_ptr] "r" ((unsigned long)init_phys_addr) :
-		"r0", "r7");
-
-	/*
-	 * Call initialization code
+	 * Execute the init code on each CPU.
+	 *
+	 * Note: The stack is not mapped yet, so don't do anything else than
+	 * initializing the hypervisor mode on each CPU using a local stack
+	 * space for temporary storage.
 	 */
-	asm volatile (
-		"mov r0, %[pgd_ptr]\n\t"
-		"mov r1, %[stack_ptr]\n\t"
-		"hvc #0\n\t" : :
-		[pgd_ptr] "r" (virt_to_phys(kvm_hyp_pgd)),
-		[stack_ptr] "r" (hyp_stack_ptr) :
-		"r0", "r1");
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, cpu_init_hyp_mode,
+					 (void *)(long)init_phys_addr, 1);
+	}

 	/*
 	 * Unmap the identity mapping
@@ -572,37 +618,6 @@ static int init_hyp_mode(void)
 			     (unsigned long)init_end_phys_addr);

 	/*
-	 * Set the HVBAR to the virtual kernel address
-	 */
-	asm volatile (
-		"mov r0, %[vector_ptr]\n\t"
-		"ldr r7, =SMCHYP_HVBAR_W\n\t"
-		"smc #0\n\t" : :
-		[vector_ptr] "r" (__kvm_hyp_vector) :
-		"r0", "r7");
-
-	return err;
-out_free_pgd:
-	kfree(kvm_hyp_pgd);
-	kvm_hyp_pgd = NULL;
-	return err;
-}
-
-/*
- * Initializes the memory mappings used in Hyp-mode
- *
- * Code executed in Hyp-mode and a stack page per cpu must be mapped into the
- * hypervisor translation tables.
- *
- * Currently there is no SMP support so we map only a single stack page on a
- * single CPU.
- */
-static int init_hyp_memory(void)
-{
-	int err = 0;
-	char *stack_page;
-
-	/*
 	 * Map Hyp exception vectors
 	 */
 	err = create_hyp_mappings(kvm_hyp_pgd,
@@ -623,19 +638,35 @@ static int init_hyp_memory(void)
 	}

 	/*
-	 * Map the Hyp stack page
+	 * Map the Hyp stack pages
 	 */
-	stack_page = kvm_arm_hyp_stack_page;
-	err = create_hyp_mappings(kvm_hyp_pgd,
-				  stack_page, stack_page + PAGE_SIZE);
-	if (err) {
-		kvm_err(err, "Cannot map hyp stack");
-		goto out_free_mappings;
+	for_each_possible_cpu(cpu) {
+		char *stack_page = per_cpu(kvm_arm_hyp_stack_page, cpu);
+		err = create_hyp_mappings(kvm_hyp_pgd,
+					  stack_page, stack_page + PAGE_SIZE);
+
+		if (err) {
+			kvm_err(err, "Cannot map hyp stack");
+			goto out_free_mappings;
+		}
 	}

-	return err;
+	/*
+	 * Set the HVBAR to the virtual kernel address
+	 */
+	for_each_online_cpu(cpu)
+		smp_call_function_single(cpu, cpu_set_vector,
+					 __kvm_hyp_vector, 1);
+
+	return 0;
+
 out_free_mappings:
 	free_hyp_pmds(kvm_hyp_pgd);
+	for_each_possible_cpu(cpu)
+		free_page((unsigned long)per_cpu(kvm_arm_hyp_stack_page, cpu));
+out_free_pgd:
+	kfree(kvm_hyp_pgd);
+	kvm_hyp_pgd = NULL;
 	return err;
 }

@@ -650,10 +681,6 @@ int kvm_arch_init(void *opaque)
 	if (err)
 		goto out_err;

-	err = init_hyp_memory();
-	if (err)
-		goto out_err;
-
 	set_bit(0, kvm_vmids);
 	return 0;
 out_err:
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index f60c75a..3cb2fe4 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -347,8 +347,10 @@ unsupp_err_out:
 int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(vcpu->arch.regs.pc);
+	spin_lock(&vcpu->arch.irq_lock);
 	if (!vcpu->arch.virt_irq)
 		vcpu->arch.wait_for_interrupts = 1;
+	spin_unlock(&vcpu->arch.irq_lock);
 	return 0;
 }

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7732c48..50c9571 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -26,6 +26,7 @@
 #include "debug.h"

 pgd_t *kvm_hyp_pgd;
+DEFINE_MUTEX(kvm_hyp_pgd_mutex);

 static void free_ptes(pmd_t *pmd, unsigned long addr)
 {
@@ -55,6 +56,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd)
 	pmd_t *pmd;
 	unsigned long addr, next, end;

+	mutex_lock(&kvm_hyp_pgd_mutex);
 	addr = PAGE_OFFSET;
 	end = ~0;
 	do {
@@ -71,6 +73,7 @@ void free_hyp_pmds(pgd_t *hyp_pgd)
 		free_ptes(pmd, addr);
 		pmd_free(NULL, pmd);
 	} while (addr = next, addr != end);
+	mutex_unlock(&kvm_hyp_pgd_mutex);
 }

 static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long addr,
@@ -140,6 +143,7 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
 	if (start < PAGE_OFFSET)
 		return -EINVAL;

+	mutex_lock(&kvm_hyp_pgd_mutex);
 	addr = start;
 	do {
 		next = pgd_addr_end(addr, end);
@@ -150,7 +154,8 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
 			pmd = pmd_alloc_one(NULL, addr);
 			if (!pmd) {
 				kvm_err(-ENOMEM, "Cannot allocate Hyp pmd");
-				return -ENOMEM;
+				err = -ENOMEM;
+				goto out;
 			}
 			pud_populate(NULL, pud, pmd);
 		}
@@ -160,6 +165,8 @@ int create_hyp_mappings(pgd_t *hyp_pgd, void *from, void *to)
 			return err;
 	} while (addr = next, addr < end);

+out:
+	mutex_unlock(&kvm_hyp_pgd_mutex);
 	return err;
 }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html