On Sat, Oct 20, 2012 at 6:27 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote: > On Sat, 20 Oct 2012 00:14:42 -0400, Christoffer Dall > <c.dall@xxxxxxxxxxxxxxxxxxxxxx> wrote: >> The vgic virtual cpu and emulated distributor interfaces must >> be mapped at a given physical address in the guest. This address is >> provided through the KVM_SET_DEVICE_ADDRESS ioctl, which happens after >> the KVM_CREATE_IRQCHIP ioctl is called, but before the first VCPU is >> excuted thorugh KVM_RUN. We create the vgic on KVM_CREATE_IRQCHIP, but >> query kvm_vgic_ready(kvm), which checks if the vgic.vctrl_base field has >> been set, before we execute a VCPU, and if it has not been set, we call >> kvm_vgic_init, which takes care of the remaining setup. >> >> We use the IS_VGIC_ADDR_UNDEF() macro, which compares to the >> VGIC_ADDR_UNDEF constant, to check if an address has been set; it's >> unlikely that a device will sit on address 0, but since this is a part >> of main kernel boot procedure if this stuff is enabled in the config, >> I'm being paranoid. >> >> The distributor and vcpu base addresses used to be a per-host setting >> global for all VMs, but this is not a requirement and when we want to >> emulate several boards on a single host, we need the flexibility of >> storing these guest addresses on a per-VM basis. >> >> Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> >> --- >> arch/arm/include/asm/kvm_vgic.h | 21 ++++++++-- >> arch/arm/kvm/arm.c | 10 ++++- >> arch/arm/kvm/vgic.c | 82 >> +++++++++++++++++++++++++++------------ >> 3 files changed, 84 insertions(+), 29 deletions(-) >> >> diff --git a/arch/arm/include/asm/kvm_vgic.h >> b/arch/arm/include/asm/kvm_vgic.h >> index a688132..2de167f 100644 >> --- a/arch/arm/include/asm/kvm_vgic.h >> +++ b/arch/arm/include/asm/kvm_vgic.h >> @@ -154,13 +154,14 @@ static inline void vgic_bytemap_set_irq_val(struct >> vgic_bytemap *x, >> struct vgic_dist { >> #ifdef CONFIG_KVM_ARM_VGIC >> spinlock_t lock; >> + bool ready; >> >> /* Virtual control interface mapping */ >> void __iomem *vctrl_base; >> >> - /* Distributor mapping in the guest */ >> - unsigned long vgic_dist_base; >> - unsigned long vgic_dist_size; >> + /* Distributor and vcpu interface mapping in the guest */ >> + phys_addr_t vgic_dist_base; >> + phys_addr_t vgic_cpu_base; >> >> /* Distributor enabled */ >> u32 enabled; >> @@ -243,6 +244,7 @@ struct kvm_exit_mmio; >> #ifdef CONFIG_KVM_ARM_VGIC >> int kvm_vgic_hyp_init(void); >> int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); >> +int kvm_vgic_create(struct kvm *kvm); >> int kvm_vgic_init(struct kvm *kvm); >> void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); >> void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu); >> @@ -252,8 +254,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, >> unsigned int irq_num, >> int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); >> bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, >> struct kvm_exit_mmio *mmio); >> +bool irqchip_in_kernel(struct kvm *kvm); >> >> -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) >> +#define vgic_initialized(k) ((k)->arch.vgic.ready) >> #define >> vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) > == >> 0) >> >> #else >> @@ -267,6 +270,11 @@ static inline int kvm_vgic_set_addr(struct kvm > *kvm, >> unsigned long type, u64 add >> return 0; >> } >> >> +static inline int kvm_vgic_create(struct kvm *kvm) >> +{ >> + return 0; >> +} >> + >> static inline int kvm_vgic_init(struct kvm *kvm) >> { >> return 0; >> @@ -298,6 +306,11 @@ static inline int irqchip_in_kernel(struct kvm > *kvm) >> return 0; >> } >> >> +static inline bool kvm_vgic_initialized(struct kvm *kvm) >> +{ >> + return true; >> +} >> + >> static inline int vgic_active_irq(struct kvm_vcpu *vcpu) >> { >> return 0; >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >> index 282794e..d64783e 100644 >> --- a/arch/arm/kvm/arm.c >> +++ b/arch/arm/kvm/arm.c >> @@ -636,6 +636,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, >> struct kvm_run *run) >> if (unlikely(vcpu->arch.target < 0)) >> return -ENOEXEC; >> >> + /* Initalize the VGIC before running the vcpu */ >> + if (unlikely(irqchip_in_kernel(vcpu->kvm) && >> + !vgic_initialized(vcpu->kvm))) { >> + ret = kvm_vgic_init(vcpu->kvm); >> + if (ret) >> + return ret; >> + } >> + >> if (run->exit_reason == KVM_EXIT_MMIO) { >> ret = kvm_handle_mmio_return(vcpu, vcpu->run); >> if (ret) >> @@ -889,7 +897,7 @@ long kvm_arch_vm_ioctl(struct file *filp, >> #ifdef CONFIG_KVM_ARM_VGIC >> case KVM_CREATE_IRQCHIP: { >> if (vgic_present) >> - return kvm_vgic_init(kvm); >> + return kvm_vgic_create(kvm); >> else >> return -EINVAL; >> } >> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c >> index d63b7f8..fa591db 100644 >> --- a/arch/arm/kvm/vgic.c >> +++ b/arch/arm/kvm/vgic.c >> @@ -65,12 +65,17 @@ >> * interrupt line to be sampled again. >> */ >> >> -/* Temporary hacks, need to be provided by userspace emulation */ >> -#define VGIC_DIST_BASE 0x2c001000 >> +#define VGIC_ADDR_UNDEF (-1) >> +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == (typeof(_x))VGIC_ADDR_UNDEF) > > This is an awkward construct. You should really know the type of what > you're checking. Consider: I don't think it's awkward, but a pretty common construct. I can't know which type I check when it's used for both a pointer and a phys_addr_t. > > #define VGIC_ADDR_UNDEF ((phys_addr_t)(~0UL) > #define IS_VGIC_ADDR_UNDEF(x) ((x) == VGIC_ADDR_UNDEF) > >> + >> + >> #define VGIC_DIST_SIZE 0x1000 >> #define VGIC_CPU_BASE 0x2c002000 >> #define VGIC_CPU_SIZE 0x2000 >> >> +/* Physical address of vgic virtual cpu interface */ >> +static phys_addr_t vgic_vcpu_base; >> + >> /* Virtual control interface base address */ >> static void __iomem *vgic_vctrl_base; >> >> @@ -538,7 +543,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct >> kvm_run *run, struct kvm_exi >> >> if (!irqchip_in_kernel(vcpu->kvm) || >> mmio->phys_addr < base || >> - (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size)) >> + (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE)) >> return false; >> >> range = find_matching_range(vgic_ranges, mmio, base); >> @@ -1027,7 +1032,7 @@ void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) >> vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; >> } >> >> - BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base); >> + BUG_ON(IS_VGIC_ADDR_UNDEF(vcpu->kvm->arch.vgic.vctrl_base)); >> reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR); >> vgic_cpu->nr_lr = (reg & 0x1f) + 1; >> >> @@ -1101,29 +1106,23 @@ out_free_irq: >> >> int kvm_vgic_init(struct kvm *kvm) >> { >> - int ret, i; >> - struct resource vcpu_res; >> + int ret = 0, i; >> >> mutex_lock(&kvm->lock); >> >> - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { >> - kvm_err("Cannot obtain VCPU resource\n"); >> - ret = -ENXIO; >> + if (vgic_initialized(kvm)) >> goto out; >> - } >> >> - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { >> - ret = -EEXIST; >> + if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || >> + IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { >> + kvm_err("Need to set vgic cpu and dist addresses first\n"); >> + ret = -ENXIO; >> goto out; >> } >> >> - spin_lock_init(&kvm->arch.vgic.lock); >> - kvm->arch.vgic.vctrl_base = vgic_vctrl_base; >> - kvm->arch.vgic.vgic_dist_base = VGIC_DIST_BASE; >> - kvm->arch.vgic.vgic_dist_size = VGIC_DIST_SIZE; >> + ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, >> + vgic_vcpu_base, VGIC_CPU_SIZE); >> >> - ret = kvm_phys_addr_ioremap(kvm, VGIC_CPU_BASE, >> - vcpu_res.start, VGIC_CPU_SIZE); >> if (ret) { >> kvm_err("Unable to remap VGIC CPU to VCPU\n"); >> goto out; >> @@ -1132,12 +1131,45 @@ int kvm_vgic_init(struct kvm *kvm) >> for (i = 32; i < VGIC_NR_IRQS; i += 4) >> vgic_set_target_reg(kvm, 0, i); >> >> + kvm_timer_init(kvm); >> + kvm->arch.vgic.ready = true; >> out: >> mutex_unlock(&kvm->lock); >> + return ret; >> +} >> >> - if (!ret) >> - kvm_timer_init(kvm); >> +bool irqchip_in_kernel(struct kvm *kvm) >> +{ >> + return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base)); > > Erm... This check is supposed VM specific. Here, you check something that > will be initialized once and for all when the first VM is run. Use one of > the vgic *guest* base addresses instead. > Ouch, right you are. (This whole scheme of checking various base addresses for whether something is initialized or not seems quite brittle, but I don't want to rework it before a merge) >> +} >> >> +int kvm_vgic_create(struct kvm *kvm) >> +{ >> + int ret; >> + struct resource vcpu_res; >> + >> + mutex_lock(&kvm->lock); >> + >> + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { >> + kvm_err("Cannot obtain VCPU resource\n"); >> + ret = -ENXIO; >> + goto out; >> + } > > This function is called on every VM creation. Consider moving this lookup > and the vgic_vcpu_base assignment to kvm_vgic_hyp_init(). > >> + if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { >> + ret = -EEXIST; >> + goto out; >> + } >> + >> + spin_lock_init(&kvm->arch.vgic.lock); >> + kvm->arch.vgic.vctrl_base = vgic_vctrl_base; >> + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; >> + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; >> + >> + vgic_vcpu_base = vcpu_res.start; >> + ret = 0; >> +out: >> + mutex_unlock(&kvm->lock); >> return ret; >> } >> >> @@ -1151,12 +1183,14 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned >> long type, u64 addr) >> mutex_lock(&kvm->lock); >> switch (type) { >> case KVM_VGIC_V2_ADDR_TYPE_DIST: >> - if (addr != VGIC_DIST_BASE) >> - return -EINVAL; >> + if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base)) >> + return -EEXIST; >> + kvm->arch.vgic.vgic_dist_base = addr; >> break; >> case KVM_VGIC_V2_ADDR_TYPE_CPU: >> - if (addr != VGIC_CPU_BASE) >> - return -EINVAL; >> + if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) >> + return -EEXIST; >> + kvm->arch.vgic.vgic_cpu_base = addr; >> break; >> default: >> r = -ENODEV; > > We need some additional checks here about the validity of the address (at > least page aligned, and non overlapping with memory or other in-kernel > device models). > since we don't have any other in-kernel device models that are memory mapped, we should cross that bridge when/if we get there. It will not affect stability of the host kernel, merely not properly boot a guest if you supply bogus overlapping io regions. See patch diff: diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h index 2de167f..3c71c79 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/arch/arm/include/asm/kvm_vgic.h @@ -256,6 +256,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); bool irqchip_in_kernel(struct kvm *kvm); +#define irqchip_in_kernel(k) ((k)->arch.vgic.vctrl_base) #define vgic_initialized(k) ((k)->arch.vgic.ready) #define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 0ab098e..e5ace0e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -422,7 +422,7 @@ static void stage2_clear_pte(struct kvm *kvm, phys_addr_t addr) } static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, - phys_addr_t addr, const pte_t *new_pte) + phys_addr_t addr, const pte_t *new_pte, bool iomap) { pgd_t *pgd; pud_t *pud; @@ -454,6 +454,9 @@ static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, } else pte = pte_offset_kernel(pmd, addr); + if (iomap && pte_present(old_pte)) + return -EFAULT; + /* Create 2nd stage page table mapping - Level 3 */ old_pte = *pte; set_pte_ext(pte, *new_pte, 0); @@ -489,7 +492,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (ret) goto out; spin_lock(&kvm->mmu_lock); - stage2_set_pte(kvm, &cache, addr, &pte); + stage2_set_pte(kvm, &cache, addr, &pte, true); spin_unlock(&kvm->mmu_lock); pfn++; @@ -565,7 +568,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pte_val(new_pte) |= L_PTE_S2_RDWR; kvm_set_pfn_dirty(pfn); } - stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte); + stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); @@ -716,7 +719,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; - stage2_set_pte(kvm, NULL, gpa, pte); + stage2_set_pte(kvm, NULL, gpa, pte, false); } diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index fa591db..8fe0e70 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -1054,6 +1054,7 @@ int kvm_vgic_hyp_init(void) int ret; unsigned int irq; struct resource vctrl_res; + struct resource vcpu_res; vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); if (!vgic_node) @@ -1094,6 +1095,13 @@ int kvm_vgic_hyp_init(void) kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq); on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1); + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain VCPU resource\n"); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + return 0; out_unmap: @@ -1138,24 +1146,12 @@ out: return ret; } -bool irqchip_in_kernel(struct kvm *kvm) -{ - return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base)); -} - int kvm_vgic_create(struct kvm *kvm) { int ret; - struct resource vcpu_res; mutex_lock(&kvm->lock); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { - kvm_err("Cannot obtain VCPU resource\n"); - ret = -ENXIO; - goto out; - } - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { ret = -EEXIST; goto out; @@ -1166,36 +1162,62 @@ int kvm_vgic_create(struct kvm *kvm) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - vgic_vcpu_base = vcpu_res.start; ret = 0; out: mutex_unlock(&kvm->lock); return ret; } +static bool vgic_ioaddr_overlap(struct kvm *kvm) +{ + phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; + phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; + + if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) + return false; + if ((dist <= cpu && dist + VGIC_DIST_SIZE > cpu) || + (cpu <= dist && cpu + VGIC_CPU_SIZE > dist)) + return true; + return false; +} + int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) { int r = 0; + struct vgic_dist *vgic = &kvm->arch.vgic; if (addr & ~KVM_PHYS_MASK) return -E2BIG; + if (addr & ~PAGE_MASK) + return -EINVAL; + mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base)) + if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_dist_base)) return -EEXIST; + if (addr + VGIC_DIST_SIZE < addr) + return -EINVAL; kvm->arch.vgic.vgic_dist_base = addr; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) + if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_cpu_base)) return -EEXIST; + if (addr + VGIC_CPU_SIZE < addr) + return -EINVAL; kvm->arch.vgic.vgic_cpu_base = addr; break; default: r = -ENODEV; } + if (vgic_ioaddr_overlap(kvm)) { + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + return -EINVAL; + } + mutex_unlock(&kvm->lock); return r; } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html