Re: [kvmarm] [PATCH 2/2] KVM: ARM: Defer parts of the vgic init until first KVM_RUN

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sat, Oct 20, 2012 at 6:27 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
> On Sat, 20 Oct 2012 00:14:42 -0400, Christoffer Dall
> <c.dall@xxxxxxxxxxxxxxxxxxxxxx> wrote:
>> The vgic virtual cpu and emulated distributor interfaces must
>> be mapped at a given physical address in the guest.  This address is
>> provided through the KVM_SET_DEVICE_ADDRESS ioctl, which happens after
>> the KVM_CREATE_IRQCHIP ioctl is called, but before the first VCPU is
>> excuted thorugh KVM_RUN.  We create the vgic on KVM_CREATE_IRQCHIP, but
>> query kvm_vgic_ready(kvm), which checks if the vgic.vctrl_base field has
>> been set, before we execute a VCPU, and if it has not been set, we call
>> kvm_vgic_init, which takes care of the remaining setup.
>>
>> We use the IS_VGIC_ADDR_UNDEF() macro, which compares to the
>> VGIC_ADDR_UNDEF constant, to check if an address has been set;  it's
>> unlikely that a device will sit on address 0, but since this is a part
>> of main kernel boot procedure if this stuff is enabled in the config,
>> I'm being paranoid.
>>
>> The distributor and vcpu base addresses used to be a per-host setting
>> global for all VMs, but this is not a requirement and when we want to
>> emulate several boards on a single host, we need the flexibility of
>> storing these guest addresses on a per-VM basis.
>>
>> Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
>> ---
>>  arch/arm/include/asm/kvm_vgic.h |   21 ++++++++--
>>  arch/arm/kvm/arm.c              |   10 ++++-
>>  arch/arm/kvm/vgic.c             |   82
>>  +++++++++++++++++++++++++++------------
>>  3 files changed, 84 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_vgic.h
>> b/arch/arm/include/asm/kvm_vgic.h
>> index a688132..2de167f 100644
>> --- a/arch/arm/include/asm/kvm_vgic.h
>> +++ b/arch/arm/include/asm/kvm_vgic.h
>> @@ -154,13 +154,14 @@ static inline void vgic_bytemap_set_irq_val(struct
>> vgic_bytemap *x,
>>  struct vgic_dist {
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>       spinlock_t              lock;
>> +     bool                    ready;
>>
>>       /* Virtual control interface mapping */
>>       void __iomem            *vctrl_base;
>>
>> -     /* Distributor mapping in the guest */
>> -     unsigned long           vgic_dist_base;
>> -     unsigned long           vgic_dist_size;
>> +     /* Distributor and vcpu interface mapping in the guest */
>> +     phys_addr_t             vgic_dist_base;
>> +     phys_addr_t             vgic_cpu_base;
>>
>>       /* Distributor enabled */
>>       u32                     enabled;
>> @@ -243,6 +244,7 @@ struct kvm_exit_mmio;
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>  int kvm_vgic_hyp_init(void);
>>  int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
>> +int kvm_vgic_create(struct kvm *kvm);
>>  int kvm_vgic_init(struct kvm *kvm);
>>  void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
>>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
>> @@ -252,8 +254,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
>> unsigned int irq_num,
>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>  bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>                     struct kvm_exit_mmio *mmio);
>> +bool irqchip_in_kernel(struct kvm *kvm);
>>
>> -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base))
>> +#define vgic_initialized(k)  ((k)->arch.vgic.ready)
>>  #define
>>  vgic_active_irq(v)   (atomic_read(&(v)->arch.vgic_cpu.irq_active_count)
> ==
>>  0)
>>
>>  #else
>> @@ -267,6 +270,11 @@ static inline int kvm_vgic_set_addr(struct kvm
> *kvm,
>> unsigned long type, u64 add
>>       return 0;
>>  }
>>
>> +static inline int kvm_vgic_create(struct kvm *kvm)
>> +{
>> +     return 0;
>> +}
>> +
>>  static inline int kvm_vgic_init(struct kvm *kvm)
>>  {
>>       return 0;
>> @@ -298,6 +306,11 @@ static inline int irqchip_in_kernel(struct kvm
> *kvm)
>>       return 0;
>>  }
>>
>> +static inline bool kvm_vgic_initialized(struct kvm *kvm)
>> +{
>> +     return true;
>> +}
>> +
>>  static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
>>  {
>>       return 0;
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 282794e..d64783e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -636,6 +636,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu,
>> struct kvm_run *run)
>>       if (unlikely(vcpu->arch.target < 0))
>>               return -ENOEXEC;
>>
>> +     /* Initalize the VGIC before running the vcpu */
>> +     if (unlikely(irqchip_in_kernel(vcpu->kvm) &&
>> +                  !vgic_initialized(vcpu->kvm))) {
>> +             ret = kvm_vgic_init(vcpu->kvm);
>> +             if (ret)
>> +                     return ret;
>> +     }
>> +
>>       if (run->exit_reason == KVM_EXIT_MMIO) {
>>               ret = kvm_handle_mmio_return(vcpu, vcpu->run);
>>               if (ret)
>> @@ -889,7 +897,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>       case KVM_CREATE_IRQCHIP: {
>>               if (vgic_present)
>> -                     return kvm_vgic_init(kvm);
>> +                     return kvm_vgic_create(kvm);
>>               else
>>                       return -EINVAL;
>>       }
>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
>> index d63b7f8..fa591db 100644
>> --- a/arch/arm/kvm/vgic.c
>> +++ b/arch/arm/kvm/vgic.c
>> @@ -65,12 +65,17 @@
>>   *   interrupt line to be sampled again.
>>   */
>>
>> -/* Temporary hacks, need to be provided by userspace emulation */
>> -#define VGIC_DIST_BASE               0x2c001000
>> +#define VGIC_ADDR_UNDEF              (-1)
>> +#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == (typeof(_x))VGIC_ADDR_UNDEF)
>
> This is an awkward construct. You should really know the type of what
> you're checking. Consider:

I don't think it's awkward, but a pretty common construct. I can't
know which type I check when it's used for both a pointer and a
phys_addr_t.

>
> #define VGIC_ADDR_UNDEF ((phys_addr_t)(~0UL)
> #define IS_VGIC_ADDR_UNDEF(x) ((x) == VGIC_ADDR_UNDEF)
>
>> +
>> +
>>  #define VGIC_DIST_SIZE               0x1000
>>  #define VGIC_CPU_BASE                0x2c002000
>>  #define VGIC_CPU_SIZE                0x2000
>>
>> +/* Physical address of vgic virtual cpu interface */
>> +static phys_addr_t vgic_vcpu_base;
>> +
>>  /* Virtual control interface base address */
>>  static void __iomem *vgic_vctrl_base;
>>
>> @@ -538,7 +543,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct
>> kvm_run *run, struct kvm_exi
>>
>>       if (!irqchip_in_kernel(vcpu->kvm) ||
>>           mmio->phys_addr < base ||
>> -         (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size))
>> +         (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE))
>>               return false;
>>
>>       range = find_matching_range(vgic_ranges, mmio, base);
>> @@ -1027,7 +1032,7 @@ void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>               vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
>>       }
>>
>> -     BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base);
>> +     BUG_ON(IS_VGIC_ADDR_UNDEF(vcpu->kvm->arch.vgic.vctrl_base));
>>       reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR);
>>       vgic_cpu->nr_lr = (reg & 0x1f) + 1;
>>
>> @@ -1101,29 +1106,23 @@ out_free_irq:
>>
>>  int kvm_vgic_init(struct kvm *kvm)
>>  {
>> -     int ret, i;
>> -     struct resource vcpu_res;
>> +     int ret = 0, i;
>>
>>       mutex_lock(&kvm->lock);
>>
>> -     if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> -             kvm_err("Cannot obtain VCPU resource\n");
>> -             ret = -ENXIO;
>> +     if (vgic_initialized(kvm))
>>               goto out;
>> -     }
>>
>> -     if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> -             ret = -EEXIST;
>> +     if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
>> +         IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
>> +             kvm_err("Need to set vgic cpu and dist addresses first\n");
>> +             ret = -ENXIO;
>>               goto out;
>>       }
>>
>> -     spin_lock_init(&kvm->arch.vgic.lock);
>> -     kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> -     kvm->arch.vgic.vgic_dist_base = VGIC_DIST_BASE;
>> -     kvm->arch.vgic.vgic_dist_size = VGIC_DIST_SIZE;
>> +     ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
>> +                                 vgic_vcpu_base, VGIC_CPU_SIZE);
>>
>> -     ret = kvm_phys_addr_ioremap(kvm, VGIC_CPU_BASE,
>> -                                 vcpu_res.start, VGIC_CPU_SIZE);
>>       if (ret) {
>>               kvm_err("Unable to remap VGIC CPU to VCPU\n");
>>               goto out;
>> @@ -1132,12 +1131,45 @@ int kvm_vgic_init(struct kvm *kvm)
>>       for (i = 32; i < VGIC_NR_IRQS; i += 4)
>>               vgic_set_target_reg(kvm, 0, i);
>>
>> +     kvm_timer_init(kvm);
>> +     kvm->arch.vgic.ready = true;
>>  out:
>>       mutex_unlock(&kvm->lock);
>> +     return ret;
>> +}
>>
>> -     if (!ret)
>> -             kvm_timer_init(kvm);
>> +bool irqchip_in_kernel(struct kvm *kvm)
>> +{
>> +     return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base));
>
> Erm... This check is supposed VM specific. Here, you check something that
> will be initialized once and for all when the first VM is run. Use one of
> the vgic *guest* base addresses instead.
>

Ouch, right you are.

(This whole scheme of checking various base addresses for whether
something is initialized or not seems quite brittle, but I don't want
to rework it before a merge)

>> +}
>>
>> +int kvm_vgic_create(struct kvm *kvm)
>> +{
>> +     int ret;
>> +     struct resource vcpu_res;
>> +
>> +     mutex_lock(&kvm->lock);
>> +
>> +     if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> +             kvm_err("Cannot obtain VCPU resource\n");
>> +             ret = -ENXIO;
>> +             goto out;
>> +     }
>
> This function is called on every VM creation. Consider moving this lookup
> and the vgic_vcpu_base assignment to kvm_vgic_hyp_init().
>
>> +     if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> +             ret = -EEXIST;
>> +             goto out;
>> +     }
>> +
>> +     spin_lock_init(&kvm->arch.vgic.lock);
>> +     kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> +     kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
>> +     kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
>> +
>> +     vgic_vcpu_base = vcpu_res.start;
>> +     ret = 0;
>> +out:
>> +     mutex_unlock(&kvm->lock);
>>       return ret;
>>  }
>>
>> @@ -1151,12 +1183,14 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned
>> long type, u64 addr)
>>       mutex_lock(&kvm->lock);
>>       switch (type) {
>>       case KVM_VGIC_V2_ADDR_TYPE_DIST:
>> -             if (addr != VGIC_DIST_BASE)
>> -                     return -EINVAL;
>> +             if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base))
>> +                     return -EEXIST;
>> +             kvm->arch.vgic.vgic_dist_base = addr;
>>               break;
>>       case KVM_VGIC_V2_ADDR_TYPE_CPU:
>> -             if (addr != VGIC_CPU_BASE)
>> -                     return -EINVAL;
>> +             if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base))
>> +                     return -EEXIST;
>> +             kvm->arch.vgic.vgic_cpu_base = addr;
>>               break;
>>       default:
>>               r = -ENODEV;
>
> We need some additional checks here about the validity of the address (at
> least page aligned, and non overlapping with memory or other in-kernel
> device models).
>

since we don't have any other in-kernel device models that are memory
mapped, we should cross that bridge when/if we get there. It will not
affect stability of the host kernel, merely not properly boot a guest
if you supply bogus overlapping io regions.

See patch diff:

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index 2de167f..3c71c79 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -256,6 +256,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu,
struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 bool irqchip_in_kernel(struct kvm *kvm);

+#define irqchip_in_kernel(k)	((k)->arch.vgic.vctrl_base)
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
 #define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count)
== 0)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 0ab098e..e5ace0e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -422,7 +422,7 @@ static void stage2_clear_pte(struct kvm *kvm,
phys_addr_t addr)
 }

 static void stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			   phys_addr_t addr, const pte_t *new_pte)
+			   phys_addr_t addr, const pte_t *new_pte, bool iomap)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -454,6 +454,9 @@ static void stage2_set_pte(struct kvm *kvm, struct
kvm_mmu_memory_cache *cache,
 	} else
 		pte = pte_offset_kernel(pmd, addr);

+	if (iomap && pte_present(old_pte))
+		return -EFAULT;
+
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
 	set_pte_ext(pte, *new_pte, 0);
@@ -489,7 +492,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm,
phys_addr_t guest_ipa,
 		if (ret)
 			goto out;
 		spin_lock(&kvm->mmu_lock);
-		stage2_set_pte(kvm, &cache, addr, &pte);
+		stage2_set_pte(kvm, &cache, addr, &pte, true);
 		spin_unlock(&kvm->mmu_lock);

 		pfn++;
@@ -565,7 +568,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu,
phys_addr_t fault_ipa,
 		pte_val(new_pte) |= L_PTE_S2_RDWR;
 		kvm_set_pfn_dirty(pfn);
 	}
-	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte);
+	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

 out_unlock:
 	spin_unlock(&vcpu->kvm->mmu_lock);
@@ -716,7 +719,7 @@ static void kvm_set_spte_handler(struct kvm *kvm,
gpa_t gpa, void *data)
 {
 	pte_t *pte = (pte_t *)data;

-	stage2_set_pte(kvm, NULL, gpa, pte);
+	stage2_set_pte(kvm, NULL, gpa, pte, false);
 }


diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index fa591db..8fe0e70 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -1054,6 +1054,7 @@ int kvm_vgic_hyp_init(void)
 	int ret;
 	unsigned int irq;
 	struct resource vctrl_res;
+	struct resource vcpu_res;

 	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
 	if (!vgic_node)
@@ -1094,6 +1095,13 @@ int kvm_vgic_hyp_init(void)
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq);
 	on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1);

+	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+		kvm_err("Cannot obtain VCPU resource\n");
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+	vgic_vcpu_base = vcpu_res.start;
+
 	return 0;

 out_unmap:
@@ -1138,24 +1146,12 @@ out:
 	return ret;
 }

-bool irqchip_in_kernel(struct kvm *kvm)
-{
-	return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base));
-}
-
 int kvm_vgic_create(struct kvm *kvm)
 {
 	int ret;
-	struct resource vcpu_res;

 	mutex_lock(&kvm->lock);

-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain VCPU resource\n");
-		ret = -ENXIO;
-		goto out;
-	}
-
 	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
 		ret = -EEXIST;
 		goto out;
@@ -1166,36 +1162,62 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;

-	vgic_vcpu_base = vcpu_res.start;
 	ret = 0;
 out:
 	mutex_unlock(&kvm->lock);
 	return ret;
 }

+static bool vgic_ioaddr_overlap(struct kvm *kvm)
+{
+	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+		return false;
+	if ((dist <= cpu && dist + VGIC_DIST_SIZE > cpu) ||
+	    (cpu <= dist && cpu + VGIC_CPU_SIZE > dist))
+		return true;
+	return false;
+}
+
 int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 {
 	int r = 0;
+	struct vgic_dist *vgic = &kvm->arch.vgic;

 	if (addr & ~KVM_PHYS_MASK)
 		return -E2BIG;

+	if (addr & ~PAGE_MASK)
+		return -EINVAL;
+
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base))
+		if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_dist_base))
 			return -EEXIST;
+		if (addr + VGIC_DIST_SIZE < addr)
+			return -EINVAL;
 		kvm->arch.vgic.vgic_dist_base = addr;
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base))
+		if (!IS_VGIC_ADDR_UNDEF(vgic->vgic_cpu_base))
 			return -EEXIST;
+		if (addr + VGIC_CPU_SIZE < addr)
+			return -EINVAL;
 		kvm->arch.vgic.vgic_cpu_base = addr;
 		break;
 	default:
 		r = -ENODEV;
 	}

+	if (vgic_ioaddr_overlap(kvm)) {
+		kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+		kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+		return -EINVAL;
+	}
+
 	mutex_unlock(&kvm->lock);
 	return r;
 }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux