On Sat, Nov 10, 2012 at 03:45:32PM +0000, Christoffer Dall wrote:
> From: Marc Zyngier <marc.zyngier@xxxxxxx>
> 
> Add the init code for the hypervisor, the virtual machine, and
> the virtual CPUs.
> 
> An interrupt handler is also wired to allow the VGIC maintenance
> interrupts, used to deal with level triggered interrupts and LR
> underflows.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
> ---
>  arch/arm/include/asm/kvm_vgic.h |   11 ++
>  arch/arm/kvm/arm.c              |   14 ++
>  arch/arm/kvm/vgic.c             |  237 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 258 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index 6e3d303..a8e7a93 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -154,6 +154,7 @@ static inline void vgic_bytemap_set_irq_val(struct vgic_bytemap *x,
>  struct vgic_dist {
>  #ifdef CONFIG_KVM_ARM_VGIC
>  	spinlock_t	lock;
> +	bool		ready;
>  
>  	/* Virtual control interface mapping */
>  	void __iomem	*vctrl_base;
> @@ -239,6 +240,10 @@ struct kvm_exit_mmio;
>  
>  #ifdef CONFIG_KVM_ARM_VGIC
>  int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
> +int kvm_vgic_hyp_init(void);
> +int kvm_vgic_init(struct kvm *kvm);
> +int kvm_vgic_create(struct kvm *kvm);
> +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu);
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> @@ -248,6 +253,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  		      struct kvm_exit_mmio *mmio);
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
> +#define vgic_initialized(k)	((k)->arch.vgic.ready)
>  #else
>  static inline int kvm_vgic_hyp_init(void)
>  {
> @@ -294,6 +300,11 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>  	return 0;
>  }
> +
> +static inline bool vgic_initialized(struct kvm *kvm)
> +{
> +	return true;
> +}
>  #endif

Shouldn't this return false?

> 
>  #endif
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index f43da01..a633d9d 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -187,6 +187,8 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	switch (ext) {
>  #ifdef CONFIG_KVM_ARM_VGIC
>  	case KVM_CAP_IRQCHIP:
> +		r = vgic_present;
> +		break;
>  #endif
>  	case KVM_CAP_USER_MEMORY:
>  	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
> @@ -623,6 +625,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	if (unlikely(vcpu->arch.target < 0))
>  		return -ENOEXEC;
>  
> +	/* Initalize the VGIC before running a vcpu the first time on this VM */

Initialize

> +	if (unlikely(irqchip_in_kernel(vcpu->kvm) &&

Why unlikely? I'd just drop the hint.

> +		     !vgic_initialized(vcpu->kvm))) {
> +		ret = kvm_vgic_init(vcpu->kvm);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	if (run->exit_reason == KVM_EXIT_MMIO) {
>  		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
>  		if (ret)
> @@ -1024,8 +1034,8 @@ static int init_hyp_mode(void)
>  	 * Init HYP view of VGIC
>  	 */
>  	err = kvm_vgic_hyp_init();
> -	if (err)
> -		goto out_free_mappings;
> +	if (!err)
> +		vgic_present = true;
>  
>  	return 0;

Is that an intended change in behaviour? If kvm_vgic_hyp_init fails, you'll
return 0...
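If the idea is that the VGIC is simply optional, one way to keep real failures
fatal would be to special-case only the "no GIC node" case, e.g. (untested
sketch, relying on kvm_vgic_hyp_init() returning -ENODEV when no compatible
node is found):

	err = kvm_vgic_hyp_init();
	if (!err)
		vgic_present = true;	/* advertise KVM_CAP_IRQCHIP */
	else if (err != -ENODEV)
		goto out_free_mappings;	/* real init failures stay fatal */

	return 0;

Otherwise an error from a present-but-broken VGIC gets silently swallowed.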
> out_free_vfp:
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index 70040bb..415ddb8 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -20,7 +20,14 @@
>  #include <linux/kvm_host.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +
>  #include <asm/kvm_emulate.h>
> +#include <asm/hardware/gic.h>
> +#include <asm/kvm_arm.h>
> +#include <asm/kvm_mmu.h>
>  
>  /*
>   * How the whole thing works (courtesy of Christoffer Dall):
> @@ -59,11 +66,18 @@
>   */
>  
>  #define VGIC_ADDR_UNDEF		(-1)
> -#define IS_VGIC_ADDR_UNDEF(_x)	((_x) == (typeof(_x))VGIC_ADDR_UNDEF)
> +#define IS_VGIC_ADDR_UNDEF(_x)	((_x) == VGIC_ADDR_UNDEF)

Huh?

> 
>  #define VGIC_DIST_SIZE		0x1000
>  #define VGIC_CPU_SIZE		0x2000
>  
> +/* Physical address of vgic virtual cpu interface */
> +static phys_addr_t vgic_vcpu_base;
> +
> +/* Virtual control interface base address */
> +static void __iomem *vgic_vctrl_base;
> +
> +static struct device_node *vgic_node;
> 
>  #define ACCESS_READ_VALUE	(1 << 0)
>  #define ACCESS_READ_RAZ		(0 << 0)
> @@ -527,7 +541,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exi
>  
>  	if (!irqchip_in_kernel(vcpu->kvm) ||
>  	    mmio->phys_addr < base ||
> -	    (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size))
> +	    (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE))
>  		return false;

Again, this is an odd hunk. It looks like you've accidentally got some older
context lying around in this patch.

> 
>  	range = find_matching_range(vgic_ranges, mmio, base);
> @@ -957,6 +971,225 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  	return 0;
>  }
>  
> +static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> +{
> +	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
> +	struct vgic_dist *dist;
> +	struct vgic_cpu *vgic_cpu;
> +
> +	if (WARN(!vcpu,
> +		 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
> +		return IRQ_HANDLED;

We need to get an answer about the potential delaying of this interrupt,
otherwise this might kick more often than we'd like.

> +
> +	vgic_cpu = &vcpu->arch.vgic_cpu;
> +	dist = &vcpu->kvm->arch.vgic;
> +	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> +
> +	/*
> +	 * We do not need to take the distributor lock here, since the only
> +	 * action we perform is clearing the irq_active_bit for an EOIed
> +	 * level interrupt. There is a potential race with
> +	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> +	 * if the interrupt is already active. Two possibilities:
> +	 *
> +	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
> +	 *   already in the context of this vcpu, and executing the handler
> +	 * - The interrupt has been migrated to another vcpu, and we ignore
> +	 *   this interrupt for this run. Big deal. It is still pending though,
> +	 *   and will get considered when this vcpu exits.
> +	 */

We've already discussed this in an earlier thread.

> +	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> +		/*
> +		 * Some level interrupts have been EOIed. Clear their
> +		 * active bit.
> +		 */
> +		int lr, irq;
> +
> +		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> +				 vgic_cpu->nr_lr) {
> +			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> +
> +			vgic_bitmap_set_irq_val(&dist->irq_active,
> +						vcpu->vcpu_id, irq, 0);
> +			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> +			writel_relaxed(vgic_cpu->vgic_lr[lr],
> +				       dist->vctrl_base + GICH_LR0 + (lr << 2));
> +
> +			/* Any additionnal pending interrupt? */

additional

> +			if (vgic_bitmap_get_irq_val(&dist->irq_state,
> +						    vcpu->vcpu_id, irq)) {
> +				set_bit(irq, vcpu->arch.vgic_cpu.pending);
> +				set_bit(vcpu->vcpu_id,
> +					&dist->irq_pending_on_cpu);
> +			} else {
> +				clear_bit(irq, vgic_cpu->pending);
> +			}
> +		}
> +	}
> +
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
> +		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> +		writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
> +	}
> +
> +	return IRQ_HANDLED;
> +}

I wonder whether we could instead simplify the irq handler so it just sets a
flag indicating that the list registers need updating, then we pick that up
on the resume path and keep all the work in one place. Hard to tell.

> +
> +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +	u32 reg;
> +	int i;
> +
> +	if (!irqchip_in_kernel(vcpu->kvm))
> +		return;
> +
> +	for (i = 0; i < VGIC_NR_IRQS; i++) {
> +		if (i < 16)
> +			vgic_bitmap_set_irq_val(&dist->irq_enabled,
> +						vcpu->vcpu_id, i, 1);
> +		if (i < 32)
> +			vgic_bitmap_set_irq_val(&dist->irq_cfg,
> +						vcpu->vcpu_id, i, 1);
> +

Usual comments here :)

> +		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
> +	}
> +
> +	BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base);
> +	reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR);
> +	vgic_cpu->nr_lr = (reg & 0x1f) + 1;
> +
> +	reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VMCR);
> +	vgic_cpu->vgic_vmcr = reg | (0x1f << 27); /* Priority */

Why isn't this defined with the other registers?

> +
> +	vgic_cpu->vgic_hcr |= VGIC_HCR_EN; /* Get the show on the road... */
> +}
> +
> +static void vgic_init_maintenance_interrupt(void *info)
> +{
> +	unsigned int *irqp = info;
> +
> +	enable_percpu_irq(*irqp, 0);
> +}
> +
> +int kvm_vgic_hyp_init(void)
> +{
> +	int ret;
> +	unsigned int irq;
> +	struct resource vctrl_res;
> +	struct resource vcpu_res;
> +
> +	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
> +	if (!vgic_node)
> +		return -ENODEV;
> +
> +	irq = irq_of_parse_and_map(vgic_node, 0);
> +	if (!irq)
> +		return -ENXIO;

Don't you need an of_node_put to decrement the refcount before returning?

> +
> +	ret = request_percpu_irq(irq, vgic_maintenance_handler,
> +				 "vgic", kvm_get_running_vcpus());
> +	if (ret) {
> +		kvm_err("Cannot register interrupt %d\n", irq);
> +		return ret;

Likewise (and similarly throughout this function).
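i.e. funnel the early failures through a common exit label that drops the
reference taken by of_find_compatible_node(), along these lines (untested
sketch, label name made up):

	irq = irq_of_parse_and_map(vgic_node, 0);
	if (!irq) {
		ret = -ENXIO;
		goto out_put_node;
	}

	ret = request_percpu_irq(irq, vgic_maintenance_handler,
				 "vgic", kvm_get_running_vcpus());
	if (ret) {
		kvm_err("Cannot register interrupt %d\n", irq);
		goto out_put_node;
	}

	/* ... */

out_unmap:
	iounmap(vgic_vctrl_base);
out_free_irq:
	free_percpu_irq(irq, kvm_get_running_vcpus());
out_put_node:
	of_node_put(vgic_node);	/* drop the of_find_compatible_node() ref */
	return ret;

Whether the success path should also drop the reference depends on whether
you still need vgic_node after init.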
> +	}
> +
> +	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
> +	if (ret) {
> +		kvm_err("Cannot obtain VCTRL resource\n");
> +		goto out_free_irq;
> +	}
> +
> +	vgic_vctrl_base = of_iomap(vgic_node, 2);
> +	if (!vgic_vctrl_base) {
> +		kvm_err("Cannot ioremap VCTRL\n");
> +		ret = -ENOMEM;
> +		goto out_free_irq;
> +	}
> +
> +	ret = create_hyp_io_mappings(vgic_vctrl_base,
> +				     vgic_vctrl_base + resource_size(&vctrl_res),
> +				     vctrl_res.start);
> +	if (ret) {
> +		kvm_err("Cannot map VCTRL into hyp\n");
> +		goto out_unmap;
> +	}
> +
> +	kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq);
> +	on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1);

What if all your CPUs aren't online at this point? Do you need a hotplug
notifier to re-enable the PPI? (you should try hotplug on the host actually,
it could be hilarious fun :)

> +
> +	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
> +		kvm_err("Cannot obtain VCPU resource\n");
> +		ret = -ENXIO;
> +		goto out_unmap;
> +	}
> +	vgic_vcpu_base = vcpu_res.start;
> +
> +	return 0;
> +
> +out_unmap:
> +	iounmap(vgic_vctrl_base);
> +out_free_irq:
> +	free_percpu_irq(irq, kvm_get_running_vcpus());
> +
> +	return ret;
> +}
> +
> +int kvm_vgic_init(struct kvm *kvm)
> +{
> +	int ret = 0, i;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	if (vgic_initialized(kvm))
> +		goto out;
> +
> +	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
> +	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
> +		kvm_err("Need to set vgic cpu and dist addresses first\n");
> +		ret = -ENXIO;
> +		goto out;
> +	}
> +
> +	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
> +				    vgic_vcpu_base, VGIC_CPU_SIZE);
> +	if (ret) {
> +		kvm_err("Unable to remap VGIC CPU to VCPU\n");
> +		goto out;
> +	}
> +
> +	for (i = 32; i < VGIC_NR_IRQS; i += 4)
> +		vgic_set_target_reg(kvm, 0, i);
> +
> +	kvm->arch.vgic.ready = true;
> +out:
> +	mutex_unlock(&kvm->lock);
> +	return ret;
> +}
> +
> +int kvm_vgic_create(struct kvm *kvm)
> +{
> +	int ret;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
> +		ret = -EEXIST;
> +		goto out;
> +	}
> +
> +	spin_lock_init(&kvm->arch.vgic.lock);
> +	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
> +	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
> +	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
> +
> +	ret = 0;

Might as well assign this when it's declared.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html