On Sat, Nov 10, 2012 at 03:45:32PM +0000, Christoffer Dall wrote:
> From: Marc Zyngier <marc.zyngier@xxxxxxx>
> 
> Add the init code for the hypervisor, the virtual machine, and
> the virtual CPUs.
> 
> An interrupt handler is also wired to allow the VGIC maintenance
> interrupts, used to deal with level triggered interrupts and LR
> underflows.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
> ---
>  arch/arm/include/asm/kvm_vgic.h |   11 ++
>  arch/arm/kvm/arm.c              |   14 ++
>  arch/arm/kvm/vgic.c             |  237 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 258 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index 6e3d303..a8e7a93 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -154,6 +154,7 @@ static inline void vgic_bytemap_set_irq_val(struct vgic_bytemap *x,
>  struct vgic_dist {
>  #ifdef CONFIG_KVM_ARM_VGIC
>  	spinlock_t	lock;
> +	bool		ready;
>  
>  	/* Virtual control interface mapping */
>  	void __iomem	*vctrl_base;
> @@ -239,6 +240,10 @@ struct kvm_exit_mmio;
>  
>  #ifdef CONFIG_KVM_ARM_VGIC
>  int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
> +int kvm_vgic_hyp_init(void);
> +int kvm_vgic_init(struct kvm *kvm);
> +int kvm_vgic_create(struct kvm *kvm);
> +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
>  void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu);
>  int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
> @@ -248,6 +253,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  		      struct kvm_exit_mmio *mmio);
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
> +#define vgic_initialized(k)	((k)->arch.vgic.ready)
>  #else
>  static inline int kvm_vgic_hyp_init(void)
>  {
> @@ -294,6 +300,11 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
>  {
>  	return 0;
>  }
> +
> +static inline bool vgic_initialized(struct kvm *kvm)
> +{
> +	return true;
> +}
>  #endif

Shouldn't this return false?

> 
>  #endif
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index f43da01..a633d9d 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -187,6 +187,8 @@ int kvm_dev_ioctl_check_extension(long ext)
>  	switch (ext) {
>  #ifdef CONFIG_KVM_ARM_VGIC
>  	case KVM_CAP_IRQCHIP:
> +		r = vgic_present;
> +		break;
>  #endif
>  	case KVM_CAP_USER_MEMORY:
>  	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
> @@ -623,6 +625,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>  	if (unlikely(vcpu->arch.target < 0))
>  		return -ENOEXEC;
>  
> +	/* Initalize the VGIC before running a vcpu the first time on this VM */

Initialize

> +	if (unlikely(irqchip_in_kernel(vcpu->kvm) &&

Why unlikely? I'd just drop the hint.

> +		     !vgic_initialized(vcpu->kvm))) {
> +		ret = kvm_vgic_init(vcpu->kvm);
> +		if (ret)
> +			return ret;
> +	}
> +
>  	if (run->exit_reason == KVM_EXIT_MMIO) {
>  		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
>  		if (ret)
> @@ -1024,8 +1034,8 @@ static int init_hyp_mode(void)
>  	 * Init HYP view of VGIC
>  	 */
>  	err = kvm_vgic_hyp_init();
> -	if (err)
> -		goto out_free_mappings;
> +	if (!err)
> +		vgic_present = true;
>  
>  	return 0;

Is that an intended change in behaviour? If kvm_vgic_hyp_init fails, you'll
return 0...
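If the idea is that the VGIC is simply optional, one way to keep real failures
fatal would be to special-case only the "no GIC node" case, e.g. (untested
sketch, relying on kvm_vgic_hyp_init() returning -ENODEV when no compatible
node is found):

	err = kvm_vgic_hyp_init();
	if (!err)
		vgic_present = true;	/* advertise KVM_CAP_IRQCHIP */
	else if (err != -ENODEV)
		goto out_free_mappings;	/* real init failures stay fatal */

	return 0;

Otherwise an error from a present-but-broken VGIC gets silently swallowed.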
> out_free_vfp:
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index 70040bb..415ddb8 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -20,7 +20,14 @@
>  #include <linux/kvm_host.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +
>  #include <asm/kvm_emulate.h>
> +#include <asm/hardware/gic.h>
> +#include <asm/kvm_arm.h>
> +#include <asm/kvm_mmu.h>
>  
>  /*
>   * How the whole thing works (courtesy of Christoffer Dall):
> @@ -59,11 +66,18 @@
>   */
>  
>  #define VGIC_ADDR_UNDEF		(-1)
> -#define IS_VGIC_ADDR_UNDEF(_x)	((_x) == (typeof(_x))VGIC_ADDR_UNDEF)
> +#define IS_VGIC_ADDR_UNDEF(_x)	((_x) == VGIC_ADDR_UNDEF)

Huh?

> 
>  #define VGIC_DIST_SIZE		0x1000
>  #define VGIC_CPU_SIZE		0x2000
>  
> +/* Physical address of vgic virtual cpu interface */
> +static phys_addr_t vgic_vcpu_base;
> +
> +/* Virtual control interface base address */
> +static void __iomem *vgic_vctrl_base;
> +
> +static struct device_node *vgic_node;
> 
>  #define ACCESS_READ_VALUE	(1 << 0)
>  #define ACCESS_READ_RAZ		(0 << 0)
> @@ -527,7 +541,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exi
>  
>  	if (!irqchip_in_kernel(vcpu->kvm) ||
>  	    mmio->phys_addr < base ||
> -	    (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size))
> +	    (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE))
>  		return false;

Again, this is an odd hunk. It looks like you've accidentally got some older
context lying around in this patch.

> 
>  	range = find_matching_range(vgic_ranges, mmio, base);
> @@ -957,6 +971,225 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>  	return 0;
>  }
>  
> +static irqreturn_t vgic_maintenance_handler(int irq, void *data)
> +{
> +	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
> +	struct vgic_dist *dist;
> +	struct vgic_cpu *vgic_cpu;
> +
> +	if (WARN(!vcpu,
> +		 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
> +		return IRQ_HANDLED;

We need to get an answer about the potential delaying of this interrupt,
otherwise this might kick more often than we'd like.

> +
> +	vgic_cpu = &vcpu->arch.vgic_cpu;
> +	dist = &vcpu->kvm->arch.vgic;
> +	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> +
> +	/*
> +	 * We do not need to take the distributor lock here, since the only
> +	 * action we perform is clearing the irq_active_bit for an EOIed
> +	 * level interrupt. There is a potential race with
> +	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> +	 * if the interrupt is already active. Two possibilities:
> +	 *
> +	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
> +	 *   already in the context of this vcpu, and executing the handler
> +	 * - The interrupt has been migrated to another vcpu, and we ignore
> +	 *   this interrupt for this run. Big deal. It is still pending though,
> +	 *   and will get considered when this vcpu exits.
> +	 */

We've already discussed this in an earlier thread.

> +	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> +		/*
> +		 * Some level interrupts have been EOIed. Clear their
> +		 * active bit.
> +		 */
> +		int lr, irq;
> +
> +		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> +				 vgic_cpu->nr_lr) {
> +			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> +
> +			vgic_bitmap_set_irq_val(&dist->irq_active,
> +						vcpu->vcpu_id, irq, 0);
> +			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> +			writel_relaxed(vgic_cpu->vgic_lr[lr],
> +				       dist->vctrl_base + GICH_LR0 + (lr << 2));
> +
> +			/* Any additionnal pending interrupt? */

additional

> +			if (vgic_bitmap_get_irq_val(&dist->irq_state,
> +						    vcpu->vcpu_id, irq)) {
> +				set_bit(irq, vcpu->arch.vgic_cpu.pending);
> +				set_bit(vcpu->vcpu_id,
> +					&dist->irq_pending_on_cpu);
> +			} else {
> +				clear_bit(irq, vgic_cpu->pending);
> +			}
> +		}
> +	}
> +
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
> +		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> +		writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
> +	}
> +
> +	return IRQ_HANDLED;
> +}

I wonder whether we could instead simplify the irq handler so it just sets a
flag indicating that the list registers need updating, then we pick that up
on the resume path and keep all the work in one place. Hard to tell.

> +
> +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +	u32 reg;
> +	int i;
> +
> +	if (!irqchip_in_kernel(vcpu->kvm))
> +		return;
> +
> +	for (i = 0; i < VGIC_NR_IRQS; i++) {
> +		if (i < 16)
> +			vgic_bitmap_set_irq_val(&dist->irq_enabled,
> +						vcpu->vcpu_id, i, 1);
> +		if (i < 32)
> +			vgic_bitmap_set_irq_val(&dist->irq_cfg,
> +						vcpu->vcpu_id, i, 1);
> +

Usual comments here :)

> +		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
> +	}
> +
> +	BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base);
> +	reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR);
> +	vgic_cpu->nr_lr = (reg & 0x1f) + 1;
> +
> +	reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VMCR);
> +	vgic_cpu->vgic_vmcr = reg | (0x1f << 27); /* Priority */

Why isn't this defined with the other registers?

> +
> +	vgic_cpu->vgic_hcr |= VGIC_HCR_EN; /* Get the show on the road... */
> +}
> +
> +static void vgic_init_maintenance_interrupt(void *info)
> +{
> +	unsigned int *irqp = info;
> +
> +	enable_percpu_irq(*irqp, 0);
> +}
> +
> +int kvm_vgic_hyp_init(void)
> +{
> +	int ret;
> +	unsigned int irq;
> +	struct resource vctrl_res;
> +	struct resource vcpu_res;
> +
> +	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
> +	if (!vgic_node)
> +		return -ENODEV;
> +
> +	irq = irq_of_parse_and_map(vgic_node, 0);
> +	if (!irq)
> +		return -ENXIO;

Don't you need an of_node_put to decrement the refcount before returning?

> +
> +	ret = request_percpu_irq(irq, vgic_maintenance_handler,
> +				 "vgic", kvm_get_running_vcpus());
> +	if (ret) {
> +		kvm_err("Cannot register interrupt %d\n", irq);
> +		return ret;

Likewise (and similarly throughout this function).
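i.e. funnel the early failures through a common exit label that drops the
reference taken by of_find_compatible_node(), along these lines (untested
sketch, label name made up):

	irq = irq_of_parse_and_map(vgic_node, 0);
	if (!irq) {
		ret = -ENXIO;
		goto out_put_node;
	}

	ret = request_percpu_irq(irq, vgic_maintenance_handler,
				 "vgic", kvm_get_running_vcpus());
	if (ret) {
		kvm_err("Cannot register interrupt %d\n", irq);
		goto out_put_node;
	}

	/* ... */

out_unmap:
	iounmap(vgic_vctrl_base);
out_free_irq:
	free_percpu_irq(irq, kvm_get_running_vcpus());
out_put_node:
	of_node_put(vgic_node);	/* drop the of_find_compatible_node() ref */
	return ret;

Whether the success path should also drop the reference depends on whether
you still need vgic_node after init.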
> +	}
> +
> +	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
> +	if (ret) {
> +		kvm_err("Cannot obtain VCTRL resource\n");
> +		goto out_free_irq;
> +	}
> +
> +	vgic_vctrl_base = of_iomap(vgic_node, 2);
> +	if (!vgic_vctrl_base) {
> +		kvm_err("Cannot ioremap VCTRL\n");
> +		ret = -ENOMEM;
> +		goto out_free_irq;
> +	}
> +
> +	ret = create_hyp_io_mappings(vgic_vctrl_base,
> +				     vgic_vctrl_base + resource_size(&vctrl_res),
> +				     vctrl_res.start);
> +	if (ret) {
> +		kvm_err("Cannot map VCTRL into hyp\n");
> +		goto out_unmap;
> +	}
> +
> +	kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq);
> +	on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1);

What if all your CPUs aren't online at this point? Do you need a hotplug
notifier to re-enable the PPI? (you should try hotplug on the host actually,
it could be hilarious fun :)

> +
> +	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
> +		kvm_err("Cannot obtain VCPU resource\n");
> +		ret = -ENXIO;
> +		goto out_unmap;
> +	}
> +	vgic_vcpu_base = vcpu_res.start;
> +
> +	return 0;
> +
> +out_unmap:
> +	iounmap(vgic_vctrl_base);
> +out_free_irq:
> +	free_percpu_irq(irq, kvm_get_running_vcpus());
> +
> +	return ret;
> +}
> +
> +int kvm_vgic_init(struct kvm *kvm)
> +{
> +	int ret = 0, i;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	if (vgic_initialized(kvm))
> +		goto out;
> +
> +	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
> +	    IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
> +		kvm_err("Need to set vgic cpu and dist addresses first\n");
> +		ret = -ENXIO;
> +		goto out;
> +	}
> +
> +	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
> +				    vgic_vcpu_base, VGIC_CPU_SIZE);
> +	if (ret) {
> +		kvm_err("Unable to remap VGIC CPU to VCPU\n");
> +		goto out;
> +	}
> +
> +	for (i = 32; i < VGIC_NR_IRQS; i += 4)
> +		vgic_set_target_reg(kvm, 0, i);
> +
> +	kvm->arch.vgic.ready = true;
> +out:
> +	mutex_unlock(&kvm->lock);
> +	return ret;
> +}
> +
> +int kvm_vgic_create(struct kvm *kvm)
> +{
> +	int ret;
> +
> +	mutex_lock(&kvm->lock);
> +
> +	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
> +		ret = -EEXIST;
> +		goto out;
> +	}
> +
> +	spin_lock_init(&kvm->arch.vgic.lock);
> +	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
> +	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
> +	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
> +
> +	ret = 0;

Might as well assign this when it's declared.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html