Hello! > -----Original Message----- > From: kvm-owner@xxxxxxxxxxxxxxx [mailto:kvm-owner@xxxxxxxxxxxxxxx] On Behalf Of Andre Przywara > Sent: Wednesday, October 07, 2015 5:55 PM > To: marc.zyngier@xxxxxxx; christoffer.dall@xxxxxxxxxx > Cc: eric.auger@xxxxxxxxxx; p.fedin@xxxxxxxxxxx; kvmarm@xxxxxxxxxxxxxxxxxxxxx; linux-arm- > kernel@xxxxxxxxxxxxxxxxxxx; kvm@xxxxxxxxxxxxxxx > Subject: [PATCH v3 12/16] KVM: arm64: handle pending bit for LPIs in ITS emulation > > As the actual LPI number in a guest can be quite high, but is mostly > assigned using a very sparse allocation scheme, bitmaps and arrays > for storing the virtual interrupt status are a waste of memory. > We use our equivalent of the "Interrupt Translation Table Entry" > (ITTE) to hold this extra status information for a virtual LPI. > As the normal VGIC code cannot use its fancy bitmaps to manage > pending interrupts, we provide a hook in the VGIC code to let the > ITS emulation handle the list register queueing itself. > LPIs are located in a separate number range (>=8192), so > distinguishing them is easy. With LPIs being only edge-triggered, we > get away with a less complex IRQ handling. > We extend the number of bits for storing the IRQ number in our > LR struct to 16 to cover the LPI numbers we support as well. 
> > Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx> > --- > Changelog v2..v3: > - extend LR data structure to hold 16-bit wide IRQ IDs > - only clear pending bit if IRQ could be queued > - adapt __kvm_vgic_sync_hwstate() to upstream changes > > include/kvm/arm_vgic.h | 4 +- > virt/kvm/arm/its-emul.c | 75 ++++++++++++++++++++++++++++++++++++ > virt/kvm/arm/its-emul.h | 3 ++ > virt/kvm/arm/vgic-v3-emul.c | 2 + > virt/kvm/arm/vgic.c | 93 +++++++++++++++++++++++++++++++-------------- > 5 files changed, 148 insertions(+), 29 deletions(-) > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index c3eb414..035911f 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -95,7 +95,7 @@ enum vgic_type { > #define LR_HW (1 << 3) > > struct vgic_lr { > - unsigned irq:10; > + unsigned irq:16; > union { > unsigned hwirq:10; > unsigned source:3; > @@ -147,6 +147,8 @@ struct vgic_vm_ops { > int (*init_model)(struct kvm *); > void (*destroy_model)(struct kvm *); > int (*map_resources)(struct kvm *, const struct vgic_params *); > + bool (*queue_lpis)(struct kvm_vcpu *); > + void (*unqueue_lpi)(struct kvm_vcpu *, int irq); > }; > > struct vgic_io_device { > diff --git a/virt/kvm/arm/its-emul.c b/virt/kvm/arm/its-emul.c > index bab8033..8349970 100644 > --- a/virt/kvm/arm/its-emul.c > +++ b/virt/kvm/arm/its-emul.c > @@ -59,8 +59,27 @@ struct its_itte { > struct its_collection *collection; > u32 lpi; > u32 event_id; > + bool enabled; > + unsigned long *pending; > }; > > +/* To be used as an iterator this macro misses the enclosing parentheses */ > +#define for_each_lpi(dev, itte, kvm) \ > + list_for_each_entry(dev, &(kvm)->arch.vgic.its.device_list, dev_list) \ > + list_for_each_entry(itte, &(dev)->itt, itte_list) > + > +static struct its_itte *find_itte_by_lpi(struct kvm *kvm, int lpi) > +{ > + struct its_device *device; > + struct its_itte *itte; > + > + for_each_lpi(device, itte, kvm) { > + if (itte->lpi == lpi) > + return itte; > + } > + 
return NULL; > +} > + > #define BASER_BASE_ADDRESS(x) ((x) & 0xfffffffff000ULL) > > /* The distributor lock is held by the VGIC MMIO handler. */ > @@ -154,9 +173,65 @@ static bool handle_mmio_gits_idregs(struct kvm_vcpu *vcpu, > return false; > } > > +/* > + * Find all enabled and pending LPIs and queue them into the list > + * registers. > + * The dist lock is held by the caller. > + */ > +bool vits_queue_lpis(struct kvm_vcpu *vcpu) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + struct its_device *device; > + struct its_itte *itte; > + bool ret = true; > + > + if (!vgic_has_its(vcpu->kvm)) > + return true; > + if (!its->enabled || !vcpu->kvm->arch.vgic.lpis_enabled) > + return true; > + > + spin_lock(&its->lock); > + for_each_lpi(device, itte, vcpu->kvm) { > + if (!itte->enabled || !test_bit(vcpu->vcpu_id, itte->pending)) > + continue; > + > + if (!itte->collection) > + continue; > + > + if (itte->collection->target_addr != vcpu->vcpu_id) > + continue; > + > + > + if (vgic_queue_irq(vcpu, 0, itte->lpi)) > + __clear_bit(vcpu->vcpu_id, itte->pending); > + else > + ret = false; Shouldn't we also have a 'break' here? If vgic_queue_irq() returns false, we have no more LRs to use, so it makes no sense to keep iterating. > + } > + > + spin_unlock(&its->lock); > + return ret; > +} > + > +/* Called with the distributor lock held by the caller. 
*/ > +void vits_unqueue_lpi(struct kvm_vcpu *vcpu, int lpi) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + struct its_itte *itte; > + > + spin_lock(&its->lock); > + > + /* Find the right ITTE and put the pending state back in there */ > + itte = find_itte_by_lpi(vcpu->kvm, lpi); > + if (itte) > + __set_bit(vcpu->vcpu_id, itte->pending); > + > + spin_unlock(&its->lock); > +} > + > static void its_free_itte(struct its_itte *itte) > { > list_del(&itte->itte_list); > + kfree(itte->pending); > kfree(itte); > } > > diff --git a/virt/kvm/arm/its-emul.h b/virt/kvm/arm/its-emul.h > index 472a6d0..cc5d5ff 100644 > --- a/virt/kvm/arm/its-emul.h > +++ b/virt/kvm/arm/its-emul.h > @@ -33,4 +33,7 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu); > int vits_init(struct kvm *kvm); > void vits_destroy(struct kvm *kvm); > > +bool vits_queue_lpis(struct kvm_vcpu *vcpu); > +void vits_unqueue_lpi(struct kvm_vcpu *vcpu, int irq); > + > #endif > diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c > index e9aa29e..f482e34 100644 > --- a/virt/kvm/arm/vgic-v3-emul.c > +++ b/virt/kvm/arm/vgic-v3-emul.c > @@ -944,6 +944,8 @@ void vgic_v3_init_emulation(struct kvm *kvm) > dist->vm_ops.init_model = vgic_v3_init_model; > dist->vm_ops.destroy_model = vgic_v3_destroy_model; > dist->vm_ops.map_resources = vgic_v3_map_resources; > + dist->vm_ops.queue_lpis = vits_queue_lpis; > + dist->vm_ops.unqueue_lpi = vits_unqueue_lpi; > > dist->vgic_dist_base = VGIC_ADDR_UNDEF; > dist->vgic_redist_base = VGIC_ADDR_UNDEF; > diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c > index 11bf692..9ee87d3 100644 > --- a/virt/kvm/arm/vgic.c > +++ b/virt/kvm/arm/vgic.c > @@ -120,6 +120,20 @@ static bool queue_sgi(struct kvm_vcpu *vcpu, int irq) > return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq); > } > > +static bool vgic_queue_lpis(struct kvm_vcpu *vcpu) > +{ > + if (vcpu->kvm->arch.vgic.vm_ops.queue_lpis) > + return vcpu->kvm->arch.vgic.vm_ops.queue_lpis(vcpu); > + else > + 
return true; > +} > + > +static void vgic_unqueue_lpi(struct kvm_vcpu *vcpu, int irq) > +{ > + if (vcpu->kvm->arch.vgic.vm_ops.unqueue_lpi) > + vcpu->kvm->arch.vgic.vm_ops.unqueue_lpi(vcpu, irq); > +} > + > int kvm_vgic_map_resources(struct kvm *kvm) > { > return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic); > @@ -1148,18 +1162,28 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) > static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, > int lr_nr, struct vgic_lr vlr) > { > - if (vgic_irq_is_active(vcpu, irq)) { > - vlr.state |= LR_STATE_ACTIVE; > - kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); > - vgic_irq_clear_active(vcpu, irq); > - vgic_update_state(vcpu->kvm); > - } else if (vgic_dist_irq_is_pending(vcpu, irq)) { > - vlr.state |= LR_STATE_PENDING; > - kvm_debug("Set pending: 0x%x\n", vlr.state); > + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; > + > + /* We care only about state for SGIs/PPIs/SPIs, not for LPIs */ > + if (irq < dist->nr_irqs) { > + if (vgic_irq_is_active(vcpu, irq)) { > + vlr.state |= LR_STATE_ACTIVE; > + kvm_debug("Set active, clear distributor: 0x%x\n", > + vlr.state); > + vgic_irq_clear_active(vcpu, irq); > + vgic_update_state(vcpu->kvm); > + } else if (vgic_dist_irq_is_pending(vcpu, irq)) { > + vlr.state |= LR_STATE_PENDING; > + kvm_debug("Set pending: 0x%x\n", vlr.state); > + } > + if (!vgic_irq_is_edge(vcpu, irq)) > + vlr.state |= LR_EOI_INT; > + } else { > + /* If this is an LPI, it can only be pending */ > + if (irq >= 8192) > + vlr.state |= LR_STATE_PENDING; > } > > - if (!vgic_irq_is_edge(vcpu, irq)) > - vlr.state |= LR_EOI_INT; > > if (vlr.irq >= VGIC_NR_SGIS) { > struct irq_phys_map *map; > @@ -1190,16 +1214,14 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, > */ > bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) > { > - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; > - struct vgic_lr vlr; > u64 elrsr = vgic_get_elrsr(vcpu); > unsigned long 
*elrsr_ptr = u64_to_bitmask(&elrsr); > + struct vgic_lr vlr; > int lr; > > /* Sanitize the input... */ > BUG_ON(sgi_source_id & ~7); > BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); > - BUG_ON(irq >= dist->nr_irqs); > > kvm_debug("Queue IRQ%d\n", irq); > > @@ -1282,8 +1304,12 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) > overflow = 1; > } > > - > - > + /* > + * LPIs are not mapped in our bitmaps, so we leave the iteration > + * to the ITS emulation code. > + */ > + if (!vgic_queue_lpis(vcpu)) > + overflow = 1; > > epilog: > if (overflow) { > @@ -1488,20 +1514,30 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > if (test_bit(lr, elrsr_ptr)) > continue; > > - /* Reestablish SGI source for pending and active SGIs */ > - if (vlr.irq < VGIC_NR_SGIS) > - add_sgi_source(vcpu, vlr.irq, vlr.source); > - > - if (vlr.state & LR_STATE_PENDING) > - vgic_dist_irq_set_pending(vcpu, vlr.irq); > - > - if (vlr.state & LR_STATE_ACTIVE) { > - if (vlr.state & LR_STATE_PENDING) { > - vgic_irq_set_active(vcpu, vlr.irq); > - } else { > - /* Active-only IRQs stay in the LR */ > - pending = true; > + /* LPIs are handled separately */ > + if (vlr.irq >= 8192) { > + /* We just need to take care about still pending LPIs */ > + if (!(vlr.state & LR_STATE_PENDING)) > continue; > + vgic_unqueue_lpi(vcpu, vlr.irq); > + } else { > + BUG_ON(!(vlr.state & LR_STATE_MASK)); > + > + /* Reestablish SGI source for pending and active SGIs */ > + if (vlr.irq < VGIC_NR_SGIS) > + add_sgi_source(vcpu, vlr.irq, vlr.source); > + > + if (vlr.state & LR_STATE_PENDING) > + vgic_dist_irq_set_pending(vcpu, vlr.irq); > + > + if (vlr.state & LR_STATE_ACTIVE) { > + if (vlr.state & LR_STATE_PENDING) { > + vgic_irq_set_active(vcpu, vlr.irq); > + } else { > + /* Active-only IRQs stay in the LR */ > + pending = true; > + continue; > + } > } > } > > @@ -1512,6 +1548,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > } > vgic_update_state(vcpu->kvm); > > + /* vgic_update_state 
would not cover only-active IRQs or LPIs */ > if (pending) > set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); > spin_unlock(&dist->lock); > -- > 2.5.1 Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html