Re: [PATCH v2 04/11] ARM: KVM: VGIC distributor handling

Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx> · Mon, 20 Aug 2012 09:48:00 -0400



On Mon, Aug 20, 2012 at 7:58 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
> On 18/08/12 04:00, Christoffer Dall wrote:
>> On Thu, Jul 5, 2012 at 11:28 AM, Marc Zyngier <marc.zyngier@xxxxxxx> wrote:
>>> Add the GIC distributor emulation code. A number of the GIC features
>>> are simply ignored as they are not required to boot a Linux guest.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
>>> ---
>>>  arch/arm/include/asm/kvm_vgic.h |  168 +++++++++++++++
>>>  arch/arm/kvm/vgic.c             |  447 ++++++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 614 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
>>> index 59c2f60..02f50b7 100644
>>> --- a/arch/arm/include/asm/kvm_vgic.h
>>> +++ b/arch/arm/include/asm/kvm_vgic.h
>>> @@ -1,7 +1,175 @@
>>>  #ifndef __ASM_ARM_KVM_VGIC_H
>>>  #define __ASM_ARM_KVM_VGIC_H
>>>
>>> +#include <linux/kernel.h>
>>> +#include <linux/kvm.h>
>>> +#include <linux/irqreturn.h>
>>> +#include <linux/spinlock.h>
>>> +#include <linux/types.h>
>>> +
>>> +#define VGIC_NR_IRQS           128
>>> +#define VGIC_NR_SHARED_IRQS    (VGIC_NR_IRQS - 32)
>>> +#define VGIC_MAX_CPUS          KVM_MAX_VCPUS
>>> +
>>> +/* Sanity checks... */
>>> +#if (VGIC_MAX_CPUS > 8)
>>> +#error Invalid number of CPU interfaces
>>> +#endif
>>> +
>>> +#if (VGIC_NR_IRQS & 31)
>>> +#error "VGIC_NR_IRQS must be a multiple of 32"
>>> +#endif
>>> +
>>> +#if (VGIC_NR_IRQS > 1024)
>>> +#error "VGIC_NR_IRQS must be <= 1024"
>>> +#endif
>>> +
>>> +/*
>>> + * The GIC distributor registers describing interrupts have two parts:
>>> + * - 32 per-CPU interrupts (SGI + PPI)
>>> + * - a bunch of shared interrups (SPI)
>>> + */
>>> +struct vgic_bitmap {
>>> +       union {
>>> +               u32 reg[1];
>>> +               unsigned long reg_ul[0];
>>> +       } percpu[VGIC_MAX_CPUS];
>>> +       union {
>>> +               u32 reg[VGIC_NR_SHARED_IRQS / 32];
>>> +               unsigned long reg_ul[0];
>>> +       } shared;
>>> +};
>>> +
>>> +static inline u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
>>> +                                      int cpuid, u32 offset)
>>> +{
>>> +       offset >>= 2;
>>> +       BUG_ON(offset > (VGIC_NR_IRQS / 32));
>>> +       if (!offset)
>>> +               return x->percpu[cpuid].reg;
>>> +       else
>>> +               return x->shared.reg + offset - 1;
>>> +}
>>> +
>>> +static inline int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
>>> +                                        int cpuid, int irq)
>>> +{
>>> +       if (irq < 32)
>>> +               return test_bit(irq, x->percpu[cpuid].reg_ul);
>>> +
>>> +       return test_bit(irq - 32, x->shared.reg_ul);
>>> +}
>>> +
>>> +static inline void vgic_bitmap_set_irq_val(struct vgic_bitmap *x,
>>> +                                          int cpuid, int irq, int val)
>>> +{
>>> +       unsigned long *reg;
>>> +
>>> +       if (irq < 32)
>>> +               reg = x->percpu[cpuid].reg_ul;
>>> +       else {
>>> +               reg =  x->shared.reg_ul;
>>> +               irq -= 32;
>>> +       }
>>> +
>>> +       if (val)
>>> +               set_bit(irq, reg);
>>> +       else
>>> +               clear_bit(irq, reg);
>>> +}
>>> +
>>> +static inline unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x,
>>> +                                                    int cpuid)
>>> +{
>>> +       if (unlikely(cpuid >= VGIC_MAX_CPUS))
>>> +               return NULL;
>>> +       return x->percpu[cpuid].reg_ul;
>>> +}
>>> +
>>> +static inline unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
>>> +{
>>> +       return x->shared.reg_ul;
>>> +}
>>> +
>>> +struct vgic_bytemap {
>>> +       union {
>>> +               u32 reg[8];
>>> +               unsigned long reg_ul[0];
>>> +       } percpu[VGIC_MAX_CPUS];
>>> +       union {
>>> +               u32 reg[VGIC_NR_SHARED_IRQS  / 4];
>>> +               unsigned long reg_ul[0];
>>> +       } shared;
>>> +};
>>> +
>>> +static inline u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x,
>>> +                                       int cpuid, u32 offset)
>>> +{
>>> +       offset >>= 2;
>>> +       BUG_ON(offset > (VGIC_NR_IRQS / 4));
>>> +       if (offset < 4)
>>> +               return x->percpu[cpuid].reg + offset;
>>> +       else
>>> +               return x->shared.reg + offset - 8;
>>> +}
>>> +
>>> +static inline int vgic_bytemap_get_irq_val(struct vgic_bytemap *x,
>>> +                                          int cpuid, int irq)
>>> +{
>>> +       u32 *reg, shift;
>>> +       shift = (irq & 3) * 8;
>>> +       reg = vgic_bytemap_get_reg(x, cpuid, irq);
>>> +       return (*reg >> shift) & 0xff;
>>> +}
>>> +
>>> +static inline void vgic_bytemap_set_irq_val(struct vgic_bytemap *x,
>>> +                                           int cpuid, int irq, int val)
>>> +{
>>> +       u32 *reg, shift;
>>> +       shift = (irq & 3) * 8;
>>> +       reg = vgic_bytemap_get_reg(x, cpuid, irq);
>>> +       *reg &= ~(0xff << shift);
>>> +       *reg |= (val & 0xff) << shift;
>>> +}
>>> +
>>>  struct vgic_dist {
>>> +#ifdef CONFIG_KVM_ARM_VGIC
>>> +       spinlock_t              lock;
>>> +
>>> +       /* Virtual control interface mapping */
>>> +       void __iomem            *vctrl_base;
>>> +
>>> +       /* Distributor mapping in the guest */
>>> +       unsigned long           vgic_dist_base;
>>> +       unsigned long           vgic_dist_size;
>>> +
>>> +       /* Distributor enabled */
>>> +       u32                     enabled;
>>> +
>>> +       /* Interrupt enabled (one bit per IRQ) */
>>> +       struct vgic_bitmap      irq_enabled;
>>> +
>>> +       /* Interrupt 'pin' level */
>>> +       struct vgic_bitmap      irq_state;
>>> +
>>> +       /* Level-triggered interrupt in progress */
>>> +       struct vgic_bitmap      irq_active;
>>> +
>>> +       /* Interrupt priority. Not used yet. */
>>> +       struct vgic_bytemap     irq_priority;
>>> +
>>> +       /* Level/edge triggered */
>>> +       struct vgic_bitmap      irq_cfg;
>>> +
>>> +       /* Source CPU per SGI and target CPU */
>>> +       u8                      irq_sgi_sources[VGIC_MAX_CPUS][16];
>>> +
>>> +       /* Target CPU for each IRQ */
>>> +       struct vgic_bitmap      irq_spi_target[VGIC_MAX_CPUS];
>>> +
>>> +       /* Bitmap indicating which CPU has something pending */
>>> +       unsigned long           irq_pending_on_cpu;
>>> +#endif
>>>  };
>>>
>>>  struct vgic_cpu {
>>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
>>> index 0502212..ad48c89 100644
>>> --- a/arch/arm/kvm/vgic.c
>>> +++ b/arch/arm/kvm/vgic.c
>>> @@ -22,6 +22,43 @@
>>>  #include <linux/io.h>
>>>  #include <asm/kvm_emulate.h>
>>>
>>> +/*
>>> + * How the whole thing works (courtesy of Christoffer Dall):
>>> + *
>>> + * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
>>> + *   something is pending
>>> + * - VGIC pending interrupts are stored on the vgic.irq_state vgic
>>> + *   bitmap (this bitmap is updated by both user land ioctls and guest
>>> + *   mmio ops) and indicate the 'wire' state.
>>> + * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
>>> + *   recalculated
>>> + * - To calculate the oracle, we need info for each cpu from
>>> + *   compute_pending_for_cpu, which considers:
>>> + *   - PPI: dist->irq_state & dist->irq_enable
>>> + *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
>>> + *   - irq_spi_target is a 'formatted' of the GICD_ICFGR registers,
>>
>> s/'formatted' of/'formatted' version of/
>>
>>> + *     stored on each vcpu. We only keep one bit of information per
>>> + *     interrupt, making sure that only one vcpu can accept the
>>> + *     interrupt.
>>> + *
>>> + * The handling of level interrupts adds some extra complexity. We
>>> + * need to track when the interrupt has been EOIed, so we can sample
>>> + * the 'line' again. This is achieve as such:
>>
>> s/achieve/achieved/
>>
>>> + *
>>> + * - When a level interrupt in moved onto a vcpu, the corresponding
>>
>> s/in/is/
>>
>>> + *   bit in irq_active is set. As long as this bit is set, the line
>>> + *   will be ignored for further interrupts. The interrupt is injected
>>> + *   into the vcpu with the VGIC_LR_EOI bit set (generate a
>>> + *   maintainance interrupt on EOI).
>>
>> question: can a peripheral lower the interrupt line again if it's a
>> level interrupt and should that then remove the pending state from the
>> cpu (move the interrupt away from a vcpu again)? Do we currently
>> support this?
>
> From reading the GIC spec, it looks like deasserting the line removes
> the pending state, but not the active state. We will definitely reflect
> this change at the distributor level.
>
> What we don't do yet is canceling the interrupt at the vcpu level if it
> hasn't been run yet. I'm not sure this is worth it, and the spec doesn't
> say the interrupt should be recalled. Need to think of that case a bit more.
>
>>> + * - When the interrupt is EOIed, the maintainance interrupt fires,
>>> + *   and clears the corresponding bit in irq_active. This allow the
>>> + *   interrupt line to be sampled again.
>>> + */
>>> +
>>> +/* Temporary hacks, need to be provided by userspace emulation */
>>> +#define VGIC_DIST_BASE         0x2c001000
>>> +#define VGIC_DIST_SIZE         0x1000
>>
>> are we targeting to fix this before a merge into my tree or is it too far ahead?
>
> I'm not sure we have any plan for this yet. Peter?
>
>>> +
>>>  #define ACCESS_READ_VALUE      (1 << 0)
>>>  #define ACCESS_READ_RAZ                (0 << 0)
>>>  #define ACCESS_READ_MASK(x)    ((x) & (1 << 0))
>>> @@ -31,6 +68,14 @@
>>>  #define ACCESS_WRITE_VALUE     (3 << 1)
>>>  #define ACCESS_WRITE_MASK(x)   ((x) & (3 << 1))
>>>
>>> +static void vgic_update_state(struct kvm *kvm);
>>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
>>> +
>>> +static inline int vgic_irq_is_edge(struct vgic_dist *dist, int irq)
>>> +{
>>> +       return vgic_bitmap_get_irq_val(&dist->irq_cfg, 0, irq);
>>> +}
>>> +
>>>  static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, u32 offset, int mode)
>>>  {
>>>         int word_offset = offset & 3;
>>> @@ -82,6 +127,254 @@ static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, u32 offset, in
>>>         }
>>>  }
>>>
>>> +static void handle_mmio_misc(struct kvm_vcpu *vcpu,
>>> +                            struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 reg;
>>> +       u32 u32off = offset & 3;
>>> +
>>> +       switch (offset & ~3) {
>>> +       case 0:                 /* CTLR */
>>> +               reg = vcpu->kvm->arch.vgic.enabled;
>>> +               vgic_reg_access(mmio, &reg, u32off,
>>> +                               ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> +               if (mmio->mmio.is_write) {
>>> +                       vcpu->kvm->arch.vgic.enabled = reg & 1;
>>> +                       vgic_update_state(vcpu->kvm);
>>> +               }
>>> +               break;
>>> +
>>> +       case 4:                 /* TYPER */
>>> +               reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
>>> +               reg |= (VGIC_NR_IRQS >> 5) - 1;
>>> +               vgic_reg_access(mmio, &reg, u32off,
>>> +                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> +               break;
>>> +
>>> +       case 8:                 /* IIDR */
>>> +               reg = 0x4B00043B;
>>> +               vgic_reg_access(mmio, &reg, u32off,
>>> +                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> +               break;
>>> +       }
>>> +}
>>> +
>>> +static void handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
>>> +                              struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       vgic_reg_access(mmio, NULL, offset,
>>> +                       ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
>>> +}
>>> +
>>> +static void handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
>>> +                                      struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
>>> +                                      vcpu->vcpu_id, offset);
>>> +       vgic_reg_access(mmio, reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
>>> +       if (mmio->mmio.is_write)
>>> +               vgic_update_state(vcpu->kvm);
>>> +}
>>> +
>>> +static void handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
>>> +                                        struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
>>> +                                      vcpu->vcpu_id, offset);
>>> +       vgic_reg_access(mmio, reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
>>> +       if (mmio->mmio.is_write) {
>>> +               if (offset < 4) /* Force SGI enabled */
>>> +                       *reg |= 0xffff;
>>> +               vgic_update_state(vcpu->kvm);
>>> +       }
>>> +}
>>> +
>>> +static void handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
>>> +                                       struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
>>> +                                      vcpu->vcpu_id, offset);
>>> +       vgic_reg_access(mmio, reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
>>> +       if (mmio->mmio.is_write)
>>> +               vgic_update_state(vcpu->kvm);
>>> +}
>>> +
>>> +static void handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
>>> +                                         struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
>>> +                                      vcpu->vcpu_id, offset);
>>> +       vgic_reg_access(mmio, reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
>>> +       if (mmio->mmio.is_write)
>>> +               vgic_update_state(vcpu->kvm);
>>> +}
>>> +
>>> +static void handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
>>> +                                    struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
>>> +                                       vcpu->vcpu_id, offset);
>>> +       vgic_reg_access(mmio, reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> +}
>>> +
>>> +static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
>>> +{
>>> +       struct vgic_dist *dist = &kvm->arch.vgic;
>>> +       struct kvm_vcpu *vcpu;
>>> +       int i, c;
>>> +       unsigned long *bmap;
>>> +       u32 val = 0;
>>> +
>>> +       BUG_ON(irq & 3);
>>> +       BUG_ON(irq < 32);
>>> +
>>> +       irq -= 32;
>>> +
>>> +       kvm_for_each_vcpu(c, vcpu, kvm) {
>>> +               bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
>>> +               for (i = 0; i < 4; i++)
>>> +                       if (test_bit(irq + i, bmap))
>>> +                               val |= 1 << (c + i * 8);
>>> +       }
>>> +
>>> +       return val;
>>> +}
>>> +
>>> +static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
>>> +{
>>> +       struct vgic_dist *dist = &kvm->arch.vgic;
>>> +       struct kvm_vcpu *vcpu;
>>> +       int i, c;
>>> +       unsigned long *bmap;
>>> +       u32 target;
>>> +
>>> +       BUG_ON(irq & 3);
>>> +       BUG_ON(irq < 32);
>>> +
>>> +       irq -= 32;
>>> +
>>> +       /*
>>> +        * Pick the LSB in each byte. This ensure we only target one
>>
>> s/ensure/ensures/
>>
>> consider: This ensures we target exactly one vcpu per IRQ.
>>
>> remind me, why do we only target one? what happens if for example the
>> timer targets two cpus, but the first masks the IRQ line, wouldn't the
>> guest expect to get timer interrupts and we deny it?
>>
>>> +        * single vcpu per IRQ. If the byte is null, assume we target
>>> +        * CPU0.
>>> +        */
>>> +       for (i = 0; i < 32; i += 8) {
>>> +               target = ffs(val & (0xffU << i));
>>> +               val &= ~(0xffU << i);
>>> +               val |= 1 << (target ? (target - 1) : i);
>>> +       }
>>> +
>>> +       kvm_for_each_vcpu(c, vcpu, kvm) {
>>
>> hmm, with above you don't need this loop do you? you can just re-caclc
>> the CPU number and get the corresponding irq_spu_target for the vcpu
>> index and do the {set/clear}_bit?
>
> Yes, this can be simplified indeed.
>
>> not sure if it's nicer
>>
>>> +               bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
>>> +               for (i = 0; i < 4; i++) {
>>> +                       if (val & (1 << (c + i * 8)))
>>> +                               set_bit(irq + i, bmap);
>>> +                       else
>>> +                               clear_bit(irq + i, bmap);
>>> +               }
>>> +       }
>>> +}
>>> +
>>> +static void handle_mmio_target_reg(struct kvm_vcpu *vcpu,
>>> +                                  struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 reg;
>>> +
>>> +       /* We treat the banked interrupts targets as read-only */
>>> +       if (offset < 32) {
>>> +               u32 roreg = 1 << vcpu->vcpu_id;
>>> +               roreg |= roreg << 8;
>>> +               roreg |= roreg << 16;
>>> +
>>> +               vgic_reg_access(mmio, &roreg, offset,
>>> +                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> +               return;
>>> +       }
>>> +
>>> +       reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
>>> +       vgic_reg_access(mmio, &reg, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> +       if (mmio->mmio.is_write) {
>>> +               vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
>>> +               vgic_update_state(vcpu->kvm);
>>> +       }
>>> +}
>>> +
>>> +static u32 vgic_cfg_expand(u16 val)
>>> +{
>>> +       u32 res = 0;
>>> +       int i;
>>> +
>>> +       for (i = 0; i < 16; i++)
>>> +               res |= (val >> i) << (2 * i + 1);
>>> +
>>> +       return res;
>>> +}
>>> +
>>> +static u16 vgic_cfg_compress(u32 val)
>>> +{
>>> +       u16 res = 0;
>>> +       int i;
>>> +
>>> +       for (i = 0; i < 16; i++)
>>> +               res |= (val >> (i * 2 + 1)) << i;
>>> +
>>> +       return res;
>>> +}
>>> +
>>> +/*
>>> + * The distributor uses 2 bits per IRQ for the CFG register, but the
>>> + * LSB is always 0. As such, we only keep the upper bit, and use the
>>> + * two above functions to compress/expand the bits
>>> + */
>>> +static void handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
>>> +                               struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 val;
>>> +       u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
>>> +                                      vcpu->vcpu_id, offset >> 1);
>>> +       if (offset & 2)
>>> +               val = *reg >> 16;
>>> +       else
>>> +               val = *reg & 0xffff;
>>> +
>>> +       val = vgic_cfg_expand(val);
>>> +       vgic_reg_access(mmio, &val, offset,
>>> +                       ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> +       if (mmio->mmio.is_write) {
>>> +               if (offset < 4) {
>>> +                       *reg = ~0U; /* Force PPIs/SGIs to 1 */
>>> +                       return;
>>> +               }
>>> +
>>> +               val = vgic_cfg_compress(val);
>>> +               if (offset & 2) {
>>> +                       *reg &= 0xffff;
>>
>> this line is unnecessary isn't it?
>
> Unnecessary? You need to mask out the top bits before or-ing with the
> new value.

right you are, I read the val = *reg >> 16 as *reg = *reg >> 16

>>
>>> +                       *reg |= val << 16;
>>> +               } else {
>>> +                       *reg &= 0xffff << 16;
>>> +                       *reg |= val;
>>> +               }
>>> +       }
>>> +}
>>> +
>>> +static void handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
>>> +                               struct kvm_exit_mmio *mmio, u32 offset)
>>> +{
>>> +       u32 reg;
>>> +       vgic_reg_access(mmio, &reg, offset,
>>> +                       ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
>>> +       if (mmio->mmio.is_write) {
>>> +               vgic_dispatch_sgi(vcpu, reg);
>>> +               vgic_update_state(vcpu->kvm);
>>> +       }
>>> +}
>>> +
>>>  /* All this should be handled by kvm_bus_io_*()... FIXME!!! */
>>>  struct mmio_range {
>>>         unsigned long base;
>>> @@ -91,6 +384,66 @@ struct mmio_range {
>>>  };
>>>
>>>  static const struct mmio_range vgic_ranges[] = {
>>> +       {                       /* CTRL, TYPER, IIDR */
>>> +               .base           = 0,
>>> +               .len            = 12,
>>> +               .handle_mmio    = handle_mmio_misc,
>>> +       },
>>> +       {                       /* IGROUPRn */
>>> +               .base           = 0x80,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_raz_wi,
>>> +       },
>>> +       {                       /* ISENABLERn */
>>> +               .base           = 0x100,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_set_enable_reg,
>>> +       },
>>> +       {                       /* ICENABLERn */
>>> +               .base           = 0x180,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_clear_enable_reg,
>>> +       },
>>> +       {                       /* ISPENDRn */
>>> +               .base           = 0x200,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_set_pending_reg,
>>> +       },
>>> +       {                       /* ICPENDRn */
>>> +               .base           = 0x280,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_clear_pending_reg,
>>> +       },
>>> +       {                       /* ISACTIVERn */
>>> +               .base           = 0x300,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_raz_wi,
>>> +       },
>>> +       {                       /* ICACTIVERn */
>>> +               .base           = 0x380,
>>> +               .len            = VGIC_NR_IRQS / 8,
>>> +               .handle_mmio    = handle_mmio_raz_wi,
>>> +       },
>>> +       {                       /* IPRIORITYRn */
>>> +               .base           = 0x400,
>>> +               .len            = VGIC_NR_IRQS,
>>> +               .handle_mmio    = handle_mmio_priority_reg,
>>> +       },
>>> +       {                       /* ITARGETSRn */
>>> +               .base           = 0x800,
>>> +               .len            = VGIC_NR_IRQS,
>>> +               .handle_mmio    = handle_mmio_target_reg,
>>> +       },
>>> +       {                       /* ICFGRn */
>>> +               .base           = 0xC00,
>>> +               .len            = VGIC_NR_IRQS / 4,
>>> +               .handle_mmio    = handle_mmio_cfg_reg,
>>> +       },
>>> +       {                       /* SGIRn */
>>> +               .base           = 0xF00,
>>> +               .len            = 4,
>>> +               .handle_mmio    = handle_mmio_sgi_reg,
>>> +       },
>>>         {}
>>>  };
>>>
>>> @@ -123,5 +476,97 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
>>>   */
>>>  int vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio)
>>>  {
>>> -       return KVM_EXIT_MMIO;
>>> +       const struct mmio_range *range;
>>> +       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>>> +       unsigned long base = dist->vgic_dist_base;
>>> +
>>> +       if (!irqchip_in_kernel(vcpu->kvm) ||
>>> +           mmio->mmio.phys_addr < base ||
>>> +           (mmio->mmio.phys_addr + mmio->mmio.len) > (base + dist->vgic_dist_size))
>>> +               return KVM_EXIT_MMIO;
>>> +
>>> +       range = find_matching_range(vgic_ranges, mmio, base);
>>> +       if (unlikely(!range || !range->handle_mmio)) {
>>> +               pr_warn("Unhandled access %d %08llx %d\n",
>>> +                       mmio->mmio.is_write, mmio->mmio.phys_addr, mmio->mmio.len);
>>> +               return KVM_EXIT_MMIO;
>>
>> is this the right action to take? let QEMU emulate access to something
>> all of the sudden?
>>
>> how about a data abort instead?
>
> It is likely that it is what will actually happen.
>
>>> +       }
>>> +
>>> +       spin_lock(&vcpu->kvm->arch.vgic.lock);
>>> +       kvm_debug("emulating %d %08llx %d\n", mmio->mmio.is_write,
>>> +                 mmio->mmio.phys_addr, mmio->mmio.len);
>>
>> trace event?
>
> ok.
>
>>> +       range->handle_mmio(vcpu, mmio, mmio->mmio.phys_addr - range->base - base);
>>> +       run->mmio = mmio->mmio;
>>> +       kvm_handle_mmio_return(vcpu, run);
>>> +       spin_unlock(&vcpu->kvm->arch.vgic.lock);
>>> +
>>> +       return KVM_EXIT_UNKNOWN;
>>> +}
>>> +
>>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
>>> +{
>>> +       struct kvm *kvm = vcpu->kvm;
>>> +       struct vgic_dist *dist = &kvm->arch.vgic;
>>> +       int nrcpus = atomic_read(&kvm->online_vcpus);
>>> +       u8 target_cpus;
>>> +       int sgi, mode, c, vcpu_id;
>>> +
>>> +       vcpu_id = vcpu->vcpu_id;
>>
>> consider: s/vcpu_id/src_id/
>
> ok.
>
>>> +
>>> +       sgi = reg & 0xf;
>>> +       target_cpus = (reg >> 16) & 0xff;
>>> +       mode = (reg >> 24) & 3;
>>> +
>>> +       switch (mode) {
>>> +       case 0:
>>> +               if (!target_cpus)
>>> +                       return;
>>> +
>>> +       case 1:
>>> +               target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
>>> +               break;
>>> +
>>> +       case 2:
>>> +               target_cpus = 1 << vcpu_id;
>>> +               break;
>>> +       }
>>> +
>>> +       kvm_for_each_vcpu(c, vcpu, kvm) {
>>> +               if (target_cpus & 1) {
>>> +                       /* Flag the SGI as pending */
>>> +                       vgic_bitmap_set_irq_val(&dist->irq_state, c, sgi, 1);
>>> +                       dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
>>> +                       kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
>>
>> trace event?
>
> ok.
>
>>> +               }
>>> +
>>> +               target_cpus >>= 1;
>>> +       }
>>> +}
>>> +
>>> +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
>>> +{
>>> +       return 0;
>>> +}
>>> +
>>> +/*
>>> + * Update the interrupt state and determine which CPUs have pending
>>> + * interrupts. Must be called with distributor lock held.
>>> + */
>>> +static void vgic_update_state(struct kvm *kvm)
>>> +{
>>> +       struct vgic_dist *dist = &kvm->arch.vgic;
>>> +       struct kvm_vcpu *vcpu;
>>> +       int c;
>>> +
>>> +       if (!dist->enabled) {
>>> +               set_bit(0, &dist->irq_pending_on_cpu);
>>> +               return;
>>> +       }
>>> +
>>> +       kvm_for_each_vcpu(c, vcpu, kvm) {
>>> +               if (compute_pending_for_cpu(vcpu)) {
>>> +                       pr_debug("CPU%d has pending interrupts\n", c);
>>
>> seems like excessive printing :)
>
> pr_debug is compiled out until you define DEBUG at the top of this file.
>
>>> +                       set_bit(1 << c, &dist->irq_pending_on_cpu);
>>> +               }
>>> +       }
>>>  }
>>> --
>>> 1.7.10.3
>>>
>>>
>>
>
>
> --
> Jazz is not dead. It just smells funny...
>
_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm