[Android-virt] [PATCH 04/15] ARM: KVM: VGIC distributor handling

c.dall at virtualopensystems.com (Christoffer Dall) · Wed, 20 Jun 2012 19:02:48 -0400

On Mon, May 14, 2012 at 9:05 AM, Marc Zyngier <marc.zyngier at arm.com> wrote:
> Add the GIC distributor emulation code. A number of the GIC features
> are simply ignored as they are not required to boot a Linux guest.
>
> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> ---
> ?arch/arm/include/asm/kvm_vgic.h | ?101 +++++++++++++
> ?arch/arm/kvm/vgic.c ? ? ? ? ? ? | ?315 +++++++++++++++++++++++++++++++++++++++
> ?2 files changed, 416 insertions(+), 0 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index 34f5cc0..dca2d09 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -1,7 +1,108 @@
> ?#ifndef __ASM_ARM_KVM_VGIC_H
> ?#define __ASM_ARM_KVM_VGIC_H
>
> +#include <linux/kernel.h>
> +#include <linux/kvm.h>
> +#include <linux/irqreturn.h>
> +#include <linux/spinlock.h>
> +#include <linux/types.h>
> +
> +#define VGIC_NR_IRQS ? 128 ? ? ? ? ? /* Arbitrary number */

perhaps mention that the struct definitions break if this is not a power of two.

perhaps also mention (or check like below) that the number of maximum
IRQs are 1024 (even though max must value is 1019 for non-reserved.

> +#define VGIC_MAX_CPUS ?KVM_MAX_VCPUS /* Same as the HW GIC */
> +
> +#if (VGIC_MAX_CPUS > 8)
> +#error Invalid number of CPU interfaces
> +#endif
> +
> +/*
> + * The GIC registers describing interrupts have two parts:

these are specifically GIC distributor registers right?

> + * - 32 per-CPU interrupts (SGI + PPI)
> + * - a bunch of global interrups (SPI)
> + * They can have 1, 2 or 8 bit fields. Make it easier by having some template
> + * to create the structures and the accessors.
> + */
> +#define DEFINE_VGIC_MAP_STRUCT(typename, size) ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +struct typename { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? union { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? u32 reg[32 / (sizeof(u32) * 8 / size)]; ? ? ? ? ? ? ? ? ? \

32 / (sizeof(u32) * 8 / size) == size

this should be pretty much no matter the architecture, compiler, etc. No?

> + ? ? ? ? ? ? ? unsigned long reg_ul[0]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? } percpu[VGIC_MAX_CPUS]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? union { ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? u32 reg[(VGIC_NR_IRQS - 32) / (sizeof(u32) * 8 / size)]; ?\

consider defining VGIC_NR_GLOB_IRQS (VGIC_NR_IRQS - 32)
and do:

u32 reg[(VGIC_N_GLOB_IRQS * size) / sizeof(u32)];

> + ? ? ? ? ? ? ? unsigned long reg_ul[0]; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? } global; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +}; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> +static inline u32 *typename##_get_reg(struct typename *x, ? ? ? ? ? ? ? ?\
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int cpuid, u32 offset) ? ? ? ? ? ? ?\
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? static const int irq_per_u32 = sizeof(u32) * 8 / size; ? ? ? ? ? ?\
> + ? ? ? static const int glob_offset = 32 / irq_per_u32; ? ? ? ? ? ? ? ? ?\
> + ? ? ? offset >>= 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? BUG_ON(offset > (VGIC_NR_IRQS ?/ irq_per_u32)); ? ? ? ? ? ? ? ? ? \
> + ? ? ? if (offset < glob_offset) ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? return x->percpu[cpuid].reg + offset; ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? else ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? ? ? ? ? return x->global.reg + offset - glob_offset; ? ? ? ? ? ? ?\
> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +static inline int typename##_get_irq_val(struct typename *x, ? ? ? ? ? ? \
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int cpuid, int irq) ? ? ? ? ? ? ?\
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? static const int irq_per_u32 = sizeof(u32) * 8 / size; ? ? ? ? ? ?\
> + ? ? ? static const u32 mask = (1 << size) - 1; ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? u32 *reg, offset, shift; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? offset = (irq / irq_per_u32) << 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? shift = (irq % irq_per_u32) * size; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? reg = typename##_get_reg(x, cpuid, offset); ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? return (*reg >> shift) & mask; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +static inline void typename##_set_irq_val(struct typename *x, ? ? ? ? ? ?\
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?int cpuid, int irq, int val) ? ? \
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? static const int irq_per_u32 = sizeof(u32) * 8 / size; ? ? ? ? ? ?\
> + ? ? ? static const u32 mask = (1 << size) - 1; ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? u32 *reg, offset, shift; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? offset = (irq / irq_per_u32) << 2; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? shift = (irq % irq_per_u32) * size; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? reg = typename##_get_reg(x, cpuid, offset); ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? *reg &= ~(mask << shift); ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? \
> + ? ? ? *reg |= (val & mask) << shift; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +} ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +static inline unsigned long *typename##_get_cpu_map(struct typename *x, ? ? ? ? ?\
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int cpu_id) ? ? ? ? ? \
> +{ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? if (unlikely(cpu_id >= VGIC_MAX_CPUS)) ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? ? ? ? ? return NULL; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> + ? ? ? return x->percpu[cpu_id].reg_ul; ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?\
> +}

this macro makes me want to cry a little :)

couple of reasons I don't like it: it ruins all chances of
grep/cscope. No syntax highlighting (I know you don't use it, but
others do). I don't understand the real benefit.

could you not have:

struct vgic_bitmap {
        u32 percpu[VGIC_MAX_CPUS];
        u32 global[VGIC_NR_GLOB_IRQS / 32];
};
struct vgic_2bitmap {
        u32 percpu[VGIC_MAX_CPUS * 2];
        u32 global[(VGIC_NR_GLOB_IRQS * 2) / 32];
};
struct vgic_bytemap {
        u32 percpu[VGIC_MAX_CPUS * 8];
        u32 global[(VGIC_NR_GLOB_IRQS * 8) / 32];
};

or, if you use the accessors all the time, maybe drop the
percpu/global union split:

struct vgic_bitmap {
        u32 data[VGIC_MAX_CPUS + (VGIC_NR_GLOB_IRQS / 32)];
};
struct vgic_2bitmap {
        u32 data[2 * (VGIC_MAX_CPUS + (VGIC_NR_GLOB_IRQS / 32))];
};
struct vgic_bytemap {
        u32 data[8 * (VGIC_MAX_CPUS + (VGIC_NR_GLOB_IRQS / 32))];
};

but I prefer the first version.

The accessors with BUG_ON in them are also a bit scary.

Perhaps you should have:

struct vgic_map {
    union {
        struct vgic_bitmap;
        struct vgic_2bitmap;
        struct vgic_bytemap;
    };
};

u32 *vgic_dist_get_reg(struct vgic_map *map, int field_size, int
cpuid, u32 offset)
{
}

and then have static inline wrappers for bitmap_get_reg(struct
vgic_bitmap *map, ...) etc.

should also produce less code and there should be very little
performance hit as most divisions can probably be done using a shift.

> +
> +
> +DEFINE_VGIC_MAP_STRUCT(vgic_bitmap, 1);
> +DEFINE_VGIC_MAP_STRUCT(vgic_2bitmap, 2);
> +DEFINE_VGIC_MAP_STRUCT(vgic_bytemap, 8);
> +
> ?struct vgic_dist {
> +#ifdef CONFIG_KVM_ARM_VGIC
> + ? ? ? spinlock_t ? ? ? ? ? ? ?lock;
> +
> + ? ? ? void __iomem ? ? ? ? ? ?*vctrl_base;
> +
> + ? ? ? u32 ? ? ? ? ? ? ? ? ? ? enabled;
> +
> + ? ? ? struct vgic_bitmap ? ? ?irq_enabled;
> + ? ? ? struct vgic_bitmap ? ? ?irq_pending;
> + ? ? ? struct vgic_bitmap ? ? ?irq_active; /* Not used yet. Useful? */

are these not only used to preserve the GIC state for
migration/powerloss and therefore not relevant when already held in
main memory? If set to pending, figuring out if the interrupt is
active, I guess we can just read the List registers, right?

> +
> + ? ? ? struct vgic_bytemap ? ? irq_priority;/* Not used yet. Useful? */

if we don't use these fields and have no way of testing them, I
suggest removing them and if we need to remember that they could go
here, there could be a comment on the structure itself.

> + ? ? ? struct vgic_bytemap ? ? irq_target;
> +
> + ? ? ? struct vgic_2bitmap ? ? irq_cfg; /* Not used yet. Useful? */

if it's implementation defined if this is supported for any PPI then
it seems unlikely we will ever have guests running that relies on the
fact that some PPIs can be configured to be edge-triggered, Linux uses
level-triggered interrupts for everything it can program anyway, iirc.

if we take this out we can save all the 2bitmap stuff above as well right?

> +
> + ? ? ? u8 ? ? ? ? ? ? ? ? ? ? ?irq_sgi_sources[VGIC_MAX_CPUS][16];
> +
> + ? ? ? struct vgic_bitmap ? ? ?irq_spi_target[VGIC_MAX_CPUS];

why do you need a per-cpu view of something banked per-cpu?

> +
> + ? ? ? atomic_t ? ? ? ? ? ? ? ?irq_pending_on_cpu;
> +#endif
> ?};
>
> ?struct vgic_cpu {
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index f7856a9..1ace859 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -21,6 +21,10 @@
> ?#include <linux/interrupt.h>
> ?#include <linux/io.h>
>
> +/* Temporary hacks, need to probe DT instead */
> +#define VGIC_DIST_BASE ? ? ? ? 0x2c001000
> +#define VGIC_DIST_SIZE ? ? ? ? 0x1000
> +

what's involved in fixing this now? I assume this should come from
userspace who may read it off the supplied DT or?

> ?#define ACCESS_READ_VALUE ? ? ?(1 << 0)
> ?#define ACCESS_READ_RAZ ? ? ? ? ? ? ? ?(0 << 0)
> ?#define ACCESS_READ_MASK(x) ? ?((x) & (1 << 0))
> @@ -30,6 +34,9 @@
> ?#define ACCESS_WRITE_VALUE ? ? (3 << 1)
> ?#define ACCESS_WRITE_MASK(x) ? ((x) & (3 << 1))
>
> +static void vgic_update_state(struct kvm *kvm);
> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
> +
> ?static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode)
> ?{
> ? ? ? ?int u32off = offset & 3;
> @@ -83,6 +90,182 @@ static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode)
> ? ? ? ?}
> ?}
>
> +static void handle_mmio_misc(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 reg;
> + ? ? ? u32 u32off = offset & 3;
> +
> + ? ? ? switch (offset & ~3) {
> + ? ? ? case 0: ? ? ? ? ? ? ? ? /* CTLR */
> + ? ? ? ? ? ? ? reg = vcpu->kvm->arch.vgic.enabled;
> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
> + ? ? ? ? ? ? ? if (run->mmio.is_write) {
> + ? ? ? ? ? ? ? ? ? ? ? vcpu->kvm->arch.vgic.enabled = reg & 1;
> + ? ? ? ? ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? break;
> +
> + ? ? ? case 4: ? ? ? ? ? ? ? ? /* TYPER */
> + ? ? ? ? ? ? ? reg ?= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
> + ? ? ? ? ? ? ? reg |= (VGIC_NR_IRQS >> 5) - 1;
> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
> + ? ? ? ? ? ? ? break;
> +
> + ? ? ? case 8: ? ? ? ? ? ? ? ? /* IIDR */
> + ? ? ? ? ? ? ? reg = 0x4B00043B;

shouldn't this be keyed off something or is this fixed for the GICv2
something ARM specific?

> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
> + ? ? ? ? ? ? ? break;
> + ? ? ? }
> +}
> +
> +static void handle_mmio_group_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
> +{
> + ? ? ? mmio_do_copy(run, NULL, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
> +}
> +
> +static void handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
> + ? ? ? if (run->mmio.is_write)
> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
> +}
> +
> +static void handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
> + ? ? ? if (run->mmio.is_write && offset < 4) /* Force SGI enabled */
> + ? ? ? ? ? ? ? *reg |= 0xffff;

why no vgic_update_state(vcpu->kvm) here?

> +}
> +
> +static void handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
> + ? ? ? if (run->mmio.is_write)
> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
> +}
> +
> +static void handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);

why no vgic_update_state(vcpu->kvm) here?

> +}
> +
> +static void handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
> +}
> +
> +static void handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
> +}

if these active register read/writes should be here, shouldn't they
talk to the List registers (or whatever we use to back these values -
haven't read that part yet)?

> +
> +static void handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);

shouldn't this call vgic_update_state(vcpu->kvm) to check the running
priority of the virtual cpu interface?

> +}
> +
> +static void update_spi_target(struct kvm *kvm)
> +{
> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
> + ? ? ? int c, i, nrcpus = atomic_read(&kvm->online_vcpus);
> + ? ? ? u8 targ;
> + ? ? ? unsigned long *bmap;
> +
> + ? ? ? for (i = 32; i < VGIC_NR_IRQS; i++) {
> + ? ? ? ? ? ? ? targ = vgic_bytemap_get_irq_val(&dist->irq_target, 0, i);
> +

ah, I see, cpu=0 is because we're always using i >= 32.

> + ? ? ? ? ? ? ? for (c = 0; c < nrcpus; c++) {
> + ? ? ? ? ? ? ? ? ? ? ? bmap = dist->irq_spi_target[c].global.reg_ul;
> +
> + ? ? ? ? ? ? ? ? ? ? ? if (targ & (1 << c))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? set_bit(i - 32, bmap);
> + ? ? ? ? ? ? ? ? ? ? ? else
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? clear_bit(i - 32, bmap);
> + ? ? ? ? ? ? ? }
> + ? ? ? }
> +}

I'm reading this as an optimization so you can easily determine if a
specific VCPU should get the SPI? Or am I getting this entirely wrong?

At what time is this performance critical operation performed?

Perhaps a comment would be very helpful. This makes me wonder, doesn't
Linux just configure cpu 0 to take all physical interrupts or did I
glance over this code too soon? Do we have any reason for this
complexity for VMs at this point or could we just pretend that ll
physical SPIs were statically assigned to CPU0? Performance issues?

> +
> +static void handle_mmio_target_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg;
> +
> + ? ? ? /* We treat the banked interrupts targets as read-only */
> + ? ? ? if (offset < 32) {

why is this < 32 ? shouldn't it be < 8 ?

> + ? ? ? ? ? ? ? u32 roreg = 1 << vcpu->vcpu_id;
> + ? ? ? ? ? ? ? roreg |= roreg << 8;
> + ? ? ? ? ? ? ? roreg |= roreg << 16;
> +
> + ? ? ? ? ? ? ? mmio_do_copy(run, &roreg, offset,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
> + ? ? ? ? ? ? ? return;
> + ? ? ? }
> +
> + ? ? ? reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_target,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
> + ? ? ? if (run->mmio.is_write) {
> + ? ? ? ? ? ? ? update_spi_target(vcpu->kvm);
> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
> + ? ? ? }
> +}
> +
> +static void handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 *reg = vgic_2bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset);
> + ? ? ? mmio_do_copy(run, reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);

shouldn't we make sure that SGIs are RAO here?

otherwise, could we just do RAZ/WI here?

> +}
> +
> +static void handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
> +{
> + ? ? ? u32 reg;
> + ? ? ? mmio_do_copy(run, &reg, offset,
> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
> + ? ? ? if (run->mmio.is_write) {
> + ? ? ? ? ? ? ? vgic_dispatch_sgi(vcpu, reg);
> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
> + ? ? ? }
> +}
> +
> ?/* All this should really be generic code... FIXME!!! */
> ?struct mmio_range {
> ? ? ? ?unsigned long base;
> @@ -92,6 +275,66 @@ struct mmio_range {
> ?};
>
> ?static const struct mmio_range vgic_ranges[] = {
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* CTRL, TYPER, IIDR */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 12,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_misc,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IGROUPRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x80,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,

what happens if this is not byte aligned and the guest reads the
entire register, then find_matching_range will not find this - is this
a supported situation?

> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_group_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISENABLERn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x100,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_enable_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICENABLERn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x180,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_enable_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISPENDRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x200,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_pending_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICPENDRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x280,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_pending_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISACTIVERn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x300,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_active_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICACTIVERn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x380,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_active_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IPRIORITYRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x400,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_priority_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ITARGETSRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x800,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_target_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICFGRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xC00,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 4,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_cfg_reg,
> + ? ? ? },
> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* SGIRn */
> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xF00,
> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 4,
> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_sgi_reg,
> + ? ? ? },
> ? ? ? ?{}
> ?};
>
> @@ -121,10 +364,82 @@ int vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run)
> ? ? ? ?if (!range || !range->handle_mmio)
> ? ? ? ? ? ? ? ?return KVM_EXIT_MMIO;
>
> + ? ? ? spin_lock(&vcpu->kvm->arch.vgic.lock);
> ? ? ? ?pr_debug("emulating %d %08llx %d\n", run->mmio.is_write,
> ? ? ? ? ? ? ? ? run->mmio.phys_addr, run->mmio.len);
> ? ? ? ?range->handle_mmio(vcpu, run, run->mmio.phys_addr - range->base);
> ? ? ? ?kvm_handle_mmio_return(vcpu, run);
> + ? ? ? spin_unlock(&vcpu->kvm->arch.vgic.lock);
>
> ? ? ? ?return KVM_EXIT_UNKNOWN;
> ?}
> +
> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
> +{
> + ? ? ? struct kvm *kvm = vcpu->kvm;
> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus);
> + ? ? ? u8 target_cpus;
> + ? ? ? int sgi, mode, c, vcpu_id;
> +
> + ? ? ? vcpu_id = vcpu->vcpu_id;
> +
> + ? ? ? sgi = reg & 0xf;
> + ? ? ? target_cpus = (reg >> 16) & 0xff;

no worries about the NSATT bit here? I'm a little fuzzy about the
interrupt grouping and how that works with a guest always (or maybe
not thinking so?) running in non-secure mode.

> + ? ? ? mode = (reg >> 24) & 3;
> +
> + ? ? ? switch (mode) {
> + ? ? ? case 0:
> + ? ? ? ? ? ? ? if (!target_cpus)
> + ? ? ? ? ? ? ? ? ? ? ? return;
> +
> + ? ? ? case 1:
> + ? ? ? ? ? ? ? target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
> + ? ? ? ? ? ? ? break;
> +
> + ? ? ? case 2:
> + ? ? ? ? ? ? ? target_cpus = 1 << vcpu_id;
> + ? ? ? ? ? ? ? break;
> + ? ? ? }
> +
> + ? ? ? for (c = 0; c < nrcpus; c++) {
> + ? ? ? ? ? ? ? if (target_cpus & 1) {
> + ? ? ? ? ? ? ? ? ? ? ? /* Flag the SGI as pending */
> + ? ? ? ? ? ? ? ? ? ? ? vgic_bitmap_set_irq_val(&dist->irq_pending, c, sgi, 1);
> + ? ? ? ? ? ? ? ? ? ? ? dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);

should this not rather be a trace event?

> + ? ? ? ? ? ? ? }
> +
> + ? ? ? ? ? ? ? target_cpus >>= 1;
> + ? ? ? }
> +}
> +
> +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
> +{
> + ? ? ? return 0;
> +}
> +
> +/*
> + * Update the interrupt state and determine which CPUs have pending
> + * interrupts. Must be called with distributor lock held.
> + */
> +static void vgic_update_state(struct kvm *kvm)
> +{
> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus);
> + ? ? ? int c;
> +
> + ? ? ? if (!dist->enabled) {
> + ? ? ? ? ? ? ? atomic_set(&dist->irq_pending_on_cpu, 0);
> + ? ? ? ? ? ? ? return;
> + ? ? ? }
> +
> + ? ? ? for (c = 0; c < nrcpus; c++) {
> + ? ? ? ? ? ? ? struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, c);
> +
> + ? ? ? ? ? ? ? if (compute_pending_for_cpu(vcpu)) {
> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("CPU%d has pending interrupts\n", c);
> + ? ? ? ? ? ? ? ? ? ? ? atomic_or((1 << c), &dist->irq_pending_on_cpu);

this may be simpler with set_bit(c, ....) and have pending_on_cpu be
an unsigned long. not important.

> + ? ? ? ? ? ? ? }
> + ? ? ? }
> +}
> --
> 1.7.7.1
>
>
>
> _______________________________________________
> Android-virt mailing list
> Android-virt at lists.cs.columbia.edu
> https://lists.cs.columbia.edu/cucslists/listinfo/android-virt