[Android-virt] [PATCH 04/15] ARM: KVM: VGIC distributor handling

c.dall at virtualopensystems.com (Christoffer Dall) · Thu, 21 Jun 2012 16:58:01 -0400

[snip]

> OK, point taken. I'll have a look at reworking the macro mess, but will
> focus on the rest of the review for the time being.

I didn't review the accessor functions as I assume you'll respin the
patch with adjustments, and I'll look at it then.

[snip]

>>> ?struct vgic_dist {
>>> +#ifdef CONFIG_KVM_ARM_VGIC
>>> + ? ? ? spinlock_t ? ? ? ? ? ? ?lock;
>>> +
>>> + ? ? ? void __iomem ? ? ? ? ? ?*vctrl_base;
>>> +
>>> + ? ? ? u32 ? ? ? ? ? ? ? ? ? ? enabled;
>>> +
>>> + ? ? ? struct vgic_bitmap ? ? ?irq_enabled;
>>> + ? ? ? struct vgic_bitmap ? ? ?irq_pending;
>>> + ? ? ? struct vgic_bitmap ? ? ?irq_active; /* Not used yet. Useful? */
>>
>> are these not only used to preserve the GIC state for
>> migration/powerloss and therefore not relevant when already held in
>> main memory? If set to pending, figuring out if the interrupt is
>> active, I guess we can just read the List registers, right?
>
> We could indeed remove it, and just ignores the writes to this registers.
>

I think we would want to support migration as a general concept, but
probably not between non-kvm accelerated qemu environments and
accelerated ones. So, we might need a way for userspace to tell KVM
that an interrupt is active, but getting the information out we can
query the list registers right?

>>> +
>>> + ? ? ? struct vgic_bytemap ? ? irq_priority;/* Not used yet. Useful? */
>>
>> if we don't use these fields and have no way of testing them, I
>> suggest removing them and if we need to remember that they could go
>> here, there could be a comment on the structure itself.
>
> It could be used to restore a KVM-based VM to a QEMU-based system. If we
> don't plan to support this, I'll just nuke the field.
>

not sure why this field is necessary for that kind of migration, but I
don't think we will support that case.

>>
>>> + ? ? ? struct vgic_bytemap ? ? irq_target;
>>> +
>>> + ? ? ? struct vgic_2bitmap ? ? irq_cfg; /* Not used yet. Useful? */
>>
>> if it's implementation defined if this is supported for any PPI then
>> it seems unlikely we will ever have guests running that relies on the
>> fact that some PPIs can be configured to be edge-triggered, Linux uses
>> level-triggered interrupts for everything it can program anyway, iirc.
>>
>> if we take this out we can save all the 2bitmap stuff above as well right?
>
> Yes, same as above.
>
>>> +
>>> + ? ? ? u8 ? ? ? ? ? ? ? ? ? ? ?irq_sgi_sources[VGIC_MAX_CPUS][16];
>>> +
>>> + ? ? ? struct vgic_bitmap ? ? ?irq_spi_target[VGIC_MAX_CPUS];
>>
>> why do you need a per-cpu view of something banked per-cpu?
>
> We're loosing a bit of memory here (one u32 per CPU). Not really a big
> deal, but could be optimized if necessary.
>

I'm not concerned about the memory, I'm just saying looking at the
struct itself it doesn't make much sense to me, so either a comment
explaining it or not having it.

>>> +
>>> + ? ? ? atomic_t ? ? ? ? ? ? ? ?irq_pending_on_cpu;
>>> +#endif
>>> ?};
>>>
>>> ?struct vgic_cpu {
>>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
>>> index f7856a9..1ace859 100644
>>> --- a/arch/arm/kvm/vgic.c
>>> +++ b/arch/arm/kvm/vgic.c
>>> @@ -21,6 +21,10 @@
>>> ?#include <linux/interrupt.h>
>>> ?#include <linux/io.h>
>>>
>>> +/* Temporary hacks, need to probe DT instead */
>>> +#define VGIC_DIST_BASE ? ? ? ? 0x2c001000
>>> +#define VGIC_DIST_SIZE ? ? ? ? 0x1000
>>> +
>>
>> what's involved in fixing this now? I assume this should come from
>> userspace who may read it off the supplied DT or?
>
> It is still something we have to work out with Peter. Probably involve
> playing with PERIPHBASE instead of the DT. Will have to come from
> userspace anyway.
>

ok, so I read on in some of your patches and saw that you're actually
modifying it somewhat.

>>> ?#define ACCESS_READ_VALUE ? ? ?(1 << 0)
>>> ?#define ACCESS_READ_RAZ ? ? ? ? ? ? ? ?(0 << 0)
>>> ?#define ACCESS_READ_MASK(x) ? ?((x) & (1 << 0))
>>> @@ -30,6 +34,9 @@
>>> ?#define ACCESS_WRITE_VALUE ? ? (3 << 1)
>>> ?#define ACCESS_WRITE_MASK(x) ? ((x) & (3 << 1))
>>>
>>> +static void vgic_update_state(struct kvm *kvm);
>>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
>>> +
>>> ?static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode)
>>> ?{
>>> ? ? ? ?int u32off = offset & 3;
>>> @@ -83,6 +90,182 @@ static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode)
>>> ? ? ? ?}
>>> ?}
>>>
>>> +static void handle_mmio_misc(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 reg;
>>> + ? ? ? u32 u32off = offset & 3;
>>> +
>>> + ? ? ? switch (offset & ~3) {
>>> + ? ? ? case 0: ? ? ? ? ? ? ? ? /* CTLR */
>>> + ? ? ? ? ? ? ? reg = vcpu->kvm->arch.vgic.enabled;
>>> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> + ? ? ? ? ? ? ? if (run->mmio.is_write) {
>>> + ? ? ? ? ? ? ? ? ? ? ? vcpu->kvm->arch.vgic.enabled = reg & 1;
>>> + ? ? ? ? ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
>>> + ? ? ? ? ? ? ? }
>>> + ? ? ? ? ? ? ? break;
>>> +
>>> + ? ? ? case 4: ? ? ? ? ? ? ? ? /* TYPER */
>>> + ? ? ? ? ? ? ? reg ?= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
>>> + ? ? ? ? ? ? ? reg |= (VGIC_NR_IRQS >> 5) - 1;
>>> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> + ? ? ? ? ? ? ? break;
>>> +
>>> + ? ? ? case 8: ? ? ? ? ? ? ? ? /* IIDR */
>>> + ? ? ? ? ? ? ? reg = 0x4B00043B;
>>
>> shouldn't this be keyed off something or is this fixed for the GICv2
>> something ARM specific?
>
> 0x4B is 'K'. 0x43B is the code for ARM. I read it as KVM/ARM, but that's
> only me...
>

good enough for me :)

>>> + ? ? ? ? ? ? ? mmio_do_copy(run, &reg, u32off,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> + ? ? ? ? ? ? ? break;
>>> + ? ? ? }
>>> +}
>>> +
>>> +static void handle_mmio_group_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? mmio_do_copy(run, NULL, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
>>> +}
>>> +
>>> +static void handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
>>> + ? ? ? if (run->mmio.is_write)
>>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
>>> +}
>>> +
>>> +static void handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
>>> + ? ? ? if (run->mmio.is_write && offset < 4) /* Force SGI enabled */
>>> + ? ? ? ? ? ? ? *reg |= 0xffff;
>>
>> why no vgic_update_state(vcpu->kvm) here?
>
> Because I always like to leave tiny little silly bugs behind me... ;-)
> Thanks for pointing this out.
>
>>> +}
>>> +
>>> +static void handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
>>> + ? ? ? if (run->mmio.is_write)
>>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
>>> +}
>>> +
>>> +static void handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
>>
>> why no vgic_update_state(vcpu->kvm) here?
>
> For the same reason as above, and for the sake of cut-n-paste programming.
>
>>> +}
>>> +
>>> +static void handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
>>> +}
>>> +
>>> +static void handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
>>> +}
>>
>> if these active register read/writes should be here, shouldn't they
>> talk to the List registers (or whatever we use to back these values -
>> haven't read that part yet)?
>
> As agreed above, I'll convert these to WI/RAZ.
>
>>> +
>>> +static void handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>
>> shouldn't this call vgic_update_state(vcpu->kvm) to check the running
>> priority of the virtual cpu interface?
>
> No, we do not take priority into account to deliver an interrupt, as
> Linux doesn't use this. Can be introduced later if necessary.
>

then at least a TODO, or NOTE comment or something should be placed here.

>>> +}
>>> +
>>> +static void update_spi_target(struct kvm *kvm)
>>> +{
>>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
>>> + ? ? ? int c, i, nrcpus = atomic_read(&kvm->online_vcpus);
>>> + ? ? ? u8 targ;
>>> + ? ? ? unsigned long *bmap;
>>> +
>>> + ? ? ? for (i = 32; i < VGIC_NR_IRQS; i++) {
>>> + ? ? ? ? ? ? ? targ = vgic_bytemap_get_irq_val(&dist->irq_target, 0, i);
>>> +
>>
>> ah, I see, cpu=0 is because we're always using i >= 32.
>
> Yeah, it's an ugly hack.
>
>>> + ? ? ? ? ? ? ? for (c = 0; c < nrcpus; c++) {
>>> + ? ? ? ? ? ? ? ? ? ? ? bmap = dist->irq_spi_target[c].global.reg_ul;
>>> +
>>> + ? ? ? ? ? ? ? ? ? ? ? if (targ & (1 << c))
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? set_bit(i - 32, bmap);
>>> + ? ? ? ? ? ? ? ? ? ? ? else
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? clear_bit(i - 32, bmap);
>>> + ? ? ? ? ? ? ? }
>>> + ? ? ? }
>>> +}
>>
>> I'm reading this as an optimization so you can easily determine if a
>> specific VCPU should get the SPI? Or am I getting this entirely wrong?
>
> It is a lot more than just an optimization. We really need to know which
> CPU is getting targeted by a specific interrupt.
>

yes, but can't we just look in the irq_target map when we need this information?

>> At what time is this performance critical operation performed?
>
> Only when writing to the target registers, which is hardly a performance
> critical thing.
>
>> Perhaps a comment would be very helpful. This makes me wonder, doesn't
>> Linux just configure cpu 0 to take all physical interrupts or did I
>> glance over this code too soon? Do we have any reason for this
>> complexity for VMs at this point or could we just pretend that ll
>> physical SPIs were statically assigned to CPU0? Performance issues?
>
> You can change the affinity of an interrupt at any time. The fact that
> Linux routes everything on CPU0 by default is just a default behavior. A
> short example while running an SMP VM:
>
> root at linaro:~# grep eth0 /proc/interrupts
> ?47: ? ? ? ? 20 ? ? ? ? ?0 ? ? ? GIC ?eth0
> root at linaro:~# cat /proc/irq/47/smp_affinity
> 3
> root at linaro:~# echo 2 > /proc/irq/47/smp_affinity
> root at linaro:~# ping 10.0.2.2
> PING 10.0.2.2 (10.0.2.2) 56(84) bytes of data.
> 64 bytes from 10.0.2.2: icmp_req=1 ttl=255 time=5.96 ms
> 64 bytes from 10.0.2.2: icmp_req=2 ttl=255 time=0.834 ms
> 64 bytes from 10.0.2.2: icmp_req=3 ttl=255 time=0.818 ms
> ^C
> --- 10.0.2.2 ping statistics ---
> 3 packets transmitted, 3 received, 0% packet loss, time 2002ms
> rtt min/avg/max/mdev = 0.818/2.540/5.968/2.423 ms
> root at linaro:~# grep eth0 /proc/interrupts
> ?47: ? ? ? ? 20 ? ? ? ? ?3 ? ? ? GIC ?eth0
>
>

neat, I didn't look into this at all.

>>> +
>>> +static void handle_mmio_target_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg;
>>> +
>>> + ? ? ? /* We treat the banked interrupts targets as read-only */
>>> + ? ? ? if (offset < 32) {
>>
>> why is this < 32 ? shouldn't it be < 8 ?
>
> This is organized as one byte per interrupt. Each bit in the byte is
> representing a CPU.
>

oh yes, my bad. sorry.

this should be RAZ if atomic_read(&kvm->online_vcpus) == 1 though right?

>>> + ? ? ? ? ? ? ? u32 roreg = 1 << vcpu->vcpu_id;
>>> + ? ? ? ? ? ? ? roreg |= roreg << 8;
>>> + ? ? ? ? ? ? ? roreg |= roreg << 16;
>>> +
>>> + ? ? ? ? ? ? ? mmio_do_copy(run, &roreg, offset,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
>>> + ? ? ? ? ? ? ? return;
>>> + ? ? ? }
>>> +
>>> + ? ? ? reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_target,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>> + ? ? ? if (run->mmio.is_write) {
>>> + ? ? ? ? ? ? ? update_spi_target(vcpu->kvm);
>>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
>>> + ? ? ? }
>>> +}
>>> +
>>> +static void handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 *reg = vgic_2bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset);
>>> + ? ? ? mmio_do_copy(run, reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
>>
>> shouldn't we make sure that SGIs are RAO here?
>
> Indeed.
>
>> otherwise, could we just do RAZ/WI here?
>
> Only if we discard the idea of migrating a VM to a non KVM environment.
>

As I wrote above, I think it's ok to discard that idea, but I don't
see the exact relation here. If QEMU needs to know the content of this
register, we can just return RAZ right?

>>> +}
>>> +
>>> +static void handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset)
>>> +{
>>> + ? ? ? u32 reg;
>>> + ? ? ? mmio_do_copy(run, &reg, offset,
>>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
>>> + ? ? ? if (run->mmio.is_write) {
>>> + ? ? ? ? ? ? ? vgic_dispatch_sgi(vcpu, reg);
>>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm);
>>> + ? ? ? }
>>> +}
>>> +
>>> ?/* All this should really be generic code... FIXME!!! */
>>> ?struct mmio_range {
>>> ? ? ? ?unsigned long base;
>>> @@ -92,6 +275,66 @@ struct mmio_range {
>>> ?};
>>>
>>> ?static const struct mmio_range vgic_ranges[] = {
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* CTRL, TYPER, IIDR */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 12,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_misc,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IGROUPRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x80,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>
>> what happens if this is not byte aligned and the guest reads the
>> entire register, then find_matching_range will not find this - is this
>> a supported situation?
>
> I'm not sure I understand what you mean here...
>

if (VGIC_NR_IRQS / 8) is not word-aligned and you read the last
register, this would fail - but it doesn't make sense when
VGIC_NR_IRQS needs to be 32(N+1).

It does make me think that it would be really nice to define the
VGIC_NR_IRQS like this:

#define VGIC_ITLINES 3
#define VGIC_NR_IRQS (32 * (VGIC_ITLINES + 1))

should shatter many doubts I had for a first read :)

>>
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_group_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISENABLERn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x100,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_enable_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICENABLERn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x180,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_enable_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISPENDRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x200,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_pending_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICPENDRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x280,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_pending_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISACTIVERn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x300,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_active_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICACTIVERn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x380,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_active_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IPRIORITYRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x400,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_priority_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ITARGETSRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x800,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_target_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICFGRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xC00,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 4,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_cfg_reg,
>>> + ? ? ? },
>>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* SGIRn */
>>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xF00,
>>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 4,
>>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_sgi_reg,
>>> + ? ? ? },
>>> ? ? ? ?{}
>>> ?};
>>>
>>> @@ -121,10 +364,82 @@ int vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>> ? ? ? ?if (!range || !range->handle_mmio)
>>> ? ? ? ? ? ? ? ?return KVM_EXIT_MMIO;
>>>
>>> + ? ? ? spin_lock(&vcpu->kvm->arch.vgic.lock);
>>> ? ? ? ?pr_debug("emulating %d %08llx %d\n", run->mmio.is_write,
>>> ? ? ? ? ? ? ? ? run->mmio.phys_addr, run->mmio.len);
>>> ? ? ? ?range->handle_mmio(vcpu, run, run->mmio.phys_addr - range->base);
>>> ? ? ? ?kvm_handle_mmio_return(vcpu, run);
>>> + ? ? ? spin_unlock(&vcpu->kvm->arch.vgic.lock);
>>>
>>> ? ? ? ?return KVM_EXIT_UNKNOWN;
>>> ?}
>>> +
>>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
>>> +{
>>> + ? ? ? struct kvm *kvm = vcpu->kvm;
>>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
>>> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus);
>>> + ? ? ? u8 target_cpus;
>>> + ? ? ? int sgi, mode, c, vcpu_id;
>>> +
>>> + ? ? ? vcpu_id = vcpu->vcpu_id;
>>> +
>>> + ? ? ? sgi = reg & 0xf;
>>> + ? ? ? target_cpus = (reg >> 16) & 0xff;
>>
>> no worries about the NSATT bit here? I'm a little fuzzy about the
>> interrupt grouping and how that works with a guest always (or maybe
>> not thinking so?) running in non-secure mode.
>
> The notion of group only makes sense in secure mode. In non-secure mode,
> you only see the interrupts that are flagged as group-1 by the secure
> mode. These interrupts appear as group-0 on the non-secure side.
>

ok, thanks.

>>> + ? ? ? mode = (reg >> 24) & 3;
>>> +
>>> + ? ? ? switch (mode) {
>>> + ? ? ? case 0:
>>> + ? ? ? ? ? ? ? if (!target_cpus)
>>> + ? ? ? ? ? ? ? ? ? ? ? return;
>>> +
>>> + ? ? ? case 1:
>>> + ? ? ? ? ? ? ? target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
>>> + ? ? ? ? ? ? ? break;
>>> +
>>> + ? ? ? case 2:
>>> + ? ? ? ? ? ? ? target_cpus = 1 << vcpu_id;
>>> + ? ? ? ? ? ? ? break;
>>> + ? ? ? }
>>> +
>>> + ? ? ? for (c = 0; c < nrcpus; c++) {
>>> + ? ? ? ? ? ? ? if (target_cpus & 1) {
>>> + ? ? ? ? ? ? ? ? ? ? ? /* Flag the SGI as pending */
>>> + ? ? ? ? ? ? ? ? ? ? ? vgic_bitmap_set_irq_val(&dist->irq_pending, c, sgi, 1);
>>> + ? ? ? ? ? ? ? ? ? ? ? dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
>>> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
>>
>> should this not rather be a trace event?
>
> Possibly. I haven't looked at the trace events yet. Any suggestion?
>

just define a trace event in arch/arm/kvm/trace.h and call that from
here with the right parameters, e.g.:

+TRACE_EVENT(kvm_sgi,
+	TP_PROTO(int, int, int),
+	TP_ARGS(sgi, source_vcpu, dest_vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	sgi		)
+		__field(	unsigned long,	source_vcpu	)
+		__field(	unsigned long,	dest_vcpu	)
+	),
+
+	TP_fast_assign(
+		__entry->sgi			= sgi;
+		__entry->source_vcpu		= source_vcpu;
+		__entry->dest_vcpu		= dest_vcpu;
+	),
+
+	TP_printk("SGI %d from VCPU %d to VCPU %d",
+		  __entry->sgi, __entry->source_vcpu, __entry->dest_vcpu)
+);

[snip]

+trace_kvm_emulate_cp15_imp(sgi, vcpu_id, c);

>>> + ? ? ? ? ? ? ? }
>>> +
>>> + ? ? ? ? ? ? ? target_cpus >>= 1;
>>> + ? ? ? }
>>> +}
>>> +
>>> +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
>>> +{
>>> + ? ? ? return 0;
>>> +}
>>> +
>>> +/*
>>> + * Update the interrupt state and determine which CPUs have pending
>>> + * interrupts. Must be called with distributor lock held.
>>> + */
>>> +static void vgic_update_state(struct kvm *kvm)
>>> +{
>>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic;
>>> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus);
>>> + ? ? ? int c;
>>> +
>>> + ? ? ? if (!dist->enabled) {
>>> + ? ? ? ? ? ? ? atomic_set(&dist->irq_pending_on_cpu, 0);
>>> + ? ? ? ? ? ? ? return;
>>> + ? ? ? }
>>> +
>>> + ? ? ? for (c = 0; c < nrcpus; c++) {
>>> + ? ? ? ? ? ? ? struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, c);
>>> +
>>> + ? ? ? ? ? ? ? if (compute_pending_for_cpu(vcpu)) {
>>> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("CPU%d has pending interrupts\n", c);
>>> + ? ? ? ? ? ? ? ? ? ? ? atomic_or((1 << c), &dist->irq_pending_on_cpu);
>>
>> this may be simpler with set_bit(c, ....) and have pending_on_cpu be
>> an unsigned long. not important.
>
> Indeed.
>

Thanks,
Christoffer