[snip] > OK, point taken. I'll have a look at reworking the macro mess, but will > focus on the rest of the review for the time being. I didn't review the accessor functions as I assume you'll respin the patch with adjustments, and I'll look at it then. [snip] >>> ?struct vgic_dist { >>> +#ifdef CONFIG_KVM_ARM_VGIC >>> + ? ? ? spinlock_t ? ? ? ? ? ? ?lock; >>> + >>> + ? ? ? void __iomem ? ? ? ? ? ?*vctrl_base; >>> + >>> + ? ? ? u32 ? ? ? ? ? ? ? ? ? ? enabled; >>> + >>> + ? ? ? struct vgic_bitmap ? ? ?irq_enabled; >>> + ? ? ? struct vgic_bitmap ? ? ?irq_pending; >>> + ? ? ? struct vgic_bitmap ? ? ?irq_active; /* Not used yet. Useful? */ >> >> are these not only used to preserve the GIC state for >> migration/powerloss and therefore not relevant when already held in >> main memory? If set to pending, figuring out if the interrupt is >> active, I guess we can just read the List registers, right? > > We could indeed remove it, and just ignores the writes to this registers. > I think we would want to support migration as a general concept, but probably not between non-kvm accelerated qemu environments and accelerated ones. So, we might need a way for userspace to tell KVM that an interrupt is active, but getting the information out we can query the list registers right? >>> + >>> + ? ? ? struct vgic_bytemap ? ? irq_priority;/* Not used yet. Useful? */ >> >> if we don't use these fields and have no way of testing them, I >> suggest removing them and if we need to remember that they could go >> here, there could be a comment on the structure itself. > > It could be used to restore a KVM-based VM to a QEMU-based system. If we > don't plan to support this, I'll just nuke the field. > not sure why this field is necessary for that kind of migration, but I don't think we will support that case. >> >>> + ? ? ? struct vgic_bytemap ? ? irq_target; >>> + >>> + ? ? ? struct vgic_2bitmap ? ? irq_cfg; /* Not used yet. Useful? */ >> >> if it's implementation defined if this is supported for any PPI then >> it seems unlikely we will ever have guests running that relies on the >> fact that some PPIs can be configured to be edge-triggered, Linux uses >> level-triggered interrupts for everything it can program anyway, iirc. >> >> if we take this out we can save all the 2bitmap stuff above as well right? > > Yes, same as above. > >>> + >>> + ? ? ? u8 ? ? ? ? ? ? ? ? ? ? ?irq_sgi_sources[VGIC_MAX_CPUS][16]; >>> + >>> + ? ? ? struct vgic_bitmap ? ? ?irq_spi_target[VGIC_MAX_CPUS]; >> >> why do you need a per-cpu view of something banked per-cpu? > > We're loosing a bit of memory here (one u32 per CPU). Not really a big > deal, but could be optimized if necessary. > I'm not concerned about the memory, I'm just saying looking at the struct itself it doesn't make much sense to me, so either a comment explaining it or not having it. >>> + >>> + ? ? ? atomic_t ? ? ? ? ? ? ? ?irq_pending_on_cpu; >>> +#endif >>> ?}; >>> >>> ?struct vgic_cpu { >>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c >>> index f7856a9..1ace859 100644 >>> --- a/arch/arm/kvm/vgic.c >>> +++ b/arch/arm/kvm/vgic.c >>> @@ -21,6 +21,10 @@ >>> ?#include <linux/interrupt.h> >>> ?#include <linux/io.h> >>> >>> +/* Temporary hacks, need to probe DT instead */ >>> +#define VGIC_DIST_BASE ? ? ? ? 0x2c001000 >>> +#define VGIC_DIST_SIZE ? ? ? ? 0x1000 >>> + >> >> what's involved in fixing this now? I assume this should come from >> userspace who may read it off the supplied DT or? > > It is still something we have to work out with Peter. Probably involve > playing with PERIPHBASE instead of the DT. Will have to come from > userspace anyway. > ok, so I read on in some of your patches and saw that you're actually modifying it somewhat. >>> ?#define ACCESS_READ_VALUE ? ? ?(1 << 0) >>> ?#define ACCESS_READ_RAZ ? ? ? ? ? ? ? ?(0 << 0) >>> ?#define ACCESS_READ_MASK(x) ? ?((x) & (1 << 0)) >>> @@ -30,6 +34,9 @@ >>> ?#define ACCESS_WRITE_VALUE ? ? (3 << 1) >>> ?#define ACCESS_WRITE_MASK(x) ? ((x) & (3 << 1)) >>> >>> +static void vgic_update_state(struct kvm *kvm); >>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); >>> + >>> ?static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode) >>> ?{ >>> ? ? ? ?int u32off = offset & 3; >>> @@ -83,6 +90,182 @@ static void mmio_do_copy(struct kvm_run *run, u32 *reg, u32 offset, int mode) >>> ? ? ? ?} >>> ?} >>> >>> +static void handle_mmio_misc(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 reg; >>> + ? ? ? u32 u32off = offset & 3; >>> + >>> + ? ? ? switch (offset & ~3) { >>> + ? ? ? case 0: ? ? ? ? ? ? ? ? /* CTLR */ >>> + ? ? ? ? ? ? ? reg = vcpu->kvm->arch.vgic.enabled; >>> + ? ? ? ? ? ? ? mmio_do_copy(run, ®, u32off, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); >>> + ? ? ? ? ? ? ? if (run->mmio.is_write) { >>> + ? ? ? ? ? ? ? ? ? ? ? vcpu->kvm->arch.vgic.enabled = reg & 1; >>> + ? ? ? ? ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm); >>> + ? ? ? ? ? ? ? } >>> + ? ? ? ? ? ? ? break; >>> + >>> + ? ? ? case 4: ? ? ? ? ? ? ? ? /* TYPER */ >>> + ? ? ? ? ? ? ? reg ?= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; >>> + ? ? ? ? ? ? ? reg |= (VGIC_NR_IRQS >> 5) - 1; >>> + ? ? ? ? ? ? ? mmio_do_copy(run, ®, u32off, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); >>> + ? ? ? ? ? ? ? break; >>> + >>> + ? ? ? case 8: ? ? ? ? ? ? ? ? /* IIDR */ >>> + ? ? ? ? ? ? ? reg = 0x4B00043B; >> >> shouldn't this be keyed off something or is this fixed for the GICv2 >> something ARM specific? > > 0x4B is 'K'. 0x43B is the code for ARM. I read it as KVM/ARM, but that's > only me... > good enough for me :) >>> + ? ? ? ? ? ? ? mmio_do_copy(run, ®, u32off, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); >>> + ? ? ? ? ? ? ? break; >>> + ? ? ? } >>> +} >>> + >>> +static void handle_mmio_group_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? mmio_do_copy(run, NULL, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); >>> +} >>> + >>> +static void handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); >>> + ? ? ? if (run->mmio.is_write) >>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm); >>> +} >>> + >>> +static void handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); >>> + ? ? ? if (run->mmio.is_write && offset < 4) /* Force SGI enabled */ >>> + ? ? ? ? ? ? ? *reg |= 0xffff; >> >> why no vgic_update_state(vcpu->kvm) here? > > Because I always like to leave tiny little silly bugs behind me... ;-) > Thanks for pointing this out. > >>> +} >>> + >>> +static void handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); >>> + ? ? ? if (run->mmio.is_write) >>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm); >>> +} >>> + >>> +static void handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); >> >> why no vgic_update_state(vcpu->kvm) here? > > For the same reason as above, and for the sake of cut-n-paste programming. > >>> +} >>> + >>> +static void handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); >>> +} >>> + >>> +static void handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_active, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); >>> +} >> >> if these active register read/writes should be here, shouldn't they >> talk to the List registers (or whatever we use to back these values - >> haven't read that part yet)? > > As agreed above, I'll convert these to WI/RAZ. > >>> + >>> +static void handle_mmio_priority_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); >> >> shouldn't this call vgic_update_state(vcpu->kvm) to check the running >> priority of the virtual cpu interface? > > No, we do not take priority into account to deliver an interrupt, as > Linux doesn't use this. Can be introduced later if necessary. > then at least a TODO, or NOTE comment or something should be placed here. >>> +} >>> + >>> +static void update_spi_target(struct kvm *kvm) >>> +{ >>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic; >>> + ? ? ? int c, i, nrcpus = atomic_read(&kvm->online_vcpus); >>> + ? ? ? u8 targ; >>> + ? ? ? unsigned long *bmap; >>> + >>> + ? ? ? for (i = 32; i < VGIC_NR_IRQS; i++) { >>> + ? ? ? ? ? ? ? targ = vgic_bytemap_get_irq_val(&dist->irq_target, 0, i); >>> + >> >> ah, I see, cpu=0 is because we're always using i >= 32. > > Yeah, it's an ugly hack. > >>> + ? ? ? ? ? ? ? for (c = 0; c < nrcpus; c++) { >>> + ? ? ? ? ? ? ? ? ? ? ? bmap = dist->irq_spi_target[c].global.reg_ul; >>> + >>> + ? ? ? ? ? ? ? ? ? ? ? if (targ & (1 << c)) >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? set_bit(i - 32, bmap); >>> + ? ? ? ? ? ? ? ? ? ? ? else >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? clear_bit(i - 32, bmap); >>> + ? ? ? ? ? ? ? } >>> + ? ? ? } >>> +} >> >> I'm reading this as an optimization so you can easily determine if a >> specific VCPU should get the SPI? Or am I getting this entirely wrong? > > It is a lot more than just an optimization. We really need to know which > CPU is getting targeted by a specific interrupt. > yes, but can't we just look in the irq_target map when we need this information? >> At what time is this performance critical operation performed? > > Only when writing to the target registers, which is hardly a performance > critical thing. > >> Perhaps a comment would be very helpful. This makes me wonder, doesn't >> Linux just configure cpu 0 to take all physical interrupts or did I >> glance over this code too soon? Do we have any reason for this >> complexity for VMs at this point or could we just pretend that ll >> physical SPIs were statically assigned to CPU0? Performance issues? > > You can change the affinity of an interrupt at any time. The fact that > Linux routes everything on CPU0 by default is just a default behavior. A > short example while running an SMP VM: > > root at linaro:~# grep eth0 /proc/interrupts > ?47: ? ? ? ? 20 ? ? ? ? ?0 ? ? ? GIC ?eth0 > root at linaro:~# cat /proc/irq/47/smp_affinity > 3 > root at linaro:~# echo 2 > /proc/irq/47/smp_affinity > root at linaro:~# ping 10.0.2.2 > PING 10.0.2.2 (10.0.2.2) 56(84) bytes of data. > 64 bytes from 10.0.2.2: icmp_req=1 ttl=255 time=5.96 ms > 64 bytes from 10.0.2.2: icmp_req=2 ttl=255 time=0.834 ms > 64 bytes from 10.0.2.2: icmp_req=3 ttl=255 time=0.818 ms > ^C > --- 10.0.2.2 ping statistics --- > 3 packets transmitted, 3 received, 0% packet loss, time 2002ms > rtt min/avg/max/mdev = 0.818/2.540/5.968/2.423 ms > root at linaro:~# grep eth0 /proc/interrupts > ?47: ? ? ? ? 20 ? ? ? ? ?3 ? ? ? GIC ?eth0 > > neat, I didn't look into this at all. >>> + >>> +static void handle_mmio_target_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg; >>> + >>> + ? ? ? /* We treat the banked interrupts targets as read-only */ >>> + ? ? ? if (offset < 32) { >> >> why is this < 32 ? shouldn't it be < 8 ? > > This is organized as one byte per interrupt. Each bit in the byte is > representing a CPU. > oh yes, my bad. sorry. this should be RAZ if atomic_read(&kvm->online_vcpus) == 1 though right? >>> + ? ? ? ? ? ? ? u32 roreg = 1 << vcpu->vcpu_id; >>> + ? ? ? ? ? ? ? roreg |= roreg << 8; >>> + ? ? ? ? ? ? ? roreg |= roreg << 16; >>> + >>> + ? ? ? ? ? ? ? mmio_do_copy(run, &roreg, offset, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); >>> + ? ? ? ? ? ? ? return; >>> + ? ? ? } >>> + >>> + ? ? ? reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_target, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); >>> + ? ? ? if (run->mmio.is_write) { >>> + ? ? ? ? ? ? ? update_spi_target(vcpu->kvm); >>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm); >>> + ? ? ? } >>> +} >>> + >>> +static void handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 *reg = vgic_2bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vcpu->vcpu_id, offset); >>> + ? ? ? mmio_do_copy(run, reg, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); >> >> shouldn't we make sure that SGIs are RAO here? > > Indeed. > >> otherwise, could we just do RAZ/WI here? > > Only if we discard the idea of migrating a VM to a non KVM environment. > As I wrote above, I think it's ok to discard that idea, but I don't see the exact relation here. If QEMU needs to know the content of this register, we can just return RAZ right? >>> +} >>> + >>> +static void handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, >>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct kvm_run *run, u32 offset) >>> +{ >>> + ? ? ? u32 reg; >>> + ? ? ? mmio_do_copy(run, ®, offset, >>> + ? ? ? ? ? ? ? ? ? ?ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); >>> + ? ? ? if (run->mmio.is_write) { >>> + ? ? ? ? ? ? ? vgic_dispatch_sgi(vcpu, reg); >>> + ? ? ? ? ? ? ? vgic_update_state(vcpu->kvm); >>> + ? ? ? } >>> +} >>> + >>> ?/* All this should really be generic code... FIXME!!! */ >>> ?struct mmio_range { >>> ? ? ? ?unsigned long base; >>> @@ -92,6 +275,66 @@ struct mmio_range { >>> ?}; >>> >>> ?static const struct mmio_range vgic_ranges[] = { >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* CTRL, TYPER, IIDR */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 12, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_misc, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IGROUPRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x80, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >> >> what happens if this is not byte aligned and the guest reads the >> entire register, then find_matching_range will not find this - is this >> a supported situation? > > I'm not sure I understand what you mean here... > if (VGIC_NR_IRQS / 8) is not word-aligned and you read the last register, this would fail - but it doesn't make sense when VGIC_NR_IRQS needs to be 32(N+1). It does make me think that it would be really nice to define the VGIC_NR_IRQS like this: #define VGIC_ITLINES 3 #define VGIC_NR_IRQS (32 * (VGIC_ITLINES + 1)) should shatter many doubts I had for a first read :) >> >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_group_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISENABLERn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x100, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_enable_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICENABLERn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x180, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_enable_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISPENDRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x200, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_pending_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICPENDRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x280, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_pending_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ISACTIVERn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x300, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_set_active_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICACTIVERn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x380, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 8, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_clear_active_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* IPRIORITYRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x400, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_priority_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ITARGETSRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0x800, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_target_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* ICFGRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xC00, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= VGIC_NR_IRQS / 4, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_cfg_reg, >>> + ? ? ? }, >>> + ? ? ? { ? ? ? ? ? ? ? ? ? ? ? /* SGIRn */ >>> + ? ? ? ? ? ? ? .base ? ? ? ? ? = VGIC_DIST_BASE + 0xF00, >>> + ? ? ? ? ? ? ? .len ? ? ? ? ? ?= 4, >>> + ? ? ? ? ? ? ? .handle_mmio ? ?= handle_mmio_sgi_reg, >>> + ? ? ? }, >>> ? ? ? ?{} >>> ?}; >>> >>> @@ -121,10 +364,82 @@ int vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run) >>> ? ? ? ?if (!range || !range->handle_mmio) >>> ? ? ? ? ? ? ? ?return KVM_EXIT_MMIO; >>> >>> + ? ? ? spin_lock(&vcpu->kvm->arch.vgic.lock); >>> ? ? ? ?pr_debug("emulating %d %08llx %d\n", run->mmio.is_write, >>> ? ? ? ? ? ? ? ? run->mmio.phys_addr, run->mmio.len); >>> ? ? ? ?range->handle_mmio(vcpu, run, run->mmio.phys_addr - range->base); >>> ? ? ? ?kvm_handle_mmio_return(vcpu, run); >>> + ? ? ? spin_unlock(&vcpu->kvm->arch.vgic.lock); >>> >>> ? ? ? ?return KVM_EXIT_UNKNOWN; >>> ?} >>> + >>> +static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) >>> +{ >>> + ? ? ? struct kvm *kvm = vcpu->kvm; >>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic; >>> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus); >>> + ? ? ? u8 target_cpus; >>> + ? ? ? int sgi, mode, c, vcpu_id; >>> + >>> + ? ? ? vcpu_id = vcpu->vcpu_id; >>> + >>> + ? ? ? sgi = reg & 0xf; >>> + ? ? ? target_cpus = (reg >> 16) & 0xff; >> >> no worries about the NSATT bit here? I'm a little fuzzy about the >> interrupt grouping and how that works with a guest always (or maybe >> not thinking so?) running in non-secure mode. > > The notion of group only makes sense in secure mode. In non-secure mode, > you only see the interrupts that are flagged as group-1 by the secure > mode. These interrupts appear as group-0 on the non-secure side. > ok, thanks. >>> + ? ? ? mode = (reg >> 24) & 3; >>> + >>> + ? ? ? switch (mode) { >>> + ? ? ? case 0: >>> + ? ? ? ? ? ? ? if (!target_cpus) >>> + ? ? ? ? ? ? ? ? ? ? ? return; >>> + >>> + ? ? ? case 1: >>> + ? ? ? ? ? ? ? target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; >>> + ? ? ? ? ? ? ? break; >>> + >>> + ? ? ? case 2: >>> + ? ? ? ? ? ? ? target_cpus = 1 << vcpu_id; >>> + ? ? ? ? ? ? ? break; >>> + ? ? ? } >>> + >>> + ? ? ? for (c = 0; c < nrcpus; c++) { >>> + ? ? ? ? ? ? ? if (target_cpus & 1) { >>> + ? ? ? ? ? ? ? ? ? ? ? /* Flag the SGI as pending */ >>> + ? ? ? ? ? ? ? ? ? ? ? vgic_bitmap_set_irq_val(&dist->irq_pending, c, sgi, 1); >>> + ? ? ? ? ? ? ? ? ? ? ? dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id; >>> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); >> >> should this not rather be a trace event? > > Possibly. I haven't looked at the trace events yet. Any suggestion? > just define a trace event in arch/arm/kvm/trace.h and call that from here with the right parameters, e.g.: +TRACE_EVENT(kvm_sgi, + TP_PROTO(int, int, int), + TP_ARGS(sgi, source_vcpu, dest_vcpu), + + TP_STRUCT__entry( + __field( unsigned long, sgi ) + __field( unsigned long, source_vcpu ) + __field( unsigned long, dest_vcpu ) + ), + + TP_fast_assign( + __entry->sgi = sgi; + __entry->source_vcpu = source_vcpu; + __entry->dest_vcpu = dest_vcpu; + ), + + TP_printk("SGI %d from VCPU %d to VCPU %d", + __entry->sgi, __entry->source_vcpu, __entry->dest_vcpu) +); [snip] +trace_kvm_emulate_cp15_imp(sgi, vcpu_id, c); >>> + ? ? ? ? ? ? ? } >>> + >>> + ? ? ? ? ? ? ? target_cpus >>= 1; >>> + ? ? ? } >>> +} >>> + >>> +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) >>> +{ >>> + ? ? ? return 0; >>> +} >>> + >>> +/* >>> + * Update the interrupt state and determine which CPUs have pending >>> + * interrupts. Must be called with distributor lock held. >>> + */ >>> +static void vgic_update_state(struct kvm *kvm) >>> +{ >>> + ? ? ? struct vgic_dist *dist = &kvm->arch.vgic; >>> + ? ? ? int nrcpus = atomic_read(&kvm->online_vcpus); >>> + ? ? ? int c; >>> + >>> + ? ? ? if (!dist->enabled) { >>> + ? ? ? ? ? ? ? atomic_set(&dist->irq_pending_on_cpu, 0); >>> + ? ? ? ? ? ? ? return; >>> + ? ? ? } >>> + >>> + ? ? ? for (c = 0; c < nrcpus; c++) { >>> + ? ? ? ? ? ? ? struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, c); >>> + >>> + ? ? ? ? ? ? ? if (compute_pending_for_cpu(vcpu)) { >>> + ? ? ? ? ? ? ? ? ? ? ? pr_debug("CPU%d has pending interrupts\n", c); >>> + ? ? ? ? ? ? ? ? ? ? ? atomic_or((1 << c), &dist->irq_pending_on_cpu); >> >> this may be simpler with set_bit(c, ....) and have pending_on_cpu be >> an unsigned long. not important. > > Indeed. > Thanks, Christoffer