On 26/03/16 02:14, Andre Przywara wrote: > Add emulation for some basic MMIO registers used in the ITS emulation. > This includes: > - GITS_{CTLR,TYPER,IIDR} > - ID registers > - GITS_{CBASER,CREADR,CWRITER} > those implement the ITS command buffer handling > > Most of the handlers are pretty straight forward, but CWRITER goes > some extra miles to allow fine grained locking. The idea here > is to let only the first instance iterate through the command ring > buffer, CWRITER accesses on other VCPUs meanwhile will be picked up > by that first instance and handled as well. The ITS lock is thus only > hold for very small periods of time and is dropped before the actual s/hold/held/ > command handler is called. > > Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx> > --- > include/kvm/vgic/vgic.h | 3 + > include/linux/irqchip/arm-gic-v3.h | 8 ++ > virt/kvm/arm/vgic/its-emul.c | 272 ++++++++++++++++++++++++++++++++++++- > virt/kvm/arm/vgic/vgic.h | 6 + > virt/kvm/arm/vgic/vgic_init.c | 2 + > 5 files changed, 284 insertions(+), 7 deletions(-) > > diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h > index c79bed5..bafea11 100644 > --- a/include/kvm/vgic/vgic.h > +++ b/include/kvm/vgic/vgic.h > @@ -115,6 +115,9 @@ struct vgic_io_device { > struct vgic_its { > bool enabled; > spinlock_t lock; > + u64 cbaser; > + int creadr; > + int cwriter; Irk. Please use explicitly sized types. 
> }; > > struct vgic_dist { > diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h > index a813c3e..7011b98 100644 > --- a/include/linux/irqchip/arm-gic-v3.h > +++ b/include/linux/irqchip/arm-gic-v3.h > @@ -179,15 +179,23 @@ > #define GITS_BASER 0x0100 > #define GITS_IDREGS_BASE 0xffd0 > #define GITS_PIDR2 GICR_PIDR2 > +#define GITS_PIDR4 0xffd0 > +#define GITS_CIDR0 0xfff0 > +#define GITS_CIDR1 0xfff4 > +#define GITS_CIDR2 0xfff8 > +#define GITS_CIDR3 0xfffc > > #define GITS_TRANSLATER 0x10040 > > #define GITS_CTLR_ENABLE (1U << 0) > #define GITS_CTLR_QUIESCENT (1U << 31) > > +#define GITS_TYPER_PLPIS (1UL << 0) > +#define GITS_TYPER_IDBITS_SHIFT 8 > #define GITS_TYPER_DEVBITS_SHIFT 13 > #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) > #define GITS_TYPER_PTA (1UL << 19) > +#define GITS_TYPER_HWCOLLCNT_SHIFT 24 > > #define GITS_CBASER_VALID (1UL << 63) > #define GITS_CBASER_nCnB (0UL << 59) > diff --git a/virt/kvm/arm/vgic/its-emul.c b/virt/kvm/arm/vgic/its-emul.c > index 49dd5e4..de8d360 100644 > --- a/virt/kvm/arm/vgic/its-emul.c > +++ b/virt/kvm/arm/vgic/its-emul.c > @@ -31,23 +31,263 @@ > #include "vgic.h" > #include "vgic_mmio.h" > > +#define BASER_BASE_ADDRESS(x) ((x) & 0xfffffffff000ULL) > + > +static int vgic_mmio_read_its_ctlr(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + u32 reg; > + > + reg = GITS_CTLR_QUIESCENT; So your ITS is always in a quiescent state? Even when you're processing the command queue? You'll have to convince me... 
> + if (its->enabled) > + reg |= GITS_CTLR_ENABLE; > + > + write_mask32(reg, addr & 3, len, val); > + > + return 0; > +} > + > +static int vgic_mmio_write_its_ctlr(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, const void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + struct vgic_io_device *iodev = container_of(this, > + struct vgic_io_device, dev); > + > + if (addr - iodev->base_addr == 0) whitespace issue. > + its->enabled = !!(*(u8*)val & GITS_CTLR_ENABLE); > + > + return 0; > +} > + > +static int vgic_mmio_read_its_typer(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + u64 reg = GITS_TYPER_PLPIS; > + > + /* > + * We use linear CPU numbers for redistributor addressing, > + * so GITS_TYPER.PTA is 0. > + * To avoid memory waste on the guest side, we keep the > + * number of IDBits and DevBits low for the time being. > + * This could later be made configurable by userland. > + * Since we have all collections in linked list, we claim > + * that we can hold all of the collection tables in our > + * own memory and that the ITT entry size is 1 byte (the > + * smallest possible one). All of this is going to bite us when we want to implement migration, especially the HW collection bit. 
> + */ > + reg |= 0xff << GITS_TYPER_HWCOLLCNT_SHIFT; > + reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; > + reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; > + > + write_mask64(reg, addr & 7, len, val); > + > + return 0; > +} > + > +static int vgic_mmio_read_its_iidr(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + u32 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); > + > + write_mask32(reg, addr & 3, len, val); > + > + return 0; > +} > + > +static int vgic_mmio_read_its_idregs(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_io_device *iodev = container_of(this, > + struct vgic_io_device, dev); > + u32 reg = 0; > + int idreg = (addr & ~3) - iodev->base_addr + GITS_IDREGS_BASE; > + > + switch (idreg) { > + case GITS_PIDR2: > + reg = GIC_PIDR2_ARCH_GICv3; Are we leaving the lowest 4 bits to zero? > + break; > + case GITS_PIDR4: > + /* This is a 64K software visible page */ > + reg = 0x40; Same question. Also, how about all the other PIDR registers? > + break; > + /* Those are the ID registers for (any) GIC. */ > + case GITS_CIDR0: > + reg = 0x0d; > + break; > + case GITS_CIDR1: > + reg = 0xf0; > + break; > + case GITS_CIDR2: > + reg = 0x05; > + break; > + case GITS_CIDR3: > + reg = 0xb1; > + break; > + } Given that these values are directly taken from the architecture, and seem common to the whole GICv3 architecture when implemented by ARM, we could have a common handler for the whole GICv3 implementation. Not a big deal though. > + > + write_mask32(reg, addr & 3, len, val); > + > + return 0; > +} > + > +/* > + * This function is called with both the ITS and the distributor lock dropped, > + * so the actual command handlers must take the respective locks when needed. 
> + */ > +static int vits_handle_command(struct kvm_vcpu *vcpu, u64 *its_cmd) > +{ > + return -ENODEV; > +} > + > +static int vgic_mmio_read_its_cbaser(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + > + write_mask64(its->cbaser, addr & 7, len, val); > + > + return 0; > +} > + > +static int vgic_mmio_write_its_cbaser(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, const void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + > + if (its->enabled) > + return 0; > + > + its->cbaser = mask64(its->cbaser, addr & 7, len, val); > + its->creadr = 0; Don't you need to acquire the command queue lock here? > + > + return 0; > +} > + > +static int its_cmd_buffer_size(struct kvm *kvm) > +{ > + struct vgic_its *its = &kvm->arch.vgic.its; > + > + return ((its->cbaser & 0xff) + 1) << 12; > +} > + > +static gpa_t its_cmd_buffer_base(struct kvm *kvm) > +{ > + struct vgic_its *its = &kvm->arch.vgic.its; > + > + return BASER_BASE_ADDRESS(its->cbaser); > +} > + > +/* > + * By writing to CWRITER the guest announces new commands to be processed. > + * Since we cannot read from guest memory inside the ITS spinlock, we > + * iterate over the command buffer (with the lock dropped) until the read > + * pointer matches the write pointer. Other VCPUs writing this register in the > + * meantime will just update the write pointer, leaving the command > + * processing to the first instance of the function. 
> + */ > +static int vgic_mmio_write_its_cwriter(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, const void *val) > +{ > + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; > + struct vgic_its *its = &dist->its; > + gpa_t cbaser = its_cmd_buffer_base(vcpu->kvm); > + u64 cmd_buf[4]; > + u32 reg; > + bool finished; > + > + reg = mask64(its->cwriter & 0xfffe0, addr & 7, len, val); > + reg &= 0xfffe0; > + if (reg > its_cmd_buffer_size(vcpu->kvm)) > + return 0; > + > + spin_lock(&its->lock); > + > + /* > + * If there is still another VCPU handling commands, let this > + * one pick up the new CWRITER and process "our" new commands as well. > + */ How do you detect that condition? All I see is a massive race here, with two threads processing the queue in parallel, possibly corrupting each other's data. Please explain why you think this is safe. > + finished = (its->cwriter != its->creadr); > + its->cwriter = reg; > + > + spin_unlock(&its->lock); > + > + while (!finished) { > + int ret = kvm_read_guest(vcpu->kvm, cbaser + its->creadr, > + cmd_buf, 32); > + if (ret) { > + /* > + * Gah, we are screwed. Reset CWRITER to that command > + * that we have finished processing and return. 
> + */ > + spin_lock(&its->lock); > + its->cwriter = its->creadr; > + spin_unlock(&its->lock); > + break; > + } > + vits_handle_command(vcpu, cmd_buf); > + > + spin_lock(&its->lock); > + its->creadr += 32; > + if (its->creadr == its_cmd_buffer_size(vcpu->kvm)) > + its->creadr = 0; > + finished = (its->creadr == its->cwriter); > + spin_unlock(&its->lock); > + } > + > + return 0; > +} > + > +static int vgic_mmio_read_its_cwriter(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + u64 reg = its->cwriter & 0xfffe0; > + > + write_mask64(reg, addr & 7, len, val); > + > + return 0; > +} > + > +static int vgic_mmio_read_its_creadr(struct kvm_vcpu *vcpu, > + struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_its *its = &vcpu->kvm->arch.vgic.its; > + u64 reg = its->creadr & 0xfffe0; > + > + write_mask64(reg, addr & 7, len, val); > + > + return 0; > +} > + > struct vgic_register_region its_registers[] = { > REGISTER_DESC_WITH_LENGTH(GITS_CTLR, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 4), > + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4), > REGISTER_DESC_WITH_LENGTH(GITS_IIDR, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 4), > + vgic_mmio_read_its_iidr, vgic_mmio_write_wi, 4), > REGISTER_DESC_WITH_LENGTH(GITS_TYPER, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 4), > + vgic_mmio_read_its_typer, vgic_mmio_write_wi, 4), > REGISTER_DESC_WITH_LENGTH(GITS_CBASER, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 8), > + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8), > REGISTER_DESC_WITH_LENGTH(GITS_CWRITER, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 8), > + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8), > REGISTER_DESC_WITH_LENGTH(GITS_CREADR, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 8), > + vgic_mmio_read_its_creadr, vgic_mmio_write_wi, 8), > REGISTER_DESC_WITH_LENGTH(GITS_BASER, > vgic_mmio_read_raz, 
vgic_mmio_write_wi, 0x40), > REGISTER_DESC_WITH_LENGTH(GITS_IDREGS_BASE, > - vgic_mmio_read_raz, vgic_mmio_write_wi, 0x30), > + vgic_mmio_read_its_idregs, vgic_mmio_write_wi, 0x30), > }; > > /* This is called on setting the LPI enable bit in the redistributor. */ > @@ -59,9 +299,14 @@ int vits_init(struct kvm *kvm) > { > struct vgic_dist *dist = &kvm->arch.vgic; > struct vgic_its *its = &dist->its; > + int nr_vcpus = atomic_read(&kvm->online_vcpus); > struct vgic_io_device *regions; > int ret, i; > > + dist->pendbaser = kcalloc(nr_vcpus, sizeof(u64), GFP_KERNEL); > + if (!dist->pendbaser) > + return -ENOMEM; > + > spin_lock_init(&its->lock); > > regions = kmalloc_array(ARRAY_SIZE(its_registers), > @@ -82,3 +327,16 @@ int vits_init(struct kvm *kvm) > > return -ENXIO; > } > + > +void vits_destroy(struct kvm *kvm) > +{ > + struct vgic_dist *dist = &kvm->arch.vgic; > + struct vgic_its *its = &dist->its; > + > + if (!vgic_has_its(kvm)) > + return; > + > + kfree(dist->pendbaser); > + > + its->enabled = false; > +} > diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h > index 4e7dcb8..08f97d1 100644 > --- a/virt/kvm/arm/vgic/vgic.h > +++ b/virt/kvm/arm/vgic/vgic.h > @@ -63,6 +63,7 @@ int vgic_register_redist_regions(struct kvm *kvm, gpa_t dist_base_address); > > int vits_init(struct kvm *kvm); > void vgic_enable_lpis(struct kvm_vcpu *vcpu); > +void vits_destroy(struct kvm *kvm); > #else > static inline void vgic_v3_irq_change_affinity(struct kvm *kvm, u32 intid, > u64 mpidr) > @@ -137,6 +138,11 @@ static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu) > { > return; > } > + > +static inline void vits_destroy(struct kvm *kvm) > +{ > + return; > +} > #endif > > void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); > diff --git a/virt/kvm/arm/vgic/vgic_init.c b/virt/kvm/arm/vgic/vgic_init.c > index dcfb93d..e4459e3 100644 > --- a/virt/kvm/arm/vgic/vgic_init.c > +++ b/virt/kvm/arm/vgic/vgic_init.c > @@ -298,6 +298,8 @@ void kvm_vgic_destroy(struct 
kvm *kvm) > > kvm_vgic_dist_destroy(kvm); > > + vits_destroy(kvm); > + > kvm_for_each_vcpu(i, vcpu, kvm) > kvm_vgic_vcpu_destroy(vcpu); > } > Thanks, M. -- Jazz is not dead. It just smells funny... -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html