Hi Zenghui, Marc, On 8/23/19 7:33 PM, Eric Auger wrote: > At the moment we use 2 IO devices per GICv3 redistributor: > one for the RD_base frame and one for the SGI_base frame. > > Instead we can use a single IO device per redistributor (the 2 > frames are contiguous). This saves slots on the KVM_MMIO_BUS > which is currently limited to NR_IOBUS_DEVS (1000). > > This change allows to instantiate up to 512 redistributors and may > speed the guest boot with a large number of VCPUs. > > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> I tested this patch with the kernel and QEMU branches below: kernel: https://github.com/eauger/linux/tree/256fix-v1 (Marc's patch + this patch) QEMU: https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0 (header update + kvm_arm_gic_set_irq modification) On a machine with 224 pcpus, I was able to boot a 512 vcpu guest. As expected, qemu outputs warnings: qemu-system-aarch64: warning: Number of SMP cpus requested (512) exceeds the recommended cpus supported by KVM (224) qemu-system-aarch64: warning: Number of hotpluggable cpus requested (512) exceeds the recommended cpus supported by KVM (224) On the guest, getconf _NPROCESSORS_ONLN returns 512. I have no clue what can be expected from such an overcommitted configuration, and I have not exercised the guest any further for the moment. But at least it seems to boot properly. I also tested without overcommit and it seems to behave as before (boot, migration). I still need to look at the migration of a guest with more than 256 vcpus at the QEMU level. Thanks Eric > --- > include/kvm/arm_vgic.h | 1 - > virt/kvm/arm/vgic/vgic-init.c | 1 - > virt/kvm/arm/vgic/vgic-mmio-v3.c | 81 ++++++++++---------------------- > 3 files changed, 24 insertions(+), 59 deletions(-) > > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index 7a30524a80ee..004f6e9d3b05 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -311,7 +311,6 @@ struct vgic_cpu { > * parts of the redistributor. 
> */ > struct vgic_io_device rd_iodev; > - struct vgic_io_device sgi_iodev; > struct vgic_redist_region *rdreg; > > /* Contains the attributes and gpa of the LPI pending tables. */ > diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c > index bdbc297d06fb..eaff7031a089 100644 > --- a/virt/kvm/arm/vgic/vgic-init.c > +++ b/virt/kvm/arm/vgic/vgic-init.c > @@ -192,7 +192,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) > int i; > > vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; > - vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF; > > INIT_LIST_HEAD(&vgic_cpu->ap_list_head); > raw_spin_lock_init(&vgic_cpu->ap_list_lock); > diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c > index c45e2d7e942f..400067085cab 100644 > --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c > +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c > @@ -515,7 +515,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { > VGIC_ACCESS_32bit), > }; > > -static const struct vgic_register_region vgic_v3_rdbase_registers[] = { > +static const struct vgic_register_region vgic_v3_rd_registers[] = { > + /* RD_base registers */ > REGISTER_DESC_WITH_LENGTH(GICR_CTLR, > vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, > VGIC_ACCESS_32bit), > @@ -540,44 +541,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { > REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, > vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, > VGIC_ACCESS_32bit), > -}; > - > -static const struct vgic_register_region vgic_v3_sgibase_registers[] = { > - REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0, > + /* SGI_base registers */ > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, > vgic_mmio_read_group, vgic_mmio_write_group, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0, > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0, > vgic_mmio_read_enable, vgic_mmio_write_senable, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0, > + 
REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0, > vgic_mmio_read_enable, vgic_mmio_write_cenable, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0, > + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, > vgic_mmio_read_pending, vgic_mmio_write_spending, > vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0, > + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, > vgic_mmio_read_pending, vgic_mmio_write_cpending, > vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, > + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, > vgic_mmio_read_active, vgic_mmio_write_sactive, > NULL, vgic_mmio_uaccess_write_sactive, > 4, VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, > + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, > vgic_mmio_read_active, vgic_mmio_write_cactive, > NULL, vgic_mmio_uaccess_write_cactive, > 4, VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, > vgic_mmio_read_priority, vgic_mmio_write_priority, 32, > VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), > - REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0, > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0, > vgic_mmio_read_config, vgic_mmio_write_config, 8, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0, > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0, > vgic_mmio_read_raz, vgic_mmio_write_wi, 4, > VGIC_ACCESS_32bit), > - REGISTER_DESC_WITH_LENGTH(GICR_NSACR, > + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR, > vgic_mmio_read_raz, vgic_mmio_write_wi, 4, > VGIC_ACCESS_32bit), > }; > @@ -607,9 +606,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) > struct vgic_dist *vgic = &kvm->arch.vgic; > struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; > struct vgic_io_device 
*rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; > - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; > struct vgic_redist_region *rdreg; > - gpa_t rd_base, sgi_base; > + gpa_t rd_base; > int ret; > > if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) > @@ -631,52 +629,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) > vgic_cpu->rdreg = rdreg; > > rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; > - sgi_base = rd_base + SZ_64K; > > kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); > rd_dev->base_addr = rd_base; > rd_dev->iodev_type = IODEV_REDIST; > - rd_dev->regions = vgic_v3_rdbase_registers; > - rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); > + rd_dev->regions = vgic_v3_rd_registers; > + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); > rd_dev->redist_vcpu = vcpu; > > mutex_lock(&kvm->slots_lock); > ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, > - SZ_64K, &rd_dev->dev); > + 2 * SZ_64K, &rd_dev->dev); > mutex_unlock(&kvm->slots_lock); > > if (ret) > return ret; > > - kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); > - sgi_dev->base_addr = sgi_base; > - sgi_dev->iodev_type = IODEV_REDIST; > - sgi_dev->regions = vgic_v3_sgibase_registers; > - sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); > - sgi_dev->redist_vcpu = vcpu; > - > - mutex_lock(&kvm->slots_lock); > - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, > - SZ_64K, &sgi_dev->dev); > - if (ret) { > - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, > - &rd_dev->dev); > - goto out; > - } > - > rdreg->free_index++; > -out: > - mutex_unlock(&kvm->slots_lock); > - return ret; > + return 0; > } > > static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) > { > struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; > - struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; > > kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); > - kvm_io_bus_unregister_dev(vcpu->kvm, 
KVM_MMIO_BUS, &sgi_dev->dev); > } > > static int vgic_register_all_redist_iodevs(struct kvm *kvm) > @@ -826,8 +803,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) > iodev.base_addr = 0; > break; > case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ > - iodev.regions = vgic_v3_rdbase_registers; > - iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); > + iodev.regions = vgic_v3_rd_registers; > + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); > iodev.base_addr = 0; > break; > } > @@ -985,21 +962,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, > int offset, u32 *val) > { > struct vgic_io_device rd_dev = { > - .regions = vgic_v3_rdbase_registers, > - .nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers), > + .regions = vgic_v3_rd_registers, > + .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers), > }; > > - struct vgic_io_device sgi_dev = { > - .regions = vgic_v3_sgibase_registers, > - .nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers), > - }; > - > - /* SGI_base is the next 64K frame after RD_base */ > - if (offset >= SZ_64K) > - return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K, > - val); > - else > - return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); > + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); > } > > int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, >