On Thu, 23 Jan 2020 13:47:46 +0000 Alexandru Elisei <alexandru.elisei@xxxxxxx> wrote: Hi, > kvmtool assumes that the BAR that holds the address for the MSIX table > and PBA structure has a size which is equal to their total size and it > allocates memory from MMIO space accordingly. However, when > initializing the BARs, the BAR size is set to the region size reported > by VFIO. When the physical BAR size is greater than the mmio space that > kvmtool allocates, we can have a situation where the BAR overlaps with > another BAR, in which case kvmtool will fail to map the memory. This was > found when trying to do PCI passthrough with a PCIe Realtek r8168 NIC, > when the guest was also using virtio-block and virtio-net devices: Good catch! > > [..] > [ 0.197926] PCI: OF: PROBE_ONLY enabled > [ 0.198454] pci-host-generic 40000000.pci: host bridge /pci ranges: > [ 0.199291] pci-host-generic 40000000.pci: IO 0x00007000..0x0000ffff -> 0x00007000 > [ 0.200331] pci-host-generic 40000000.pci: MEM 0x41000000..0x7fffffff -> 0x41000000 > [ 0.201480] pci-host-generic 40000000.pci: ECAM at [mem 0x40000000-0x40ffffff] for [bus 00] > [ 0.202635] pci-host-generic 40000000.pci: PCI host bridge to bus 0000:00 > [ 0.203535] pci_bus 0000:00: root bus resource [bus 00] > [ 0.204227] pci_bus 0000:00: root bus resource [io 0x0000-0x8fff] (bus address [0x7000-0xffff]) > [ 0.205483] pci_bus 0000:00: root bus resource [mem 0x41000000-0x7fffffff] > [ 0.206456] pci 0000:00:00.0: [10ec:8168] type 00 class 0x020000 > [ 0.207399] pci 0000:00:00.0: reg 0x10: [io 0x0000-0x00ff] > [ 0.208252] pci 0000:00:00.0: reg 0x18: [mem 0x41002000-0x41002fff] > [ 0.209233] pci 0000:00:00.0: reg 0x20: [mem 0x41000000-0x41003fff] > [ 0.210481] pci 0000:00:01.0: [1af4:1000] type 00 class 0x020000 > [ 0.211349] pci 0000:00:01.0: reg 0x10: [io 0x0100-0x01ff] > [ 0.212118] pci 0000:00:01.0: reg 0x14: [mem 0x41003000-0x410030ff] > [ 0.212982] pci 0000:00:01.0: reg 0x18: [mem 0x41003200-0x410033ff] > [ 0.214247] pci 0000:00:02.0: [1af4:1001] type 00 class 0x018000 > [ 0.215096] pci 0000:00:02.0: reg 0x10: [io 0x0200-0x02ff] > [ 0.215863] pci 0000:00:02.0: reg 0x14: [mem 0x41003400-0x410034ff] > [ 0.216723] pci 0000:00:02.0: reg 0x18: [mem 0x41003600-0x410037ff] > [ 0.218105] pci 0000:00:00.0: can't claim BAR 4 [mem 0x41000000-0x41003fff]: address conflict with 0000:00:00.0 [mem 0x41002000-0x41002fff] > [..] > > Guest output of lspci -vv: > > 00:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller (rev 06) > Subsystem: TP-LINK Technologies Co., Ltd. TG-3468 Gigabit PCI Express Network Adapter > Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- > Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- > Interrupt: pin A routed to IRQ 16 > Region 0: I/O ports at 0000 [size=256] > Region 2: Memory at 41002000 (64-bit, non-prefetchable) [size=4K] > Region 4: Memory at 41000000 (64-bit, prefetchable) [size=16K] > Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+ > Address: 0000000000000000 Data: 0000 > Capabilities: [b0] MSI-X: Enable- Count=4 Masked- > Vector table: BAR=4 offset=00000000 > PBA: BAR=4 offset=00001000 > > Let's fix this by allocating an amount of MMIO memory equal to the size > of the BAR that contains the MSIX table and/or PBA. > > Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx> Looks alright to me: Reviewed-by: Andre Przywara <andre.przywara@xxxxxxx> Cheers, Andre > --- > vfio/pci.c | 68 +++++++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 52 insertions(+), 16 deletions(-) > > diff --git a/vfio/pci.c b/vfio/pci.c > index 8e5d8572bc0c..bbb8469c8d93 100644 > --- a/vfio/pci.c > +++ b/vfio/pci.c > @@ -715,17 +715,44 @@ static int vfio_pci_fixup_cfg_space(struct vfio_device *vdev) > return 0; > } > > -static int vfio_pci_create_msix_table(struct kvm *kvm, > - struct vfio_pci_device *pdev) > +static int vfio_pci_get_region_info(struct vfio_device *vdev, u32 index, > + struct vfio_region_info *info) > +{ > + int ret; > + > + *info = (struct vfio_region_info) { > + .argsz = sizeof(*info), > + .index = index, > + }; > + > + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, info); > + if (ret) { > + ret = -errno; > + vfio_dev_err(vdev, "cannot get info for BAR %u", index); > + return ret; > + } > + > + if (info->size && !is_power_of_two(info->size)) { > + vfio_dev_err(vdev, "region is not power of two: 0x%llx", > + info->size); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev) > { > int ret; > size_t i; > - size_t mmio_size; > + size_t map_size; > size_t nr_entries; > struct vfio_pci_msi_entry *entries; > + struct vfio_pci_device *pdev = &vdev->pci; > struct vfio_pci_msix_pba *pba = &pdev->msix_pba; > struct vfio_pci_msix_table *table = &pdev->msix_table; > struct msix_cap *msix = PCI_CAP(&pdev->hdr, pdev->msix.pos); > + struct vfio_region_info info; > > table->bar = msix->table_offset & PCI_MSIX_TABLE_BIR; > pba->bar = msix->pba_offset & PCI_MSIX_TABLE_BIR; > @@ -744,15 +771,31 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, > for (i = 0; i < nr_entries; i++) > entries[i].config.ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; > > + ret = vfio_pci_get_region_info(vdev, table->bar, &info); > + if (ret) > + return ret; > + if (!info.size) > + return -EINVAL; > + map_size = info.size; > + > + if (table->bar != pba->bar) { > + ret = vfio_pci_get_region_info(vdev, pba->bar, &info); > + if (ret) > + return ret; > + if (!info.size) > + return -EINVAL; > + map_size += info.size; > + } > + > /* > * To ease MSI-X cap configuration in case they share the same BAR, > * collapse table and pending array. The size of the BAR regions must be > * powers of two. > */ > - mmio_size = roundup_pow_of_two(table->size + pba->size); > - table->guest_phys_addr = pci_get_mmio_block(mmio_size); > + map_size = ALIGN(map_size, PAGE_SIZE); > + table->guest_phys_addr = pci_get_mmio_block(map_size); > if (!table->guest_phys_addr) { > - pr_err("cannot allocate IO space"); > + pr_err("cannot allocate MMIO space"); > ret = -ENOMEM; > goto out_free; > } > @@ -816,17 +859,10 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, > > region->vdev = vdev; > region->is_ioport = !!(bar & PCI_BASE_ADDRESS_SPACE_IO); > - region->info = (struct vfio_region_info) { > - .argsz = sizeof(region->info), > - .index = nr, > - }; > > - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®ion->info); > - if (ret) { > - ret = -errno; > - vfio_dev_err(vdev, "cannot get info for BAR %zu", nr); > + ret = vfio_pci_get_region_info(vdev, nr, ®ion->info); > + if (ret) > return ret; > - } > > /* Ignore invalid or unimplemented regions */ > if (!region->info.size) > @@ -871,7 +907,7 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm, > return ret; > > if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) { > - ret = vfio_pci_create_msix_table(kvm, pdev); > + ret = vfio_pci_create_msix_table(kvm, vdev); > if (ret) > return ret; > }