kvmtool assumes that the BAR that holds the address for the MSIX table and PBA structure has a size which is equal to their total size and it allocates memory from MMIO space accordingly. However, when initializing the BARs, the BAR size is set to the region size reported by VFIO. When the physical BAR size is greater than the mmio space that kvmtool allocates, we can have a situation where the BAR overlaps with another BAR, in which case kvmtool will fail to map the memory. This was found when trying to do PCI passthrough with a PCIe Realtek r8168 NIC, when the guest was also using virtio-block and virtio-net devices: [..] [ 0.197926] PCI: OF: PROBE_ONLY enabled [ 0.198454] pci-host-generic 40000000.pci: host bridge /pci ranges: [ 0.199291] pci-host-generic 40000000.pci: IO 0x00007000..0x0000ffff -> 0x00007000 [ 0.200331] pci-host-generic 40000000.pci: MEM 0x41000000..0x7fffffff -> 0x41000000 [ 0.201480] pci-host-generic 40000000.pci: ECAM at [mem 0x40000000-0x40ffffff] for [bus 00] [ 0.202635] pci-host-generic 40000000.pci: PCI host bridge to bus 0000:00 [ 0.203535] pci_bus 0000:00: root bus resource [bus 00] [ 0.204227] pci_bus 0000:00: root bus resource [io 0x0000-0x8fff] (bus address [0x7000-0xffff]) [ 0.205483] pci_bus 0000:00: root bus resource [mem 0x41000000-0x7fffffff] [ 0.206456] pci 0000:00:00.0: [10ec:8168] type 00 class 0x020000 [ 0.207399] pci 0000:00:00.0: reg 0x10: [io 0x0000-0x00ff] [ 0.208252] pci 0000:00:00.0: reg 0x18: [mem 0x41002000-0x41002fff] [ 0.209233] pci 0000:00:00.0: reg 0x20: [mem 0x41000000-0x41003fff] [ 0.210481] pci 0000:00:01.0: [1af4:1000] type 00 class 0x020000 [ 0.211349] pci 0000:00:01.0: reg 0x10: [io 0x0100-0x01ff] [ 0.212118] pci 0000:00:01.0: reg 0x14: [mem 0x41003000-0x410030ff] [ 0.212982] pci 0000:00:01.0: reg 0x18: [mem 0x41003200-0x410033ff] [ 0.214247] pci 0000:00:02.0: [1af4:1001] type 00 class 0x018000 [ 0.215096] pci 0000:00:02.0: reg 0x10: [io 0x0200-0x02ff] [ 0.215863] pci 0000:00:02.0: reg 0x14: [mem 0x41003400-0x410034ff] [ 0.216723] pci 0000:00:02.0: reg 0x18: [mem 0x41003600-0x410037ff] [ 0.218105] pci 0000:00:00.0: can't claim BAR 4 [mem 0x41000000-0x41003fff]: address conflict with 0000:00:00.0 [mem 0x41002000-0x41002fff] [..] Guest output of lspci -vv: 00:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller (rev 06) Subsystem: TP-LINK Technologies Co., Ltd. TG-3468 Gigabit PCI Express Network Adapter Control: I/O+ Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx- Interrupt: pin A routed to IRQ 16 Region 0: I/O ports at 0000 [size=256] Region 2: Memory at 41002000 (64-bit, non-prefetchable) [size=4K] Region 4: Memory at 41000000 (64-bit, prefetchable) [size=16K] Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+ Address: 0000000000000000 Data: 0000 Capabilities: [b0] MSI-X: Enable- Count=4 Masked- Vector table: BAR=4 offset=00000000 PBA: BAR=4 offset=00001000 Let's fix this by allocating an amount of MMIO memory equal to the size of the BAR that contains the MSIX table and/or PBA. Reviewed-by: Andre Przywara <andre.przywara@xxxxxxx> Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx> --- vfio/pci.c | 68 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/vfio/pci.c b/vfio/pci.c index 8e5d8572bc0c..bbb8469c8d93 100644 --- a/vfio/pci.c +++ b/vfio/pci.c @@ -715,17 +715,44 @@ static int vfio_pci_fixup_cfg_space(struct vfio_device *vdev) return 0; } -static int vfio_pci_create_msix_table(struct kvm *kvm, - struct vfio_pci_device *pdev) +static int vfio_pci_get_region_info(struct vfio_device *vdev, u32 index, + struct vfio_region_info *info) +{ + int ret; + + *info = (struct vfio_region_info) { + .argsz = sizeof(*info), + .index = index, + }; + + ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, info); + if (ret) { + ret = -errno; + vfio_dev_err(vdev, "cannot get info for BAR %u", index); + return ret; + } + + if (info->size && !is_power_of_two(info->size)) { + vfio_dev_err(vdev, "region is not power of two: 0x%llx", + info->size); + return -EINVAL; + } + + return 0; +} + +static int vfio_pci_create_msix_table(struct kvm *kvm, struct vfio_device *vdev) { int ret; size_t i; - size_t mmio_size; + size_t map_size; size_t nr_entries; struct vfio_pci_msi_entry *entries; + struct vfio_pci_device *pdev = &vdev->pci; struct vfio_pci_msix_pba *pba = &pdev->msix_pba; struct vfio_pci_msix_table *table = &pdev->msix_table; struct msix_cap *msix = PCI_CAP(&pdev->hdr, pdev->msix.pos); + struct vfio_region_info info; table->bar = msix->table_offset & PCI_MSIX_TABLE_BIR; pba->bar = msix->pba_offset & PCI_MSIX_TABLE_BIR; @@ -744,15 +771,31 @@ static int vfio_pci_create_msix_table(struct kvm *kvm, for (i = 0; i < nr_entries; i++) entries[i].config.ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; + ret = vfio_pci_get_region_info(vdev, table->bar, &info); + if (ret) + return ret; + if (!info.size) + return -EINVAL; + map_size = info.size; + + if (table->bar != pba->bar) { + ret = vfio_pci_get_region_info(vdev, pba->bar, &info); + if (ret) + return ret; + if (!info.size) + return -EINVAL; + map_size += info.size; + } + /* * To ease MSI-X cap configuration in case they share the same BAR, * collapse table and pending array. The size of the BAR regions must be * powers of two. */ - mmio_size = roundup_pow_of_two(table->size + pba->size); - table->guest_phys_addr = pci_get_mmio_block(mmio_size); + map_size = ALIGN(map_size, PAGE_SIZE); + table->guest_phys_addr = pci_get_mmio_block(map_size); if (!table->guest_phys_addr) { - pr_err("cannot allocate IO space"); + pr_err("cannot allocate MMIO space"); ret = -ENOMEM; goto out_free; } @@ -816,17 +859,10 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, region->vdev = vdev; region->is_ioport = !!(bar & PCI_BASE_ADDRESS_SPACE_IO); - region->info = (struct vfio_region_info) { - .argsz = sizeof(region->info), - .index = nr, - }; - ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®ion->info); - if (ret) { - ret = -errno; - vfio_dev_err(vdev, "cannot get info for BAR %zu", nr); + ret = vfio_pci_get_region_info(vdev, nr, ®ion->info); + if (ret) return ret; - } /* Ignore invalid or unimplemented regions */ if (!region->info.size) @@ -871,7 +907,7 @@ static int vfio_pci_configure_dev_regions(struct kvm *kvm, return ret; if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSIX) { - ret = vfio_pci_create_msix_table(kvm, pdev); + ret = vfio_pci_create_msix_table(kvm, vdev); if (ret) return ret; } -- 2.20.1