virHostdevPreparePCIDevices verifies if a PCI hostdev "A" is in use by another domain by checking the 'activePCIHostdevs' list. It also verifies if other domains are using any other hostdev that belongs to the same IOMMU of "A". This is not enough to cover all the cases. Suppose a PCI hostdev "B" that shares the IOMMU with "A". We are currently able to abort guest launch if "B" is being used by another domain, but if "B" is not being used by any domain, we'll proceed guest execution. What happens then is: if "A" is being used in managed mode, the guest will fail to launch because Libvirt didn't detach "B" from the host. If "A" is being used in un-managed mode, the guest might fail or succeed to launch depending on whether both "A" and "B" were detached manually prior to guest start. Libvirt is now able to deal with these scenarios in a homogeneous fashion, providing the same behavior for both managed and un-managed mode while allowing parcial assignment for both cases as well. This patch changes virHostdevPreparePCIDevices to check all the PCI hostdevs of the domain against all the PCI hostdevs of the IOMMU, failing to launch if the domain does not contain all the PCI hostdevs declared, in both managed and un-managed cases. After this patch, any existing domains that were using PCI hostdevs with un-managed mode, and were getting away with partial assignment without declaring all the IOMMU PCI hostdevs in the domain XML, will be forced to do so. The missing PCI hostdevs can be declared with "<address type='unassigned'/>" to keep them invisible to the guest, making the guest oblivious of this change. This is an annoyance for such domains, but the payoff is more consistency of behavior between managed and un-managed mode and more clarity on the domain XML, forcing the admin to declare all PCI hostdevs of the IOMMU and knowing exactly what will be detached from the host. This is an example of this newly added error condition when the domain does not declare all the PCI hostdevs of the same IOMMU: $ sudo ./run tools/virsh start multipci-coldplug-partial-fail error: Failed to start domain multipci-coldplug-partial-fail error: Requested operation is not valid: All devices of the same IOMMU group 1 of the PCI device 0001:09:00.0 must belong to domain multipci-coldplug-partial-fail $ Signed-off-by: Daniel Henrique Barboza <danielhb413@xxxxxxxxx> --- src/util/virhostdev.c | 64 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c index 39e6b8f49f..2e7885112f 100644 --- a/src/util/virhostdev.c +++ b/src/util/virhostdev.c @@ -56,6 +56,13 @@ struct virHostdevIsPCINodeDeviceUsedData { bool usesVFIO; }; +struct virHostdevIsAllIOMMUGroupUsedData { + virPCIDeviceListPtr pcidevs; + const char *domainName; + const char *deviceName; + int iommuGroup; +}; + /* This module makes heavy use of bookkeeping lists contained inside a * virHostdevManager instance to keep track of the devices' status. To make * it easy to spot potential ownership errors when moving devices from one @@ -111,6 +118,27 @@ static int virHostdevIsPCINodeDeviceUsed(virPCIDeviceAddressPtr devAddr, void *o return 0; } +static int virHostdevIsAllIOMMUGroupUsed(virPCIDeviceAddressPtr devAddr, void *opaque) +{ + struct virHostdevIsAllIOMMUGroupUsedData *helperData = opaque; + virPCIDevicePtr actual; + + actual = virPCIDeviceListFindByIDs(helperData->pcidevs, + devAddr->domain, devAddr->bus, + devAddr->slot, devAddr->function); + if (actual) { + return 0; + } else { + virReportError(VIR_ERR_OPERATION_INVALID, + _("All devices of the same IOMMU group %d of " + "the PCI device %s must belong to domain %s"), + helperData->iommuGroup, + helperData->deviceName, + helperData->domainName); + return -1; + } +} + static int virHostdevManagerOnceInit(void) { if (!VIR_CLASS_NEW(virHostdevManager, virClassForObject())) @@ -724,9 +752,10 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr, unsigned int flags) { g_autoptr(virPCIDeviceList) pcidevs = NULL; + g_autofree unsigned int *searchedIOMMUs = NULL; int last_processed_hostdev_vf = -1; - size_t i; - int ret = -1; + size_t i, j; + int ret = -1, nSearchedIOMMUs = 0; virPCIDeviceAddressPtr devAddr = NULL; if (!nhostdevs) @@ -735,6 +764,8 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr, if (!(pcidevs = virHostdevGetPCIHostDeviceList(hostdevs, nhostdevs))) return -1; + searchedIOMMUs = g_new0(unsigned int, virPCIDeviceListCount(pcidevs)); + virObjectLock(mgr->activePCIHostdevs); virObjectLock(mgr->inactivePCIHostdevs); @@ -778,13 +809,32 @@ virHostdevPreparePCIDevices(virHostdevManagerPtr mgr, goto cleanup; /* VFIO devices belonging to same IOMMU group can't be - * shared across guests. Check if that's the case. */ + * shared across guests, and all of the must belong to the + * same domain. If a device isn't going to be assigned + * (flag unassigned is true) the guest will not use it, but + * the actual device must be detached together from the host + * anyway. */ if (usesVFIO) { - data.usesVFIO = true; - if (virPCIDeviceAddressIOMMUGroupIterate(devAddr, - virHostdevIsPCINodeDeviceUsed, - &data) < 0) + int devIOMMUGroup = virPCIDeviceAddressGetIOMMUGroupNum(devAddr); + struct virHostdevIsAllIOMMUGroupUsedData helper = { + pcidevs, dom_name, virPCIDeviceGetName(pci), devIOMMUGroup}; + bool alreadySearched = false; + + for (j = 0; j < nSearchedIOMMUs; j++) { + if (devIOMMUGroup == searchedIOMMUs[j]) { + alreadySearched = true; + break; + } + } + + if (alreadySearched) + continue; + + if (virPCIDeviceAddressIOMMUGroupIterate( + devAddr, virHostdevIsAllIOMMUGroupUsed, &helper) < 0) goto cleanup; + + searchedIOMMUs[nSearchedIOMMUs++] = devIOMMUGroup; } } -- 2.23.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list