Now that we have the virNVMeDevice module (introduced in the previous
commit), let's use it in virHostdev to track which NVMe devices are free
to be used by a domain and which are taken.

Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx>
---
 src/libvirt_private.syms |   5 +
 src/util/virhostdev.c    | 331 +++++++++++++++++++++++++++++++++++++++
 src/util/virhostdev.h    |  37 +++++
 src/util/virnvme.h       |   2 +
 4 files changed, 375 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index aab2b49bdf..398f8c21ac 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2146,18 +2146,23 @@ virHostdevPCINodeDeviceReAttach;
 virHostdevPCINodeDeviceReset;
 virHostdevPrepareDomainDevices;
 virHostdevPrepareMediatedDevices;
+virHostdevPrepareNVMeDevices;
+virHostdevPrepareOneNVMeDevice;
 virHostdevPreparePCIDevices;
 virHostdevPrepareSCSIDevices;
 virHostdevPrepareSCSIVHostDevices;
 virHostdevPrepareUSBDevices;
 virHostdevReAttachDomainDevices;
 virHostdevReAttachMediatedDevices;
+virHostdevReAttachNVMeDevices;
+virHostdevReAttachOneNVMeDevice;
 virHostdevReAttachPCIDevices;
 virHostdevReAttachSCSIDevices;
 virHostdevReAttachSCSIVHostDevices;
 virHostdevReAttachUSBDevices;
 virHostdevUpdateActiveDomainDevices;
 virHostdevUpdateActiveMediatedDevices;
+virHostdevUpdateActiveNVMeDevices;
 virHostdevUpdateActivePCIDevices;
 virHostdevUpdateActiveSCSIDevices;
 virHostdevUpdateActiveUSBDevices;
diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c
index 94a0185597..78e409732a 100644
--- a/src/util/virhostdev.c
+++ b/src/util/virhostdev.c
@@ -135,6 +135,7 @@ virHostdevManagerDispose(void *obj)
     virObjectUnref(hostdevMgr->activeSCSIHostdevs);
     virObjectUnref(hostdevMgr->activeSCSIVHostHostdevs);
     virObjectUnref(hostdevMgr->activeMediatedHostdevs);
+    virObjectUnref(hostdevMgr->activeNVMeHostdevs);
     VIR_FREE(hostdevMgr->stateDir);
 }
 
@@ -165,6 +166,9 @@ virHostdevManagerNew(void)
     if (!(hostdevMgr->activeMediatedHostdevs = virMediatedDeviceListNew()))
         return NULL;
 
+    if (!(hostdevMgr->activeNVMeHostdevs = virNVMeDeviceListNew()))
+        return NULL;
+
     if (privileged) {
         hostdevMgr->stateDir = g_strdup(HOSTDEV_STATE_DIR);
 
@@ -2235,3 +2239,330 @@ virHostdevUpdateActiveDomainDevices(virHostdevManagerPtr mgr,
 
     return 0;
 }
+
+
+static int
+virHostdevGetNVMeDeviceList(virNVMeDeviceListPtr nvmeDevices,
+                            virStorageSourcePtr src,
+                            const char *drv_name,
+                            const char *dom_name)
+{
+    virStorageSourcePtr n;
+
+    for (n = src; virStorageSourceIsBacking(n); n = n->backingStore) {
+        g_autoptr(virNVMeDevice) dev = NULL;
+        const virStorageSourceNVMeDef *srcNVMe = n->nvme;
+
+        if (n->type != VIR_STORAGE_TYPE_NVME)
+            continue;
+
+        if (!(dev = virNVMeDeviceNew(&srcNVMe->pciAddr,
+                                     srcNVMe->namespace,
+                                     srcNVMe->managed)))
+            return -1;
+
+        virNVMeDeviceUsedBySet(dev, drv_name, dom_name);
+
+        if (virNVMeDeviceListAdd(nvmeDevices, dev) < 0)
+            return -1;
+    }
+
+    return 0;
+}
+
+
+int
+virHostdevPrepareOneNVMeDevice(virHostdevManagerPtr hostdev_mgr,
+                               const char *drv_name,
+                               const char *dom_name,
+                               virStorageSourcePtr src)
+{
+    g_autoptr(virNVMeDeviceList) nvmeDevices = NULL;
+    g_autoptr(virPCIDeviceList) pciDevices = NULL;
+    const unsigned int pciFlags = 0;
+    virNVMeDevicePtr temp = NULL;
+    size_t i;
+    ssize_t lastGoodNVMeIdx = -1;
+    int ret = -1;
+
+    if (!(nvmeDevices = virNVMeDeviceListNew()))
+        return -1;
+
+    if (virHostdevGetNVMeDeviceList(nvmeDevices, src, drv_name, dom_name) < 0)
+        return -1;
+
+    if (virNVMeDeviceListCount(nvmeDevices) == 0)
+        return 0;
+
+    virObjectLock(hostdev_mgr->activeNVMeHostdevs);
+
+    /* Firstly, let's check if all devices are free */
+    for (i = 0; i < virNVMeDeviceListCount(nvmeDevices); i++) {
+        const virNVMeDevice *dev = virNVMeDeviceListGet(nvmeDevices, i);
+        const virPCIDeviceAddress *addr = NULL;
+        g_autofree char *addrStr = NULL;
+        const char *actual_drvname = NULL;
+        const char *actual_domname = NULL;
+
+        temp = virNVMeDeviceListLookup(hostdev_mgr->activeNVMeHostdevs, dev);
+
+        /* Not on the list means not used */
+        if (!temp)
+            continue;
+
+        virNVMeDeviceUsedByGet(temp, &actual_drvname, &actual_domname);
+        addr = virNVMeDeviceAddressGet(dev);
+        addrStr = virPCIDeviceAddressAsString(addr);
+
+        virReportError(VIR_ERR_OPERATION_INVALID,
+                       _("NVMe device %s already in use by driver %s domain %s"),
+                       NULLSTR(addrStr), actual_drvname, actual_domname);
+        goto cleanup;
+    }
+
+    if (!(pciDevices = virNVMeDeviceListCreateDetachList(hostdev_mgr->activeNVMeHostdevs,
+                                                         nvmeDevices)))
+        goto cleanup;
+
+    /* Let's check if all PCI devices are NVMe disks. */
+    for (i = 0; i < virPCIDeviceListCount(pciDevices); i++) {
+        virPCIDevicePtr pci = virPCIDeviceListGet(pciDevices, i);
+        g_autofree char *drvPath = NULL;
+        g_autofree char *drvName = NULL;
+        int stub = VIR_PCI_STUB_DRIVER_NONE;
+
+        if (virPCIDeviceGetDriverPathAndName(pci, &drvPath, &drvName) < 0)
+            goto cleanup;
+
+        if (drvName)
+            stub = virPCIStubDriverTypeFromString(drvName);
+
+        if (stub == VIR_PCI_STUB_DRIVER_VFIO ||
+            STREQ_NULLABLE(drvName, "nvme"))
+            continue;
+
+        VIR_WARN("Suspicious NVMe disk assignment. PCI device "
+                 "%s is not an NVMe disk, it has %s driver",
+                 virPCIDeviceGetName(pci), NULLSTR(drvName));
+    }
+
+    /* This looks like a good opportunity to merge inactive NVMe devices onto
+     * the active list. This, however, means that if something goes wrong we
+     * have to perform a rollback before returning. */
+    for (i = 0; i < virNVMeDeviceListCount(nvmeDevices); i++) {
+        temp = virNVMeDeviceListGet(nvmeDevices, i);
+
+        if (virNVMeDeviceListAdd(hostdev_mgr->activeNVMeHostdevs, temp) < 0)
+            goto rollback;
+
+        lastGoodNVMeIdx = i;
+    }
+
+    if (virHostdevPreparePCIDevicesImpl(hostdev_mgr,
+                                        drv_name, dom_name, NULL,
+                                        pciDevices, NULL, 0, pciFlags) < 0)
+        goto rollback;
+
+    ret = 0;
+ cleanup:
+    virObjectUnlock(hostdev_mgr->activeNVMeHostdevs);
+    return ret;
+
+ rollback:
+    while (lastGoodNVMeIdx >= 0) {
+        temp = virNVMeDeviceListGet(nvmeDevices, lastGoodNVMeIdx);
+
+        virNVMeDeviceListDel(hostdev_mgr->activeNVMeHostdevs, temp);
+
+        lastGoodNVMeIdx--;
+    }
+    goto cleanup;
+}
+
+
+int
+virHostdevPrepareNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                             const char *drv_name,
+                             const char *dom_name,
+                             virDomainDiskDefPtr *disks,
+                             size_t ndisks)
+{
+    size_t i;
+    ssize_t lastGoodDiskIdx = -1;
+
+    for (i = 0; i < ndisks; i++) {
+        if (virHostdevPrepareOneNVMeDevice(hostdev_mgr, drv_name,
+                                           dom_name, disks[i]->src) < 0)
+            goto rollback;
+
+        lastGoodDiskIdx = i;
+    }
+
+    return 0;
+
+ rollback:
+    while (lastGoodDiskIdx >= 0) {
+        if (virHostdevReAttachOneNVMeDevice(hostdev_mgr, drv_name, dom_name,
+                                            disks[lastGoodDiskIdx]->src) < 0) {
+            VIR_ERROR(_("Failed to reattach NVMe for disk target: %s"),
+                      disks[lastGoodDiskIdx]->dst);
+        }
+
+        lastGoodDiskIdx--;
+    }
+
+    return -1;
+}
+
+
+int
+virHostdevReAttachOneNVMeDevice(virHostdevManagerPtr hostdev_mgr,
+                                const char *drv_name,
+                                const char *dom_name,
+                                virStorageSourcePtr src)
+{
+    g_autoptr(virNVMeDeviceList) nvmeDevices = NULL;
+    g_autoptr(virPCIDeviceList) pciDevices = NULL;
+    size_t i;
+    int ret = -1;
+
+    if (!(nvmeDevices = virNVMeDeviceListNew()))
+        return -1;
+
+    if (virHostdevGetNVMeDeviceList(nvmeDevices, src, drv_name, dom_name) < 0)
+        return -1;
+
+    if (virNVMeDeviceListCount(nvmeDevices) == 0)
+        return 0;
+
+    virObjectLock(hostdev_mgr->activeNVMeHostdevs);
+
+    if (!(pciDevices = virNVMeDeviceListCreateReAttachList(hostdev_mgr->activeNVMeHostdevs,
+                                                           nvmeDevices)))
+        goto cleanup;
+
+    virHostdevReAttachPCIDevicesImpl(hostdev_mgr,
+                                     drv_name, dom_name, pciDevices,
+                                     NULL, 0, NULL);
+
+    for (i = 0; i < virNVMeDeviceListCount(nvmeDevices); i++) {
+        virNVMeDevicePtr temp = virNVMeDeviceListGet(nvmeDevices, i);
+
+        if (virNVMeDeviceListDel(hostdev_mgr->activeNVMeHostdevs, temp) < 0)
+            goto cleanup;
+    }
+
+    ret = 0;
+ cleanup:
+    virObjectUnlock(hostdev_mgr->activeNVMeHostdevs);
+    return ret;
+}
+
+
+int
+virHostdevReAttachNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                              const char *drv_name,
+                              const char *dom_name,
+                              virDomainDiskDefPtr *disks,
+                              size_t ndisks)
+{
+    size_t i;
+    int ret = 0;
+
+    /* Contrary to virHostdevPrepareNVMeDevices, this is a best
+     * effort approach. Just iterate over all disks and try to
+     * reattach them. Don't stop at the first failure. */
+    for (i = 0; i < ndisks; i++) {
+        if (virHostdevReAttachOneNVMeDevice(hostdev_mgr, drv_name,
+                                            dom_name, disks[i]->src) < 0) {
+            VIR_ERROR(_("Failed to reattach NVMe for disk target: %s"),
+                      disks[i]->dst);
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+
+int
+virHostdevUpdateActiveNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                                  const char *drv_name,
+                                  const char *dom_name,
+                                  virDomainDiskDefPtr *disks,
+                                  size_t ndisks)
+{
+    g_autoptr(virNVMeDeviceList) nvmeDevices = NULL;
+    g_autoptr(virPCIDeviceList) pciDevices = NULL;
+    virNVMeDevicePtr temp = NULL;
+    size_t i;
+    ssize_t lastGoodNVMeIdx = -1;
+    ssize_t lastGoodPCIIdx = -1;
+    int ret = -1;
+
+    if (!(nvmeDevices = virNVMeDeviceListNew()))
+        return -1;
+
+    for (i = 0; i < ndisks; i++) {
+        if (virHostdevGetNVMeDeviceList(nvmeDevices, disks[i]->src, drv_name, dom_name) < 0)
+            return -1;
+    }
+
+    if (virNVMeDeviceListCount(nvmeDevices) == 0)
+        return 0;
+
+    virObjectLock(hostdev_mgr->activeNVMeHostdevs);
+    virObjectLock(hostdev_mgr->activePCIHostdevs);
+    virObjectLock(hostdev_mgr->inactivePCIHostdevs);
+
+    if (!(pciDevices = virNVMeDeviceListCreateDetachList(hostdev_mgr->activeNVMeHostdevs,
+                                                         nvmeDevices)))
+        goto cleanup;
+
+    for (i = 0; i < virNVMeDeviceListCount(nvmeDevices); i++) {
+        temp = virNVMeDeviceListGet(nvmeDevices, i);
+
+        if (virNVMeDeviceListAdd(hostdev_mgr->activeNVMeHostdevs, temp) < 0)
+            goto rollback;
+
+        lastGoodNVMeIdx = i;
+    }
+
+    for (i = 0; i < virPCIDeviceListCount(pciDevices); i++) {
+        virPCIDevicePtr actual = virPCIDeviceListGet(pciDevices, i);
+
+        /* We must restore some attributes that were lost on daemon restart. */
+        virPCIDeviceSetUnbindFromStub(actual, true);
+        if (virPCIDeviceSetUsedBy(actual, drv_name, dom_name) < 0)
+            goto rollback;
+
+        if (virPCIDeviceListAddCopy(hostdev_mgr->activePCIHostdevs, actual) < 0)
+            goto rollback;
+
+        lastGoodPCIIdx = i;
+    }
+
+    ret = 0;
+ cleanup:
+    virObjectUnlock(hostdev_mgr->inactivePCIHostdevs);
+    virObjectUnlock(hostdev_mgr->activePCIHostdevs);
+    virObjectUnlock(hostdev_mgr->activeNVMeHostdevs);
+    return ret;
+
+ rollback:
+    while (lastGoodNVMeIdx >= 0) {
+        temp = virNVMeDeviceListGet(nvmeDevices, lastGoodNVMeIdx);
+
+        virNVMeDeviceListDel(hostdev_mgr->activeNVMeHostdevs, temp);
+
+        lastGoodNVMeIdx--;
+    }
+    while (lastGoodPCIIdx >= 0) {
+        virPCIDevicePtr actual = virPCIDeviceListGet(pciDevices, lastGoodPCIIdx);
+
+        virPCIDeviceListDel(hostdev_mgr->activePCIHostdevs, actual);
+
+        lastGoodPCIIdx--;
+    }
+    goto cleanup;
+}
diff --git a/src/util/virhostdev.h b/src/util/virhostdev.h
index 2d61c21e9d..ae84ed3d20 100644
--- a/src/util/virhostdev.h
+++ b/src/util/virhostdev.h
@@ -29,6 +29,7 @@
 #include "virscsivhost.h"
 #include "conf/domain_conf.h"
 #include "virmdev.h"
+#include "virnvme.h"
 
 typedef enum {
     VIR_HOSTDEV_STRICT_ACS_CHECK = (1 << 0), /* strict acs check */
@@ -53,6 +54,9 @@ struct _virHostdevManager {
     virSCSIDeviceListPtr activeSCSIHostdevs;
     virSCSIVHostDeviceListPtr activeSCSIVHostHostdevs;
     virMediatedDeviceListPtr activeMediatedHostdevs;
+    /* NVMe devices are PCI devices really, but one NVMe disk can
+     * have multiple namespaces. */
+    virNVMeDeviceListPtr activeNVMeHostdevs;
 };
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virHostdevManager, virObjectUnref);
 
@@ -207,3 +211,36 @@ int virHostdevPCINodeDeviceReAttach(virHostdevManagerPtr mgr,
 int virHostdevPCINodeDeviceReset(virHostdevManagerPtr mgr,
                                  virPCIDevicePtr pci)
     ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
+int
+virHostdevPrepareOneNVMeDevice(virHostdevManagerPtr hostdev_mgr,
+                               const char *drv_name,
+                               const char *dom_name,
+                               virStorageSourcePtr src);
+
+int
+virHostdevPrepareNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                             const char *drv_name,
+                             const char *dom_name,
+                             virDomainDiskDefPtr *disks,
+                             size_t ndisks);
+
+int
+virHostdevReAttachOneNVMeDevice(virHostdevManagerPtr hostdev_mgr,
+                                const char *drv_name,
+                                const char *dom_name,
+                                virStorageSourcePtr src);
+
+int
+virHostdevReAttachNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                              const char *drv_name,
+                              const char *dom_name,
+                              virDomainDiskDefPtr *disks,
+                              size_t ndisks);
+
+int
+virHostdevUpdateActiveNVMeDevices(virHostdevManagerPtr hostdev_mgr,
+                                  const char *drv_name,
+                                  const char *dom_name,
+                                  virDomainDiskDefPtr *disks,
+                                  size_t ndisks);
diff --git a/src/util/virnvme.h b/src/util/virnvme.h
index 911a9d29f3..db4d72ab9a 100644
--- a/src/util/virnvme.h
+++ b/src/util/virnvme.h
@@ -31,6 +31,8 @@ typedef virNVMeDevice *virNVMeDevicePtr;
 typedef struct _virNVMeDeviceList virNVMeDeviceList;
 typedef virNVMeDeviceList *virNVMeDeviceListPtr;
 
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNVMeDeviceList, virObjectUnref);
+
 virNVMeDevicePtr
 virNVMeDeviceNew(const virPCIDeviceAddress *address,
                  unsigned long namespace,
-- 
2.23.0

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list
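
For context, here is a minimal sketch (illustrative only, not part of the patch) of how a hypervisor driver might be expected to consume the new helpers around the domain lifecycle. The "example" driver name, the wrapper function names, and the call sites are assumptions made for this sketch; only the virHostdevPrepareNVMeDevices() and virHostdevReAttachNVMeDevices() signatures come from the patch itself:

/* Illustrative sketch only: the "example" driver name and these wrapper
 * functions are hypothetical and not part of this series. */
#include "virhostdev.h"

static int
exampleDriverPrepareNVMeDisks(virHostdevManagerPtr mgr,
                              virDomainDefPtr def)
{
    /* On domain startup: detach every <disk type='nvme'/> source from the
     * host NVMe driver and record it as used by this driver and domain. */
    return virHostdevPrepareNVMeDevices(mgr, "example", def->name,
                                        def->disks, def->ndisks);
}

static void
exampleDriverReAttachNVMeDisks(virHostdevManagerPtr mgr,
                               virDomainDefPtr def)
{
    /* On domain shutdown: best effort; the helper reports per-disk
     * failures itself and keeps going. */
    virHostdevReAttachNVMeDevices(mgr, "example", def->name,
                                  def->disks, def->ndisks);
}

On daemon restart, virHostdevUpdateActiveNVMeDevices() would presumably be called the same way for each running domain to re-populate the active NVMe list.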