This module will be used by virHostdevManager and is inspired by the virPCIDevice module. The two are very similar; the difference is in what identifies an NVMe device: the PCI address AND the namespace ID. This means that an NVMe device can appear in a domain multiple times, each time with a different namespace. Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> --- po/POTFILES.in | 1 + src/libvirt_private.syms | 18 ++ src/util/Makefile.inc.am | 2 + src/util/virnvme.c | 447 +++++++++++++++++++++++++++++++++++++++ src/util/virnvme.h | 95 +++++++++ 5 files changed, 563 insertions(+) create mode 100644 src/util/virnvme.c create mode 100644 src/util/virnvme.h diff --git a/po/POTFILES.in b/po/POTFILES.in index debb51cd70..3d075201c4 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -277,6 +277,7 @@ @SRCDIR@/src/util/virnetlink.c @SRCDIR@/src/util/virnodesuspend.c @SRCDIR@/src/util/virnuma.c +@SRCDIR@/src/util/virnvme.c @SRCDIR@/src/util/virobject.c @SRCDIR@/src/util/virpci.c @SRCDIR@/src/util/virperf.c diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 86ee9cbc53..aab2b49bdf 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2664,6 +2664,24 @@ virNumaSetPagePoolSize; virNumaSetupMemoryPolicy; +# util/virnvme.h +virNVMeDeviceAddressGet; +virNVMeDeviceCopy; +virNVMeDeviceFree; +virNVMeDeviceListAdd; +virNVMeDeviceListCount; +virNVMeDeviceListCreateDetachList; +virNVMeDeviceListDel; +virNVMeDeviceListGet; +virNVMeDeviceListLookup; +virNVMeDeviceListLookupIndex; +virNVMeDeviceListNew; +virNVMeDeviceNew; +virNVMeDeviceUsedByClear; +virNVMeDeviceUsedByGet; +virNVMeDeviceUsedBySet; + + # util/virobject.h virClassForObject; virClassForObjectLockable; diff --git a/src/util/Makefile.inc.am b/src/util/Makefile.inc.am index 0855f152fd..838e9479a9 100644 --- a/src/util/Makefile.inc.am +++ b/src/util/Makefile.inc.am @@ -150,6 +150,8 @@ UTIL_SOURCES = \ util/virnetlink.h \ util/virnodesuspend.c \ util/virnodesuspend.h \ + util/virnvme.c \ + util/virnvme.h \ 
util/virkmod.c \ util/virkmod.h \ util/virnuma.c \ diff --git a/src/util/virnvme.c b/src/util/virnvme.c new file mode 100644 index 0000000000..b8179aa431 --- /dev/null +++ b/src/util/virnvme.c @@ -0,0 +1,447 @@ +/* + * virnvme.c: helper APIs for managing NVMe devices + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "virnvme.h" +#include "virobject.h" +#include "virpci.h" +#include "viralloc.h" +#include "virlog.h" +#include "virstring.h" + +VIR_LOG_INIT("util.nvme"); +#define VIR_FROM_THIS VIR_FROM_NONE + +struct _virNVMeDevice { + virPCIDeviceAddress address; /* PCI address of controller */ + unsigned int namespace; /* Namespace ID */ + bool managed; + + char *drvname; + char *domname; +}; + + +struct _virNVMeDeviceList { + virObjectLockable parent; + + size_t count; + virNVMeDevicePtr *devs; +}; + + +static virClassPtr virNVMeDeviceListClass; + +static void virNVMeDeviceListDispose(void *obj); + +static int +virNVMeOnceInit(void) +{ + if (!VIR_CLASS_NEW(virNVMeDeviceList, virClassForObjectLockable())) + return -1; + + return 0; +} + +VIR_ONCE_GLOBAL_INIT(virNVMe); + + +virNVMeDevicePtr +virNVMeDeviceNew(const virPCIDeviceAddress *address, + unsigned long namespace, + bool managed) +{ + virNVMeDevicePtr dev = NULL; + + dev = g_new0(virNVMeDevice, 1); + + virPCIDeviceAddressCopy(&dev->address, 
address); + dev->namespace = namespace; + dev->managed = managed; + + return dev; +} + + +void +virNVMeDeviceFree(virNVMeDevicePtr dev) +{ + if (!dev) + return; + + virNVMeDeviceUsedByClear(dev); + VIR_FREE(dev); +} + + +virNVMeDevicePtr +virNVMeDeviceCopy(const virNVMeDevice *dev) +{ + virNVMeDevicePtr copy = NULL; + + copy = g_new0(virNVMeDevice, 1); + copy->drvname = g_strdup(dev->drvname); + copy->domname = g_strdup(dev->domname); + + virPCIDeviceAddressCopy(©->address, &dev->address); + copy->namespace = dev->namespace; + copy->managed = dev->managed; + + return copy; +} + + +const virPCIDeviceAddress * +virNVMeDeviceAddressGet(const virNVMeDevice *dev) +{ + return &dev->address; +} + + +void +virNVMeDeviceUsedByClear(virNVMeDevicePtr dev) +{ + VIR_FREE(dev->drvname); + VIR_FREE(dev->domname); +} + + +void +virNVMeDeviceUsedByGet(const virNVMeDevice *dev, + const char **drv, + const char **dom) +{ + *drv = dev->drvname; + *dom = dev->domname; +} + + +void +virNVMeDeviceUsedBySet(virNVMeDevicePtr dev, + const char *drv, + const char *dom) +{ + dev->drvname = g_strdup(drv); + dev->domname = g_strdup(dom); +} + + +virNVMeDeviceListPtr +virNVMeDeviceListNew(void) +{ + virNVMeDeviceListPtr list; + + if (virNVMeInitialize() < 0) + return NULL; + + if (!(list = virObjectLockableNew(virNVMeDeviceListClass))) + return NULL; + + return list; +} + + +static void +virNVMeDeviceListDispose(void *obj) +{ + virNVMeDeviceListPtr list = obj; + size_t i; + + for (i = 0; i < list->count; i++) + virNVMeDeviceFree(list->devs[i]); + + VIR_FREE(list->devs); +} + + +size_t +virNVMeDeviceListCount(const virNVMeDeviceList *list) +{ + return list->count; +} + + +int +virNVMeDeviceListAdd(virNVMeDeviceListPtr list, + const virNVMeDevice *dev) +{ + virNVMeDevicePtr tmp; + + if ((tmp = virNVMeDeviceListLookup(list, dev))) { + g_autofree char *addrStr = virPCIDeviceAddressAsString(&tmp->address); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("NVMe device %s namespace %u is already on the 
list"), + NULLSTR(addrStr), tmp->namespace); + return -1; + } + + if (!(tmp = virNVMeDeviceCopy(dev)) || + VIR_APPEND_ELEMENT(list->devs, list->count, tmp) < 0) { + virNVMeDeviceFree(tmp); + return -1; + } + + return 0; +} + + +int +virNVMeDeviceListDel(virNVMeDeviceListPtr list, + const virNVMeDevice *dev) +{ + ssize_t idx; + virNVMeDevicePtr tmp = NULL; + + if ((idx = virNVMeDeviceListLookupIndex(list, dev)) < 0) { + g_autofree char *addrStr = virPCIDeviceAddressAsString(&dev->address); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("NVMe device %s namespace %u not found"), + NULLSTR(addrStr), dev->namespace); + return -1; + } + + tmp = list->devs[idx]; + VIR_DELETE_ELEMENT(list->devs, idx, list->count); + virNVMeDeviceFree(tmp); + return 0; +} + + +virNVMeDevicePtr +virNVMeDeviceListGet(virNVMeDeviceListPtr list, + size_t i) +{ + return i < list->count ? list->devs[i] : NULL; +} + + +virNVMeDevicePtr +virNVMeDeviceListLookup(virNVMeDeviceListPtr list, + const virNVMeDevice *dev) +{ + ssize_t idx; + + if ((idx = virNVMeDeviceListLookupIndex(list, dev)) < 0) + return NULL; + + return list->devs[idx]; +} + + +ssize_t +virNVMeDeviceListLookupIndex(virNVMeDeviceListPtr list, + const virNVMeDevice *dev) +{ + size_t i; + + if (!list) + return -1; + + for (i = 0; i < list->count; i++) { + virNVMeDevicePtr other = list->devs[i]; + + if (virPCIDeviceAddressEqual(&dev->address, &other->address) && + dev->namespace == other->namespace) + return i; + } + + return -1; +} + + +static virNVMeDevicePtr +virNVMeDeviceListLookupByPCIAddress(virNVMeDeviceListPtr list, + const virPCIDeviceAddress *address) +{ + size_t i; + + if (!list) + return NULL; + + for (i = 0; i < list->count; i++) { + virNVMeDevicePtr other = list->devs[i]; + + if (virPCIDeviceAddressEqual(address, &other->address)) + return other; + } + + return NULL; +} + + +static virPCIDevicePtr +virNVMeDeviceCreatePCIDevice(const virNVMeDevice *nvme) +{ + g_autoptr(virPCIDevice) pci = NULL; + + if (!(pci = 
virPCIDeviceNew(nvme->address.domain, + nvme->address.bus, + nvme->address.slot, + nvme->address.function))) + return NULL; + + /* NVMe devices must be bound to vfio */ + virPCIDeviceSetStubDriver(pci, VIR_PCI_STUB_DRIVER_VFIO); + virPCIDeviceSetManaged(pci, nvme->managed); + + return g_steal_pointer(&pci); +} + + +/** + * virNVMeDeviceListCreateDetachList: + * @activeList: list of active NVMe devices + * @toDetachList: list of NVMe devices to detach from the host + * + * This function creates a list of PCI devices which can then be + * reused by PCI device detach functions (e.g. + * virHostdevPreparePCIDevicesImpl()) as each PCI device from the + * returned list is initialized properly for detach. + * + * Basically, this just blindly collects unique PCI addresses + * from @toDetachList that don't appear on @activeList. + * + * Returns: a list on success, + * NULL otherwise. + */ +virPCIDeviceListPtr +virNVMeDeviceListCreateDetachList(virNVMeDeviceListPtr activeList, + virNVMeDeviceListPtr toDetachList) +{ + g_autoptr(virPCIDeviceList) pciDevices = NULL; + size_t i; + + if (!(pciDevices = virPCIDeviceListNew())) + return NULL; + + for (i = 0; i < toDetachList->count; i++) { + const virNVMeDevice *d = toDetachList->devs[i]; + g_autoptr(virPCIDevice) pci = NULL; + + /* If there is a NVMe device with the same PCI address on + * the activeList, the device is already detached. */ + if (virNVMeDeviceListLookupByPCIAddress(activeList, &d->address)) + continue; + + /* It may happen that we want to detach two namespaces + * from the same NVMe device. This will be represented as + * two different instances of virNVMeDevice, but + * obviously we want to put the PCI device on the detach + * list only once. 
*/ + if (virPCIDeviceListFindByIDs(pciDevices, + d->address.domain, + d->address.bus, + d->address.slot, + d->address.function)) + continue; + + if (!(pci = virNVMeDeviceCreatePCIDevice(d))) + return NULL; + + if (virPCIDeviceListAdd(pciDevices, pci) < 0) + return NULL; + + /* avoid freeing the device */ + pci = NULL; + } + + return g_steal_pointer(&pciDevices); +} + + +/** + * virNVMeDeviceListCreateReAttachList: + * @activeList: list of active NVMe devices + * @toReAttachList: list of devices to reattach to the host + * + * This is a counterpart to virNVMeDeviceListCreateDetachList. + * + * This function creates a list of PCI devices which can then be + * reused by PCI device reattach functions (e.g. + * virHostdevReAttachPCIDevicesImpl()) as each PCI device from + * the returned list is initialized properly for reattach. + * + * Basically, this just collects unique PCI addresses + * of devices that appear on @toReAttachList and are used + * exactly once (i.e. no other namespaces are used from the same + * NVMe device). For that purpose, this function needs to know + * list of active NVMe devices (@activeList). + * + * Returns: a list on success, + * NULL otherwise. + */ +virPCIDeviceListPtr +virNVMeDeviceListCreateReAttachList(virNVMeDeviceListPtr activeList, + virNVMeDeviceListPtr toReAttachList) +{ + g_autoptr(virPCIDeviceList) pciDevices = NULL; + size_t i; + + if (!(pciDevices = virPCIDeviceListNew())) + return NULL; + + for (i = 0; i < toReAttachList->count; i++) { + const virNVMeDevice *d = toReAttachList->devs[i]; + g_autoptr(virPCIDevice) pci = NULL; + size_t nused = 0; + + /* Check if there is any other NVMe device with the same PCI address as + * @d. To simplify this, let's just count how many NVMe devices with + * the same PCI address there are on the @activeList. 
*/ + for (i = 0; i < activeList->count; i++) { + virNVMeDevicePtr other = activeList->devs[i]; + + if (!virPCIDeviceAddressEqual(&d->address, &other->address)) + continue; + + nused++; + } + + /* Now, the following cases can happen: + * nused > 1 -> there are other NVMe device active, do NOT detach it + * nused == 1 -> we've found only @d on the @activeList, detach it + * nused == 0 -> huh, wait, what? @d is NOT on the @active list, how can + * we reattach it? + */ + + if (nused == 0) { + /* Shouldn't happen (TM) */ + g_autofree char *addrStr = virPCIDeviceAddressAsString(&d->address); + virReportError(VIR_ERR_INTERNAL_ERROR, + _("NVMe device %s namespace %u not found"), + NULLSTR(addrStr), d->namespace); + return NULL; + } else if (nused > 1) { + /* NVMe device is still in use */ + continue; + } + + /* nused == 1 -> detach the device */ + if (!(pci = virNVMeDeviceCreatePCIDevice(d))) + return NULL; + + if (virPCIDeviceListAdd(pciDevices, pci) < 0) + return NULL; + + /* avoid freeing the device */ + pci = NULL; + } + + return g_steal_pointer(&pciDevices); +} diff --git a/src/util/virnvme.h b/src/util/virnvme.h new file mode 100644 index 0000000000..911a9d29f3 --- /dev/null +++ b/src/util/virnvme.h @@ -0,0 +1,95 @@ +/* + * virnvme.h: helper APIs for managing NVMe devices + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include "virpci.h" + +typedef struct _virNVMeDevice virNVMeDevice; +typedef virNVMeDevice *virNVMeDevicePtr; + +/* Note that this list is lockable, and in fact, it is caller's + * responsibility to acquire the lock and release it. The reason + * is that in a lot of cases the list must be locked between two + * API calls and therefore only caller knows when it is safe to + * finally release the lock. */ +typedef struct _virNVMeDeviceList virNVMeDeviceList; +typedef virNVMeDeviceList *virNVMeDeviceListPtr; + +virNVMeDevicePtr +virNVMeDeviceNew(const virPCIDeviceAddress *address, + unsigned long namespace, + bool managed); + +void +virNVMeDeviceFree(virNVMeDevicePtr dev); + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNVMeDevice, virNVMeDeviceFree); + +virNVMeDevicePtr +virNVMeDeviceCopy(const virNVMeDevice *dev); + +const virPCIDeviceAddress * +virNVMeDeviceAddressGet(const virNVMeDevice *dev); + +void +virNVMeDeviceUsedByClear(virNVMeDevicePtr dev); + +void +virNVMeDeviceUsedByGet(const virNVMeDevice *dev, + const char **drv, + const char **dom); + +void +virNVMeDeviceUsedBySet(virNVMeDevicePtr dev, + const char *drv, + const char *dom); + +virNVMeDeviceListPtr +virNVMeDeviceListNew(void); + +size_t +virNVMeDeviceListCount(const virNVMeDeviceList *list); + +int +virNVMeDeviceListAdd(virNVMeDeviceListPtr list, + const virNVMeDevice *dev); + +int +virNVMeDeviceListDel(virNVMeDeviceListPtr list, + const virNVMeDevice *dev); + +virNVMeDevicePtr +virNVMeDeviceListGet(virNVMeDeviceListPtr list, + size_t i); + +virNVMeDevicePtr +virNVMeDeviceListLookup(virNVMeDeviceListPtr list, + const virNVMeDevice *dev); + +ssize_t +virNVMeDeviceListLookupIndex(virNVMeDeviceListPtr list, + const virNVMeDevice *dev); + +virPCIDeviceListPtr +virNVMeDeviceListCreateDetachList(virNVMeDeviceListPtr activeList, + virNVMeDeviceListPtr toDetachList); + +virPCIDeviceListPtr +virNVMeDeviceListCreateReAttachList(virNVMeDeviceListPtr activeList, + virNVMeDeviceListPtr toReAttachList); 
-- 2.23.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list