Rather than always binding to the vfio-pci driver, use the new function virPCIDeviceFindBestVFIOVariant() to see if the running kernel has a VFIO variant driver available that is a better match for the device, and if one is found, use that instead. The virPCIDeviceFindBestVFIOVariant() function reads the modalias file for the given device from sysfs, then looks through /lib/modules/${kernel_release}/modules.alias for the vfio_pci alias that matches with the fewest wildcard ('*') fields. The appropriate "VFIO variant" driver for a device will be the PCI driver implemented by the discovered module - these drivers are compatible with (and provide the entire API of) the standard vfio-pci driver, but have additional device-specific APIs that can be useful for, e.g., saving/restoring state for migration. If a specific driver is named (using <driver model='blah'/> in the device XML), that will still be used rather than searching modules.alias; this makes it possible to force binding of vfio-pci if there is an issue with the auto-selected variant driver. Signed-off-by: Laine Stump <laine@xxxxxxxxxx> --- Changes from V2: * fail if device modalias file isn't found. 
* use unsigned int instead of int for wildcardCt * increase file memory buffer from 4MB to 8MB * other minor nits pointed out by Peter src/libvirt_private.syms | 1 + src/util/virpci.c | 219 +++++++++++++++++++++++++++++++++++++++ src/util/virpci.h | 2 + tests/virpcimock.c | 9 ++ 4 files changed, 231 insertions(+) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index cccefe5a81..2cf01c5f50 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -3076,6 +3076,7 @@ virPCIDeviceCopy; virPCIDeviceDetach; virPCIDeviceExists; virPCIDeviceFileIterate; +virPCIDeviceFindBestVFIOVariant; virPCIDeviceFree; virPCIDeviceGetAddress; virPCIDeviceGetConfigPath; diff --git a/src/util/virpci.c b/src/util/virpci.c index f6bdf56057..99d6740ed1 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -30,6 +30,10 @@ #include <sys/stat.h> #include <unistd.h> +#ifdef __linux__ +# include <sys/utsname.h> +#endif + #include "virlog.h" #include "virerror.h" #include "virfile.h" @@ -1321,6 +1325,206 @@ virPCIDeviceFindDriver(virPCIDevice *dev) } +#ifdef __linux__ +typedef struct { + /* this is the decomposed version of a string like: + * + * vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN + * + * (followed by a space or newline). The "NNNN" are always of the + * length in the example unless replaced with a wildcard ("*"), + * but we make no assumptions about length. 
+ * + * Rather than name each field, we just put them + * all in an array of 7 elements, so that we + * can write a simple loop to compare them + */ + char *fields[7]; /* v, d, sv, sd, bc, sc, i */ +} virPCIDeviceAliasInfo; + + +/* NULL in last position makes parsing loop simpler */ +static const char *fieldnames[] = { "v", "d", "sv", "sd", "bc", "sc", "i", NULL }; + + +static void +virPCIDeviceAliasInfoFree(virPCIDeviceAliasInfo *info) +{ + if (info) { + size_t i; + + for (i = 0; i < G_N_ELEMENTS(info->fields); i++) + g_free(info->fields[i]); + + g_free(info); + } +} + +G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAliasInfo, virPCIDeviceAliasInfoFree); + + +static virPCIDeviceAliasInfo * +virPCIDeviceAliasInfoNew(const char *str) +{ + const char *field = str; + + size_t i; + g_autoptr(virPCIDeviceAliasInfo) ret = g_new0(virPCIDeviceAliasInfo, 1); + + /* initialize from str */ + for (i = 0; i < G_N_ELEMENTS(ret->fields); i++) { + int len = strlen(fieldnames[i]); + const char *next; + + if (strncmp(field, fieldnames[i], len)) + return NULL; + + field += len; + if (fieldnames[i + 1]) { + if (!(next = strstr(field, fieldnames[i + 1]))) + return NULL; + } else { + next = field; + while (*next && !g_ascii_isspace(*next)) + next++; + } + + ret->fields[i] = g_strndup(field, next - field); + field = next; + } + + return g_steal_pointer(&ret); +} + + +static bool +virPCIDeviceAliasInfoMatch(virPCIDeviceAliasInfo *orig, + virPCIDeviceAliasInfo *match, + unsigned int *wildCardCt) +{ + size_t i; + + *wildCardCt = 0; + + for (i = 0; i < G_N_ELEMENTS(orig->fields); i++) { + if (STREQ(match->fields[i], "*")) + (*wildCardCt)++; + else if (STRNEQ(orig->fields[i], match->fields[i])) + return false; + } + return true; +} + + +/* virPCIDeviceFindBestVFIOVariant: + * + * Find the "best" match of all vfio_pci aliases for @dev in the host + * modules.alias file. 
This uses the algorithm of finding every + * modules.alias line that begins with "vfio_pci:", then picking the + * one that matches the device's own modalias value (from the file of + * that name in the device's sysfs directory) with the fewest + * "wildcards" (* character, meaning "match any value for this + * attribute"). + */ +int +virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev, + char **moduleName) +{ + g_autofree char *devModAliasPath = NULL; + g_autofree char *devModAliasContent = NULL; + const char *devModAlias; + g_autoptr(virPCIDeviceAliasInfo) devModAliasInfo = NULL; + struct utsname unameInfo; + g_autofree char *modulesAliasPath = NULL; + g_autofree char *modulesAliasContent = NULL; + const char *line; + unsigned int currentBestWildcardCt = INT_MAX; + + *moduleName = NULL; + + /* get the modalias values for the device from sysfs */ + devModAliasPath = virPCIFile(dev->name, "modalias"); + if (virFileReadAll(devModAliasPath, 100, &devModAliasContent) < 0) + return -1; + + VIR_DEBUG("modalias path: '%s' contents: '%s'", + devModAliasPath, devModAliasContent); + + /* "pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN\n" */ + if ((devModAlias = STRSKIP(devModAliasContent, "pci:")) == NULL || + !(devModAliasInfo = virPCIDeviceAliasInfoNew(devModAlias))) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("device modalias file %1$s content has improper format"), + devModAliasPath); + return -1; + } + + uname(&unameInfo); + modulesAliasPath = g_strdup_printf("/lib/modules/%s/modules.alias", unameInfo.release); + if (virFileReadAll(modulesAliasPath, 8 * 1024 * 1024, &modulesAliasContent) < 0) + return -1; + + /* Look for all lines that are aliases for vfio_pci drivers. 
+ * (The first line is always a comment, so we can be sure "alias" + * is preceded by a newline) + */ + line = modulesAliasContent; + + while ((line = strstr(line, "\nalias vfio_pci:"))) { + g_autoptr(virPCIDeviceAliasInfo) fileModAliasInfo = NULL; + unsigned int wildCardCt; + + /* "alias vfio_pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN XXXX\n" */ + line += strlen("\nalias vfio_pci:"); + if (!(fileModAliasInfo = virPCIDeviceAliasInfoNew(line))) + continue; + + if (virPCIDeviceAliasInfoMatch(devModAliasInfo, + fileModAliasInfo, &wildCardCt)) { + + const char *aliasStart = strchr(line, ' '); + const char *aliasEnd = NULL; + g_autofree char *aliasName = NULL; + + if (!aliasStart) { + VIR_WARN("malformed modules.alias vfio_pci: line"); + continue; + } + + aliasStart++; + line = aliasEnd = strchrnul(aliasStart, '\n'); + aliasName = g_strndup(aliasStart, aliasEnd - aliasStart); + + VIR_DEBUG("matching alias '%s' found, %u wildcards, best previously was %u", + aliasName, wildCardCt, currentBestWildcardCt); + + if (wildCardCt < currentBestWildcardCt) { + + /* this is a better match than previous */ + currentBestWildcardCt = wildCardCt; + g_free(*moduleName); + *moduleName = g_steal_pointer(&aliasName); + } + } + } + return 0; +} + + +#else /* __linux__ */ + + +int +virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev G_GNUC_UNUSED, + char **moduleName G_GNUC_UNUSED) +{ + virReportSystemError(ENOSYS, "%s", + _("VFIO device assignment is not available on this platform")); + return -1; +} +#endif /* __linux__ */ + + int virPCIDeviceUnbind(virPCIDevice *dev) { @@ -1431,6 +1635,21 @@ virPCIDeviceBindToStub(virPCIDevice *dev) return -1; } + if (dev->stubDriverType == VIR_PCI_STUB_DRIVER_VFIO && !dev->stubDriverName) { + g_autofree char *autodetectModuleName = NULL; + + /* automatically use a VFIO variant driver if available for + * this device. 
+ */ + + if (virPCIDeviceFindBestVFIOVariant(dev, &autodetectModuleName) < 0) + return -1; + + g_free(dev->stubDriverName); + dev->stubDriverName = g_steal_pointer(&autodetectModuleName); + } + + /* if a driver name hasn't been decided by now, use default for this type */ if (!dev->stubDriverName) { const char *stubDriverName = NULL; diff --git a/src/util/virpci.h b/src/util/virpci.h index bc7cb2329f..a5bfe9c35d 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -124,6 +124,8 @@ int virPCIDeviceReset(virPCIDevice *dev, virPCIDeviceList *activeDevs, virPCIDeviceList *inactiveDevs); +int virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev, char **moduleName) G_NO_INLINE; + void virPCIDeviceSetManaged(virPCIDevice *dev, bool managed); bool virPCIDeviceGetManaged(virPCIDevice *dev); diff --git a/tests/virpcimock.c b/tests/virpcimock.c index b2111794e6..13b37bb23d 100644 --- a/tests/virpcimock.c +++ b/tests/virpcimock.c @@ -25,6 +25,7 @@ #if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) # define VIR_MOCK_LOOKUP_MAIN # include "virmock.h" +# include "virpci.h" # include <unistd.h> # include <fcntl.h> # include <sys/stat.h> @@ -925,6 +926,14 @@ pci_driver_handle_unbind(const char *path) } + +int +virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev G_GNUC_UNUSED, + char **moduleName G_GNUC_UNUSED) +{ + return 0; +} + /* * Functions to load the symbols and init the environment */ -- 2.43.0 _______________________________________________ Devel mailing list -- devel@xxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx