Re: [libvirt PATCH v2 15/15] qemu: automatically bind to a vfio variant driver, if available

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/28/23 10:39 AM, Peter Krempa wrote:
On Mon, Nov 06, 2023 at 02:39:00 -0500, Laine Stump wrote:
Rather than always binding to the vfio-pci driver, use the new
function virPCIDeviceFindBestVFIOVariant() to see if the running
kernel has a VFIO variant driver available that is a better match for
the device, and if one is found, use that instead.

virPCIDeviceFindBestVFIOVariant() function reads the modalias file for
the given device from sysfs, then looks through
/lib/modules/${kernel_release}/modules.alias for the vfio_pci alias
that matches with the least number of wildcard ('*') fields.

The appropriate "VFIO variant" driver for a device will be the PCI
driver implemented by the discovered module - these drivers are
compatible with (and provide the entire API of) the standard vfio-pci
driver, but have additional device-specific APIs that can be useful
for, e.g., saving/restoring state for migration.

If a specific driver is named, that will still be used rather than
searching modules.alias; this makes it possible to force binding of
vfio-pci if there is an issue with the auto-selected variant driver.

Signed-off-by: Laine Stump <laine@xxxxxxxxxx>
---
  src/util/virpci.c | 242 ++++++++++++++++++++++++++++++++++++++++++++++
  1 file changed, 242 insertions(+)

diff --git a/src/util/virpci.c b/src/util/virpci.c
index 885958eb99..40831a5294 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -30,6 +30,10 @@
  #include <sys/stat.h>
  #include <unistd.h>
+#ifndef WIN32
+# include <sys/utsname.h>
+#endif
+
  #include "virlog.h"
  #include "virerror.h"
  #include "virfile.h"
@@ -1321,6 +1325,225 @@ virPCIDeviceFindDriver(virPCIDevice *dev)
  }
+#ifdef __linux__
+typedef struct {
+    /* this is the decomposed version of a string like:
+     *
+     *   vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN
+     *
+     * (followed by a space or newline). The "NNNN" are always of the
+     * length in the example unless replaced with a wildcard ("*"),
+     * but we make no assumptions about length.
+     *
+     * Rather than name each field, we just put them
+     * all in an array of 6 elements, so that we

                             ^^^

+     * can write a simple loop to compare them
+     */
+    char *fields[7]; /* v, d, sv, sd, bc, sc, i */

                     ^^^ ...

+} virPCIDeviceAliasInfo;
+
+
+/* NULL in last position makes parsing loop simpler */
+static const char *fieldnames[] = { "v", "d", "sv", "sd", "bc", "sc", "i", NULL };
+
+
+static void
+virPCIDeviceAliasInfoFree(virPCIDeviceAliasInfo *info)
+{
+    if (info) {
+        size_t i;
+
+        for (i = 0; i < G_N_ELEMENTS(info->fields); i++)
+            g_free(info->fields[i]);
+
+        g_free(info);
+    }
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAliasInfo, virPCIDeviceAliasInfoFree);
+
+
+static virPCIDeviceAliasInfo *
+virPCIDeviceAliasInfoNew(const char *str)
+{
+    const char *field = str;
+
+    size_t i;
+    g_autoptr(virPCIDeviceAliasInfo) ret = g_new0(virPCIDeviceAliasInfo, 1);
+
+    if (!str)
+        return g_steal_pointer(&ret);

So here you return an empty array as "success" if the input string is
empty, but note that neither virPCIDeviceAliasInfoMatch nor
virPCIDeviceAliasInfoPrint properly handle the fields being NULL.

None of the callers use this scenario so it should be removed.

Done.


+
+    /* initialize from str */
+    for (i = 0; i < G_N_ELEMENTS(ret->fields); i++) {
+        int len = strlen(fieldnames[i]);
+        const char *next;
+
+        if (strncmp(field, fieldnames[i], len))
+            return NULL;
+
+        field += len;
+        if (fieldnames[i + 1]) {
+            if (!(next = strstr(field, fieldnames[i + 1])))
+                return NULL;
+        } else {
+            next = field;
+            virSkipToSpace(&next);
+        }
+
+        ret->fields[i] = g_strndup(field, next - field);
+        field = next;
+    }
+
+    return g_steal_pointer(&ret);
+}
+
+
+static void
+virPCIDeviceAliasInfoPrint(virPCIDeviceAliasInfo *info)
+{
+    size_t i;
+
+    for (i = 0; i < G_N_ELEMENTS(info->fields); i++)
+        VIR_DEBUG("%s: '%s'", fieldnames[i], info->fields[i]);
+}
+
+
+static bool
+virPCIDeviceAliasInfoMatch(virPCIDeviceAliasInfo *orig,
+                           virPCIDeviceAliasInfo *match,
+                           int *wildCardCt)

unsigned, negative values don't seem to make sense.

Okay, I've made them all unsigned.


+{
+    size_t i;
+
+    *wildCardCt = 0;
+
+    for (i = 0; i < G_N_ELEMENTS(orig->fields); i++) {
+        if (STREQ(match->fields[i], "*"))
+            (*wildCardCt)++;
+        else if (STRNEQ(orig->fields[i], match->fields[i]))
+            return false;
+    }
+    return true;
+}
+
+
+/* virPCIDeviceFindBestVFIOVariant:
+ *
+ * Find the "best" match of all vfio_pci aliases for @dev in the host
+ * modules.alias file. This uses the algorithm of finding every
+ * modules.alias line that begins with "vfio_pci:", then picking the
+ * one that matches the device's own modalias value (from the file of
+ * that name in the device's sysfs directory) with the fewest
+ * "wildcards" (* character, meaning "match any value for this
+ * attribute").
+ */
+static int
+virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev,
+                                char **moduleName)
+{
+    g_autofree char *devModAliasPath = NULL;
+    g_autofree char *devModAliasContent = NULL;
+    const char *devModAlias;
+    g_autoptr(virPCIDeviceAliasInfo) devModAliasInfo = NULL;
+    struct utsname unameInfo;
+    g_autofree char *modulesAliasPath = NULL;
+    g_autofree char *modulesAliasContent = NULL;
+    const char *line;
+    int currentBestWildcardCt = INT_MAX;

Unsigned.

+
+    *moduleName = NULL;
+
+    /* get the modalias values for the device from sysfs */
+    devModAliasPath = virPCIFile(dev->name, "modalias");

If you expect this file to be missing ...

+    if (virFileReadAll(devModAliasPath, 100, &devModAliasContent) < 0)

... then you need to check whether it exists, as this will spam logs.

It should never be missing. *Every* pci device should have a modalias file, so if it's missing that's something that everyone should know about. Just in case that *did* happen, I was reluctant to have the entire operation fail (just because it would mean that something that previously worked would no longer work), but after talking to Alex (Williamson) I've decided that if the modalias file is missing, that is enough indication that something is broken that we *should* log an error and fail.

I changed it to return -1

And also we most likely should reset the local instance of error in this
code path as virFileReadAll sets it.

+        return 0;
+
+    VIR_DEBUG("modalias path: '%s' contents: '%s'",
+              devModAliasPath, devModAliasContent);
+
+    /* "pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN\n" */
+    if  ((devModAlias = STRSKIP(devModAliasContent, "pci:")) == NULL ||
+         !(devModAliasInfo = virPCIDeviceAliasInfoNew(devModAlias))) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("device modalias file %1$s content has improper format"),
+                       devModAliasPath);
+        return -1;
+    }
+
+    virPCIDeviceAliasInfoPrint(devModAliasInfo);

Since this logs 7 lines into the debug log I don't think it's a great
idea to have it in production. If you need to be sure that the parser
processes teh modalias correctly, then please add unit tests instead.
Logging the raw string, which happens above should be sufficient for
debugging.

Agreed. I had that in there when I was writing the code, and should have removed it before I posted - it really is too verbose and the "parsing" is so simplistic that it doesn't need a test case. I removed all trace of virPCIDeiceAliasInfoPrint().



+
+    uname(&unameInfo);
+    modulesAliasPath = g_strdup_printf("/lib/modules/%s/modules.alias",
+                                       unameInfo.release);
+    if (virFileReadAll(modulesAliasPath,
+                       4 * 1024 * 1024, &modulesAliasContent) < 0) {

Note that the file currently has 1.3MiB already, I'm not sure what the
potenital for growth is though, but this leaves less than an order of
magnitude in reserve.

How about increasing it to 8MB? Still too small?


Additionally the code below only currently looks at 2 of the 25k lines
in the file. Maybe it's worth caching the relevant lines?

Yes, I agree with this. I suppose the "proper" way to do that is to use virfilecache (code re-use and all that), but it will take me some time to understand that, as I've never looked at it until just now :-P.

How about a promise to do a followup "soon" that caches the useful lines and re-uses them?


+        return -1;
+    }
+
+    /* Look for all lines that are aliases for vfio_pci drivers.
+     * (The first line is always a comment, so we can be sure "alias"
+     * is preceded by a newline)
+     */
+    line = modulesAliasContent;
+
+    while ((line = strstr(line, "\nalias vfio_pci:"))) {
+        g_autoptr(virPCIDeviceAliasInfo) fileModAliasInfo = NULL;
+        int wildCardCt;
+
+        /* "alias vfio_pci:vNNNNNNNNdNNNNNNNNsvNNNNNNNNsdNNNNNNNNbcNNscNNiNN XXXX\n" */
+        line += strlen("\nalias vfio_pci:");
+        if (!(fileModAliasInfo = virPCIDeviceAliasInfoNew(line)))
+            continue;
+
+        virPCIDeviceAliasInfoPrint(fileModAliasInfo);

Logging the matched line should be sufficient. Please remove the overly
verbose virPCIDeviceAliasInfoPrint.

It's gone. It's over. </frodo>


+
+        if (virPCIDeviceAliasInfoMatch(devModAliasInfo,
+                                       fileModAliasInfo, &wildCardCt)) {
+
+            const char *aliasStart = strchr(line, ' ');
+            const char *aliasEnd = NULL;
+            g_autofree char *aliasName = NULL;
+
+            if (!aliasStart) {
+                VIR_WARN("malformed modules.alias vfio_pci: line");
+                continue;
+            }
+
+            aliasStart++;
+            line = aliasEnd = strchrnul(aliasStart, '\n');
+            aliasName = g_strndup(aliasStart, aliasEnd - aliasStart);
+
+            VIR_DEBUG("matching alias '%s' found, %d wildcards",
+                      aliasName, wildCardCt);

Possibly worth logging currentBestWildcardCt too?

Sure, why not - doesn't add any extra lines.

+
+            if (wildCardCt < currentBestWildcardCt) {
+
+                /* this is a better match than previous */
+                currentBestWildcardCt = wildCardCt;
+                g_free(*moduleName);
+                *moduleName = g_steal_pointer(&aliasName);
+            }
+        }
+    }
+    return 0;
+}
+
+
+#else /* __linux__ */
+
+
+static int
+virPCIDeviceFindBestVFIOVariant(virPCIDevice *dev G_GNUC_UNUSED,
+                                char **moduleName G_GNUC_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("VFIO device assignment is not available on this platform"));
+    return -1;
+}
+#endif /* __linux__ */
+
+
  int
  virPCIDeviceUnbind(virPCIDevice *dev)
  {
@@ -1431,6 +1654,22 @@ virPCIDeviceBindToStub(virPCIDevice *dev)
          return -1;
      }
+ if (dev->stubDriverType == VIR_PCI_STUB_DRIVER_VFIO
+        && !dev->stubDriverName) {

Logic operators are commonly placed at the end of the line before.

It's been 15 years, and I *still* remember this backwards half the time!!


+        g_autofree char *autodetectModuleName = NULL;
+
+        /*  automatically use a VFIO variant driver if available for
+         *  this device.
+         */
+
+        if (virPCIDeviceFindBestVFIOVariant(dev, &autodetectModuleName) < 0)
+            return -1;
+
+        g_free(dev->stubDriverName);
+        dev->stubDriverName = g_steal_pointer(&autodetectModuleName);
+    }
+
+    /* if a driver name hasn't been decided by now, use default for this type */
      if (!dev->stubDriverName) {
const char *stubDriverName = NULL;
@@ -1538,6 +1777,9 @@ virPCIDeviceReattach(virPCIDevice *dev,
      return 0;
  }
+
+
+

Spurious whitespace change.

  static char *
  virPCIDeviceReadID(virPCIDevice *dev, const char *id_name)
  {

Reviewed-by: Peter Krempa <pkrempa@xxxxxxxxxx>
_______________________________________________
Devel mailing list -- devel@xxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx
_______________________________________________
Devel mailing list -- devel@xxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux