The parent device needs to report the generic stuff about the supported mediated devices types, like device API, available instances, type name, etc. Therefore this patch introduces a new nested capability element of type 'mdev' with the resulting XML of the following format: <device> ... <capability type='pci'> ... <capability type='mdev'> <type id='vendor-supplied-id'> <description>optional, raw, unstructured resource allocation data </description> <deviceAPI>vfio-pci</deviceAPI> <availableInstances>NUM</availableInstances> </type> ... <type> ... </type> </capability> </capability> ... </device> Signed-off-by: Erik Skultety <eskultet@xxxxxxxxxx> --- docs/schemas/nodedev.rng | 24 ++++ src/conf/node_device_conf.c | 103 +++++++++++++++++ src/conf/node_device_conf.h | 17 +++ src/libvirt_private.syms | 1 + src/node_device/node_device_udev.c | 133 ++++++++++++++++++++++ tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml | 27 +++++ 6 files changed, 305 insertions(+) create mode 100644 tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml diff --git a/docs/schemas/nodedev.rng b/docs/schemas/nodedev.rng index 0f90a73c8..4b5dca777 100644 --- a/docs/schemas/nodedev.rng +++ b/docs/schemas/nodedev.rng @@ -205,6 +205,30 @@ </optional> <optional> + <element name='capability'> + <attribute name='type'> + <value>mdev</value> + </attribute> + <element name='type'> + <attribute name='id'> + <data type='string'/> + </attribute> + <optional> + <element name='name'><text/></element> + </optional> + <element name='deviceAPI'> + <choice> + <value>vfio-pci</value> + </choice> + </element> + <element name='availableInstances'> + <ref name='unsignedInt'/> + </element> + </element> + </element> + </optional> + + <optional> <element name='iommuGroup'> <attribute name='number'> <ref name='unsignedInt'/> diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c index fdddc97eb..fe4f1bc60 100644 --- a/src/conf/node_device_conf.c +++ b/src/conf/node_device_conf.c @@ -87,6 +87,26 @@ virNodeDevCapsDefParseString(const char *xpath, } +static void +virNodeDevCapMdevClear(virNodeDevCapMdevPtr mdev) +{ + VIR_FREE(mdev->type); + VIR_FREE(mdev->name); + VIR_FREE(mdev->device_api); +} + + +void +virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev) +{ + if (!mdev) + return; + + virNodeDevCapMdevClear(mdev); + VIR_FREE(mdev); +} + + void virNodeDeviceDefFree(virNodeDeviceDefPtr def) { @@ -264,6 +284,27 @@ virNodeDeviceCapPCIDefFormat(virBufferPtr buf, virBufferAsprintf(buf, "<capability type='%s'/>\n", virPCIHeaderTypeToString(data->pci_dev.hdrType)); } + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_MDEV) { + virBufferAddLit(buf, "<capability type='mdev'>\n"); + virBufferAdjustIndent(buf, 2); + for (i = 0; i < data->pci_dev.nmdevs; i++) { + virNodeDevCapMdevPtr mdev = data->pci_dev.mdevs[i]; + virBufferEscapeString(buf, "<type id='%s'>\n", mdev->type); + virBufferAdjustIndent(buf, 2); + if (mdev->name) + virBufferAsprintf(buf, "<name>%s</name>\n", + mdev->name); + virBufferAsprintf(buf, "<deviceAPI>%s</deviceAPI>\n", + mdev->device_api); + virBufferAsprintf(buf, + "<availableInstances>%u</availableInstances>\n", + mdev->available_instances); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</type>\n"); + } + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</capability>\n"); + } if (data->pci_dev.nIommuGroupDevices) { virBufferAsprintf(buf, "<iommuGroup number='%d'>\n", data->pci_dev.iommuGroupNumber); @@ -1358,6 +1399,62 @@ virNodeDevPCICapSRIOVParseXML(xmlXPathContextPtr ctxt, static int +virNodeDevPCICapMediatedDevParseXML(xmlXPathContextPtr ctxt, + virNodeDevCapPCIDevPtr pci_dev) +{ + int ret = -1; + xmlNodePtr orignode = NULL; + xmlNodePtr *nodes = NULL; + int nmdevs = virXPathNodeSet("./type", ctxt, &nodes); + virNodeDevCapMdevPtr mdev = NULL; + size_t i; + + orignode = ctxt->node; + for (i = 0; i < nmdevs; i++) { + ctxt->node = nodes[i]; + + if (VIR_ALLOC(mdev) < 0) + goto cleanup; + + if (!(mdev->type = virXPathString("string(./@id[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("missing 'id' attribute for mediated device's " + "<type> element")); + goto cleanup; + } + + if (!(mdev->device_api = virXPathString("string(./deviceAPI[1])", ctxt))) { + virReportError(VIR_ERR_XML_ERROR, + _("missing device API for mediated device type '%s'"), + mdev->type); + goto cleanup; + } + + if (virXPathUInt("number(./availableInstances)", ctxt, + &mdev->available_instances) < 0) { + virReportError(VIR_ERR_XML_ERROR, + _("missing number of available instances for " + "mediated device type '%s'"), + mdev->type); + goto cleanup; + } + + mdev->name = virXPathString("string(./name)", ctxt); + + if (VIR_APPEND_ELEMENT(pci_dev->mdevs, pci_dev->nmdevs, mdev) < 0) + goto cleanup; + } + + pci_dev->flags |= VIR_NODE_DEV_CAP_FLAG_PCI_MDEV; + ret = 0; + cleanup: + virNodeDevCapMdevFree(mdev); + ctxt->node = orignode; + return ret; +} + + +static int virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, xmlNodePtr node, virNodeDevCapPCIDevPtr pci_dev) @@ -1382,6 +1479,9 @@ virNodeDevPCICapabilityParseXML(xmlXPathContextPtr ctxt, if (sriov_cap && virNodeDevPCICapSRIOVParseXML(ctxt, node, pci_dev, sriov_cap) < 0) { goto cleanup; + } if (STREQ(type, "mdev") && + virNodeDevPCICapMediatedDevParseXML(ctxt, pci_dev)) { + goto cleanup; } else { int hdrType = virPCIHeaderTypeFromString(type); @@ -1894,6 +1994,9 @@ virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) VIR_FREE(data->pci_dev.iommuGroupDevices[i]); VIR_FREE(data->pci_dev.iommuGroupDevices); virPCIEDeviceInfoFree(data->pci_dev.pci_express); + for (i = 0; i < data->pci_dev.nmdevs; i++) + virNodeDevCapMdevFree(data->pci_dev.mdevs[i]); + VIR_FREE(data->pci_dev.mdevs); break; case VIR_NODE_DEV_CAP_USB_DEV: VIR_FREE(data->usb_dev.product_name); diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h index 375f97256..883fa017e 100644 --- a/src/conf/node_device_conf.h +++ b/src/conf/node_device_conf.h @@ -94,6 +94,7 @@ typedef enum { VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), VIR_NODE_DEV_CAP_FLAG_PCIE = (1 << 2), + VIR_NODE_DEV_CAP_FLAG_PCI_MDEV = (1 << 3), } virNodeDevPCICapFlags; typedef enum { @@ -132,6 +133,16 @@ struct _virNodeDevCapSystem { virNodeDevCapSystemFirmware firmware; }; +typedef struct _virNodeDevCapMdev virNodeDevCapMdev; +typedef virNodeDevCapMdev *virNodeDevCapMdevPtr; +struct _virNodeDevCapMdev { + char *type; + char *name; + char *device_api; + unsigned int available_instances; + unsigned int iommuGroupNumber; +}; + typedef struct _virNodeDevCapPCIDev virNodeDevCapPCIDev; typedef virNodeDevCapPCIDev *virNodeDevCapPCIDevPtr; struct _virNodeDevCapPCIDev { @@ -155,6 +166,8 @@ struct _virNodeDevCapPCIDev { int numa_node; virPCIEDeviceInfoPtr pci_express; int hdrType; /* enum virPCIHeaderType or -1 */ + virNodeDevCapMdevPtr *mdevs; + size_t nmdevs; }; typedef struct _virNodeDevCapUSBDev virNodeDevCapUSBDev; @@ -263,6 +276,7 @@ struct _virNodeDevCapData { virNodeDevCapStorage storage; virNodeDevCapSCSIGeneric sg; virNodeDevCapDRM drm; + virNodeDevCapMdev mdev; }; }; @@ -339,6 +353,9 @@ virNodeDeviceDefFree(virNodeDeviceDefPtr def); void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps); +void +virNodeDevCapMdevFree(virNodeDevCapMdevPtr mdev); + # define VIR_CONNECT_LIST_NODE_DEVICES_FILTERS_CAP \ (VIR_CONNECT_LIST_NODE_DEVICES_CAP_SYSTEM | \ VIR_CONNECT_LIST_NODE_DEVICES_CAP_PCI_DEV | \ diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 181e17875..d1e872e60 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -665,6 +665,7 @@ virNetDevIPRouteParseXML; # conf/node_device_conf.h +virNodeDevCapMdevFree; virNodeDevCapsDefFree; virNodeDevCapTypeFromString; virNodeDevCapTypeToString; diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c index 95c1aee29..79f1537d9 100644 --- a/src/node_device/node_device_udev.c +++ b/src/node_device/node_device_udev.c @@ -314,6 +314,133 @@ static int udevTranslatePCIIds(unsigned int vendor, } +static int +udevGetMdevCaps(struct udev_device *device, + const char *sysfspath, + virNodeDevCapMdevPtr mdev) +{ + int ret = -1; + char *attrpath = NULL; /* relative path to the actual sysfs attribute */ + const char *devpath = NULL; /* base sysfs path as reported by udev */ + const char *relpath = NULL; /* diff between @sysfspath and @devpath */ + char *tmp = NULL; + +#define MDEV_GET_SYSFS_ATTR(attr_name, dir, cb, ...) \ + do { \ + if (virAsprintf(&attrpath, "%s/%s", dir, #attr_name) < 0) \ + goto cleanup; \ + \ + if (cb(device, attrpath, __VA_ARGS__) < 0) \ + goto cleanup; \ + \ + VIR_FREE(attrpath); \ + } while (0) \ + + /* UDEV doesn't report attributes under subdirectories by default but is + * able to query them if the path to the attribute is relative to the + * device's base path, e.g. /sys/devices/../0000:00:01.0/ is the device's + * base path as udev reports it, but we're interested in attributes under + * /sys/devices/../0000:00:01.0/mdev_supported_types/<type>/. So, let's + * strip the common part of the path and let udev chew the relative bit. + */ + devpath = udev_device_get_syspath(device); + relpath = sysfspath + strlen(devpath); + + /* When calling from the mdev child device, @sysfspath is a symbolic link + * to the actual mdev type (rather than a physical path), so we need to + * resolve it in order to get the type's name. + */ + if (virFileResolveLink(sysfspath, &tmp) < 0) + goto cleanup; + + if (VIR_STRDUP(mdev->type, last_component(tmp)) < 0) + goto cleanup; + + MDEV_GET_SYSFS_ATTR(name, relpath, + udevGetStringSysfsAttr, &mdev->name); + MDEV_GET_SYSFS_ATTR(device_api, relpath, + udevGetStringSysfsAttr, &mdev->device_api); + MDEV_GET_SYSFS_ATTR(available_instances, relpath, + udevGetUintSysfsAttr, &mdev->available_instances, 10); + +#undef MDEV_GET_SYSFS_ATTR + + ret = 0; + cleanup: + VIR_FREE(attrpath); + VIR_FREE(tmp); + return ret; +} + + +static int +udevPCIGetMdevCaps(struct udev_device *device, + virNodeDevCapPCIDevPtr pcidata) +{ + int ret = -1; + int direrr = -1; + DIR *dir = NULL; + struct dirent *entry; + char *path = NULL; + char *tmppath = NULL; + virNodeDevCapMdevPtr mdev = NULL; + virNodeDevCapMdevPtr *mdevs = NULL; + size_t nmdevs = 0; + size_t i; + + if (virAsprintf(&path, "%s/mdev_supported_types", + udev_device_get_syspath(device)) < 0) + return -1; + + if ((direrr = virDirOpenIfExists(&dir, path)) < 0) + goto cleanup; + + if (direrr == 0) { + ret = 0; + goto cleanup; + } + + if (VIR_ALLOC(mdevs) < 0) + goto cleanup; + + /* since udev doesn't provide means to list other than top-level + * attributes, we need to scan the subdirectories ourselves + */ + while ((direrr = virDirRead(dir, &entry, path)) > 0) { + if (VIR_ALLOC(mdev) < 0) + goto cleanup; + + if (virAsprintf(&tmppath, "%s/%s", path, entry->d_name) < 0) + goto cleanup; + + if (udevGetMdevCaps(device, tmppath, mdev) < 0) + goto cleanup; + + if (VIR_APPEND_ELEMENT(mdevs, nmdevs, mdev) < 0) + goto cleanup; + + VIR_FREE(tmppath); + } + + if (direrr < 0) + goto cleanup; + + VIR_STEAL_PTR(pcidata->mdevs, mdevs); + pcidata->nmdevs = nmdevs; + nmdevs = 0; + ret = 0; + cleanup: + virNodeDevCapMdevFree(mdev); + for (i = 0; i < nmdevs; i++) + virNodeDevCapMdevFree(mdevs[i]); + VIR_FREE(mdevs); + VIR_FREE(path); + VIR_FREE(tmppath); + VIR_DIR_CLOSE(dir); + return ret; +} + + static int udevProcessPCI(struct udev_device *device, virNodeDeviceDefPtr def) { @@ -400,6 +527,12 @@ static int udevProcessPCI(struct udev_device *device, } } + /* check whether the device is mediated devices framework capable, if so, + * process it + */ + if (udevPCIGetMdevCaps(device, pci_dev) < 0) + goto cleanup; + ret = 0; cleanup: diff --git a/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml new file mode 100644 index 000000000..b745686d3 --- /dev/null +++ b/tests/nodedevschemadata/pci_0000_02_10_7_mdev.xml @@ -0,0 +1,27 @@ +<device> + <name>pci_0000_02_10_7</name> + <parent>pci_0000_00_04_0</parent> + <capability type='pci'> + <domain>0</domain> + <bus>2</bus> + <slot>16</slot> + <function>7</function> + <product id='0x10ca'>82576 Virtual Function</product> + <vendor id='0x8086'>Intel Corporation</vendor> + <capability type='mdev'> + <type id='foo'> + <name>bar</name> + <deviceAPI>vfio-pci</deviceAPI> + <availableInstances>1</availableInstances> + </type> + </capability> + <iommuGroup number='31'> + <address domain='0x0000' bus='0x02' slot='0x10' function='0x7'/> + </iommuGroup> + <numa node='0'/> + <pci-express> + <link validity='cap' port='0' speed='2.5' width='4'/> + <link validity='sta' width='0'/> + </pci-express> + </capability> +</device> -- 2.12.2 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list