On Fri, Dec 04, 2009 at 10:30:46AM -0500, Dave Allan wrote: > Daniel P. Berrange wrote: > > > >I don't much like including the raw BDF format, because it is effectively > >adding a 3rd way of specifying PCI addresses in libvirt XML. We already > >have 2 different ways, which is 1 too many > > > >Node dev XML > > > > <capability type='pci'> > > <domain>0</domain> > > <bus>0</bus> > > <slot>31</slot> > > <function>1</function> > > </capability> > > > > > >Domain XML for host devices & guest addresses > > > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x5'/> > > > >Yes, my bad for screwing up the nodedev XML format when I designed > >it, stupidly choosing decimal instead of hex :-( > > > >I think we should make the virtual/physical function follow the domain > >XML style, with an <address/> element. Perhaps also use nested capabilities > >in the way we did for NPIV host/vport stuff > > > > <capability type='physical_function'> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1'/> > > </capability> > > > > <capability type='virtual_functions'> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x3'/> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x4'/> > > </capability> > > > > > >I'd even be inclined to retro-fit the existing <capability type='pci'> > >XML to duplicate the address info in this saner format eg > > > > <capability type='pci'> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1' /> > > <domain>0</domain> > > <bus>0</bus> > > <slot>31</slot> > > <function>1</function> > > </capability> > > > > > >So, a PCI card with SR-IOV would end up looking like > > > > <capability type='pci'> > > <domain>0</domain> > > <bus>0</bus> > > <slot>31</slot> > > <function>1</function> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x1' /> > > > > <capability type='virtual_functions'> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x3'/> > > <address domain='0x0000' bus='0x00' slot='0x1f' function='0x4'/> > > </capability> > > </capability> > > > >The nice thing about this kind of nesting is that it would let us show > >that a card is capable of exposing virtual functions, even if it does > >not currently have any enabled > > > > > >Regards, > >Daniel > > Here's an updated patch reflecting the above suggestions. > > Dave > > From d11d00d93c4bfbfd1125560ce80abfdbf88de94b Mon Sep 17 00:00:00 2001 > From: David Allan <dallan@xxxxxxxxxx> > Date: Mon, 30 Nov 2009 15:58:47 -0500 > Subject: [PATCH 1/1] Add SR IOV physical and virtual function relationships > > --- > src/conf/node_device_conf.c | 30 +++++ > src/conf/node_device_conf.h | 16 +++ > src/node_device/node_device_driver.h | 14 ++ > src/node_device/node_device_hal.c | 6 + > src/node_device/node_device_linux_sysfs.c | 200 ++++++++++++++++++++++++++++- > 5 files changed, 265 insertions(+), 1 deletions(-) > > diff --git a/src/conf/node_device_conf.c b/src/conf/node_device_conf.c > index 6003ab1..a8ecfb0 100644 > --- a/src/conf/node_device_conf.c > +++ b/src/conf/node_device_conf.c > @@ -246,6 +246,7 @@ char *virNodeDeviceDefFormat(virConnectPtr conn, > { > virBuffer buf = VIR_BUFFER_INITIALIZER; > virNodeDevCapsDefPtr caps; > + unsigned int i = 0; > char *tmp; > > virBufferAddLit(&buf, "<device>\n"); > @@ -318,6 +319,30 @@ char *virNodeDeviceDefFormat(virConnectPtr conn, > data->pci_dev.vendor_name); > else > virBufferAddLit(&buf, " />\n"); > + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION) { > + virBufferAddLit(&buf, " <capability type='physical function'>\n"); Can we change that to 'phys_function' > + virBufferVSprintf(&buf, > + " <address domain='0x%.4x' bus='0x%.2x' " > + "slot='0x%.2x' function='0x%.1x'/>\n", > + data->pci_dev.physical_function->domain, > + data->pci_dev.physical_function->bus, > + data->pci_dev.physical_function->slot, > + data->pci_dev.physical_function->function); > + virBufferAddLit(&buf, " </capability>\n"); > + } > + if (data->pci_dev.flags & VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION) { > + virBufferAddLit(&buf, " <capability type='virtual functions'>\n"); And 'virt_function', because its nice to not have whitespace in the values > + for (i = 0 ; i < data->pci_dev.num_virtual_functions ; i++) { > + virBufferVSprintf(&buf, > + " <address domain='0x%.4x' bus='0x%.2x' " > + "slot='0x%.2x' function='0x%.1x'/>\n", > + data->pci_dev.virtual_functions[i]->domain, > + data->pci_dev.virtual_functions[i]->bus, > + data->pci_dev.virtual_functions[i]->slot, > + data->pci_dev.virtual_functions[i]->function); > + } > + virBufferAddLit(&buf, " </capability>\n"); > + } > break; > case VIR_NODE_DEV_CAP_USB_DEV: > virBufferVSprintf(&buf, " <bus>%d</bus>\n", data->usb_dev.bus); > @@ -1387,6 +1412,7 @@ out: > > void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) > { > + int i = 0; > union _virNodeDevCapData *data = &caps->data; > > switch (caps->type) { > @@ -1402,6 +1428,10 @@ void virNodeDevCapsDefFree(virNodeDevCapsDefPtr caps) > case VIR_NODE_DEV_CAP_PCI_DEV: > VIR_FREE(data->pci_dev.product_name); > VIR_FREE(data->pci_dev.vendor_name); > + VIR_FREE(data->pci_dev.physical_function); > + for (i = 0 ; i < data->pci_dev.num_virtual_functions ; i++) { > + VIR_FREE(data->pci_dev.virtual_functions[i]); > + } > break; > case VIR_NODE_DEV_CAP_USB_DEV: > VIR_FREE(data->usb_dev.product_name); > diff --git a/src/conf/node_device_conf.h b/src/conf/node_device_conf.h > index 7a20bd6..4bfac90 100644 > --- a/src/conf/node_device_conf.h > +++ b/src/conf/node_device_conf.h > @@ -76,6 +76,18 @@ enum virNodeDevScsiHostCapFlags { > VIR_NODE_DEV_CAP_FLAG_HBA_VPORT_OPS = (1 << 1), > }; > > +enum virNodeDevPCICapFlags { > + VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION = (1 << 0), > + VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION = (1 << 1), > +}; > + > +struct pci_config_address { > + unsigned domain; > + unsigned bus; > + unsigned slot; > + unsigned function; > +}; > + > typedef struct _virNodeDevCapsDef virNodeDevCapsDef; > typedef virNodeDevCapsDef *virNodeDevCapsDefPtr; > struct _virNodeDevCapsDef { > @@ -105,6 +117,10 @@ struct _virNodeDevCapsDef { > unsigned class; > char *product_name; > char *vendor_name; > + struct pci_config_address *physical_function; > + struct pci_config_address **virtual_functions; > + unsigned num_virtual_functions; > + unsigned flags; > } pci_dev; > struct { > unsigned bus; > diff --git a/src/node_device/node_device_driver.h b/src/node_device/node_device_driver.h > index 4f0822c..7b68f41 100644 > --- a/src/node_device/node_device_driver.h > +++ b/src/node_device/node_device_driver.h > @@ -37,6 +37,10 @@ > #define LINUX_SYSFS_VPORT_CREATE_POSTFIX "/vport_create" > #define LINUX_SYSFS_VPORT_DELETE_POSTFIX "/vport_delete" > > +#define SRIOV_FOUND 0 > +#define SRIOV_NOT_FOUND 1 > +#define SRIOV_ERROR -1 > + > #define LINUX_NEW_DEVICE_WAIT_TIME 60 > > #ifdef HAVE_HAL > @@ -61,6 +65,14 @@ int check_fc_host_linux(union _virNodeDevCapData *d); > #define check_vport_capable(d) check_vport_capable_linux(d) > int check_vport_capable_linux(union _virNodeDevCapData *d); > > +#define get_physical_function(s,d) get_physical_function_linux(s,d) > +int get_physical_function_linux(const char *sysfs_path, > + union _virNodeDevCapData *d); > + > +#define get_virtual_functions(s,d) get_virtual_functions_linux(s,d) > +int get_virtual_functions_linux(const char *sysfs_path, > + union _virNodeDevCapData *d); > + > #define read_wwn(host, file, wwn) read_wwn_linux(host, file, wwn) > int read_wwn_linux(int host, const char *file, char **wwn); > > @@ -68,6 +80,8 @@ int read_wwn_linux(int host, const char *file, char **wwn); > > #define check_fc_host(d) > #define check_vport_capable(d) > +#define get_physical_function(sysfs_path, d) > +#define get_virtual_functions(sysfs_path, d) > #define read_wwn(host, file, wwn) > > #endif /* __linux__ */ > diff --git a/src/node_device/node_device_hal.c b/src/node_device/node_device_hal.c > index 6de7e3d..4496301 100644 > --- a/src/node_device/node_device_hal.c > +++ b/src/node_device/node_device_hal.c > @@ -145,14 +145,20 @@ static int gather_pci_cap(LibHalContext *ctx, const char *udi, > (void)virStrToLong_ui(p+1, &p, 16, &d->pci_dev.slot); > (void)virStrToLong_ui(p+1, &p, 16, &d->pci_dev.function); > } > + > + get_physical_function(sysfs_path, d); > + get_virtual_functions(sysfs_path, d); > + > VIR_FREE(sysfs_path); > } > + > (void)get_int_prop(ctx, udi, "pci.vendor_id", (int *)&d->pci_dev.vendor); > if (get_str_prop(ctx, udi, "pci.vendor", &d->pci_dev.vendor_name) != 0) > (void)get_str_prop(ctx, udi, "info.vendor", &d->pci_dev.vendor_name); > (void)get_int_prop(ctx, udi, "pci.product_id", (int *)&d->pci_dev.product); > if (get_str_prop(ctx, udi, "pci.product", &d->pci_dev.product_name) != 0) > (void)get_str_prop(ctx, udi, "info.product", &d->pci_dev.product_name); > + > return 0; > } > > diff --git a/src/node_device/node_device_linux_sysfs.c b/src/node_device/node_device_linux_sysfs.c > index b7cf782..d2b10dd 100644 > --- a/src/node_device/node_device_linux_sysfs.c > +++ b/src/node_device/node_device_linux_sysfs.c > @@ -29,6 +29,7 @@ > #include "virterror_internal.h" > #include "memory.h" > #include "logging.h" > +#include <dirent.h> > > #define VIR_FROM_THIS VIR_FROM_NODEDEV > > @@ -70,7 +71,7 @@ int read_wwn_linux(int host, const char *file, char **wwn) > char buf[64]; > > if (open_wwn_file(LINUX_SYSFS_FC_HOST_PREFIX, host, file, &fd) < 0) { > - goto out; > + goto out; > } > > memset(buf, 0, sizeof(buf)); > @@ -184,4 +185,201 @@ out: > return retval; > } > > + > +static int logStrToLong_ui(char const *s, > + char **end_ptr, > + int base, > + unsigned int *result) > +{ > + int ret = 0; > + > + ret = virStrToLong_ui(s, end_ptr, base, result); > + if (ret != 0) { > + VIR_ERROR("Failed to convert '%s' to unsigned int\n", s); > + } else { > + VIR_DEBUG("Converted '%s' to unsigned int %u\n", s, *result); > + } There's no need for '\n' in log messages. > + > + return ret; > +} > + > + > +static int parse_pci_config_address(char *address, struct pci_config_address *bdf) > +{ > + char *p = NULL; > + int ret = -1; > + > + if ((address == NULL) || (logStrToLong_ui(address, > + &p, > + 16, > + &bdf->domain) == -1)) { > + goto out; > + } > + > + if ((p == NULL) || (logStrToLong_ui(p+1, > + &p, > + 16, > + &bdf->bus) == -1)) { > + goto out; > + } > + > + if ((p == NULL) || (logStrToLong_ui(p+1, > + &p, > + 16, > + &bdf->slot) == -1)) { > + goto out; > + } > + > + if ((p == NULL) || (logStrToLong_ui(p+1, > + &p, > + 16, > + &bdf->function) == -1)) { > + goto out; > + } > + > + ret = 0; > + > +out: > + return ret; > +} > + > + > + > + > +static int get_sriov_function(const char *device_link, > + struct pci_config_address **bdf) > +{ > + char *device_path = NULL, *config_address = NULL; > + char errbuf[64]; > + int ret = SRIOV_ERROR; > + > + VIR_DEBUG("Attempting to resolve device path from device link '%s'\n", > + device_link); > + > + if (!virFileExists(device_link)) { > + > + VIR_DEBUG("SR IOV function link '%s' does not exist\n", device_link); > + /* Not an SR IOV device, not an error, either. */ > + ret = SRIOV_NOT_FOUND; > + > + goto out; > + > + } > + > + device_path = realpath(device_link, device_path); > + if (device_path == NULL) { > + memset(errbuf, '\0', sizeof(errbuf)); > + VIR_ERROR("Failed to resolve device link '%s': '%s'\n", device_link, > + strerror_r(errno, errbuf, sizeof(errbuf))); Can you replace that with virStrerror() because strerror_r() has an annoying API difference on Linux compared to POSIX (returns char * instead of int, or vica-verca). > + goto out; > + } > + > + VIR_DEBUG("SR IOV device path is '%s'\n", device_path); > + config_address = basename(device_path); > + if (VIR_ALLOC(*bdf) != 0) { > + VIR_ERROR0("Failed to allocate memory for PCI device name\n"); > + goto out; > + } > + > + if (parse_pci_config_address(config_address, *bdf) != 0) { > + VIR_ERROR("Failed to parse PCI config address '%s'\n", config_address); > + goto out; > + } > + > + VIR_DEBUG("SR IOV function %.4x:%.2x:%.2x.%.1x/>\n", > + (*bdf)->domain, > + (*bdf)->bus, > + (*bdf)->slot, > + (*bdf)->function); > + > + ret = SRIOV_FOUND; > + > +out: > + VIR_FREE(device_path); > + return ret; > +} > + > + > +int get_physical_function_linux(const char *sysfs_path, > + union _virNodeDevCapData *d ATTRIBUTE_UNUSED) > +{ > + int ret = -1; > + char *device_link = NULL; > + > + VIR_DEBUG("Attempting to get SR IOV physical function for device " > + "with sysfs path '%s'\n", sysfs_path); > + > + if (virBuildPath(&device_link, sysfs_path, "physfn") == -1) { > + virReportOOMError(NULL); > + } else { > + ret = get_sriov_function(device_link, &d->pci_dev.physical_function); > + if (ret == SRIOV_FOUND) { > + d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_PHYSICAL_FUNCTION; > + } > + } > + > + VIR_FREE(device_link); > + return ret; > +} > + > + > +int get_virtual_functions_linux(const char *sysfs_path, > + union _virNodeDevCapData *d) > +{ > + int ret = -1; > + DIR *dir = NULL; > + struct dirent *entry = NULL; > + char *device_link = NULL; > + > + VIR_DEBUG("Attempting to get SR IOV virtual functions for device" > + "with sysfs path '%s'\n", sysfs_path); > + > + dir = opendir(sysfs_path); > + if (dir == NULL) { > + goto out; > + } > + > + while ((entry = readdir(dir))) { > + if (STRPREFIX(entry->d_name, "virtfn")) { > + /* This local is just to avoid lines of code much > 80 col. */ > + unsigned int *num_funcs = &d->pci_dev.num_virtual_functions; > + > + if (virBuildPath(&device_link, sysfs_path, entry->d_name) == -1) { > + virReportOOMError(NULL); > + goto out; > + } > + > + VIR_DEBUG("Number of virtual functions: %d\n", *num_funcs); > + if (VIR_REALLOC_N(d->pci_dev.virtual_functions, (*num_funcs) + 1) != 0) { > + virReportOOMError(NULL); > + goto out; > + } > + > + if (get_sriov_function(device_link, > + &d->pci_dev.virtual_functions[*num_funcs]) != > + SRIOV_FOUND) { > + > + /* We should not get back SRIOV_NOT_FOUND in this > + * case, so if we do, it's an error. */ > + VIR_ERROR("Failed to get SR IOV function from device link '%s'\n", > + device_link); > + goto out; > + } else { > + (*num_funcs)++; > + d->pci_dev.flags |= VIR_NODE_DEV_CAP_FLAG_PCI_VIRTUAL_FUNCTION; > + } > + > + VIR_FREE(device_link); > + } > + } > + > + closedir(dir); > + > + ret = 0; > + > +out: > + VIR_FREE(device_link); > + return 0; > +} > + > #endif /* __linux__ */ > -- ACK aside from the minor logging fixes & attribute whitespace Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list