+Adrian,Moshe On Fri, Aug 28, 2020 at 01:53:21PM +0300, Dmytro Linkin wrote: > Current virPCIGetNetName() logic is to get net device name by checking > it's phys_port_id, if caller provide it, or by it's index (eg, by it's > position at sysfs net directory). This approach worked fine up until > linux kernel version 5.8, where NVIDIA Mellanox driver implemented > linking of VFs' representors to PCI device in switchdev mode. This mean > that device's sysfs net directory will hold multiple net devices. Ex.: > > $ ls '/sys/bus/pci/devices/0000:82:00.0/net' > ens1f0 eth0 eth1 > > Most switch devices support phys_port_name instead of phys_port_id, so > virPCIGetNetName() will try to get PF name by it's index - 0. The > problem here is that the PF nedev entry may not be the first. > > To fix that, for switch devices, we introduce a new logic to select the > PF uplink netdev according to the content of phys_port_name. Extend > virPCIGetNetName() with physPortNameRegex variable to get proper device > by it's phys_port_name scheme, for ex., "p[0-9]+$" to get PF, > "pf[0-9]+vf[0-9]+$" to get VF or "p1$" to get exact net device. So now > virPCIGetNetName() logic work in following sequence: > - filter by phys_port_id, if it's provided, > or > - filter by phys_port_name, if it's regex provided, > or > - get net device by it's index (position) in sysfs net directory. > Also, make getting content of iface sysfs files more generic. > > Signed-off-by: Dmytro Linkin <dlinkin@xxxxxxxxxx> > Reviewed-by: Adrian Chiris <adrianc@xxxxxxxxxx> > --- > src/hypervisor/virhostdev.c | 2 +- > src/util/virnetdev.c | 74 ++++++++++++++++++++++++++++++++++++--------- > src/util/virnetdev.h | 4 +++ > src/util/virpci.c | 63 ++++++++++++++++++++++++++++++++++++-- > src/util/virpci.h | 6 ++++ > 5 files changed, 130 insertions(+), 19 deletions(-) > > diff --git a/src/hypervisor/virhostdev.c b/src/hypervisor/virhostdev.c > index 69102b8..1f5c347 100644 > --- a/src/hypervisor/virhostdev.c > +++ b/src/hypervisor/virhostdev.c > @@ -333,7 +333,7 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev, > * type='hostdev'>, and it is only those devices that should > * end up calling this function. > */ > - if (virPCIGetNetName(sysfs_path, 0, NULL, linkdev) < 0) > + if (virPCIGetNetName(sysfs_path, 0, NULL, NULL, linkdev) < 0) > return -1; > > if (!(*linkdev)) { > diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c > index b42fa86..99e3b35 100644 > --- a/src/util/virnetdev.c > +++ b/src/util/virnetdev.c > @@ -1112,6 +1112,29 @@ virNetDevGetPCIDevice(const char *devName) > } > > > +/* A wrapper to get content of file from ifname SYSFS_NET_DIR > + */ > +static int > +virNetDevGetSysfsFileValue(const char *ifname, > + const char *fileName, > + char **sysfsFileData) > +{ > + g_autofree char *sysfsFile = NULL; > + > + *sysfsFileData = NULL; > + > + if (virNetDevSysfsFile(&sysfsFile, ifname, fileName) < 0) > + return -1; > + > + /* a failure to read just means the driver doesn't support > + * <fileName>, so set success now and ignore the return from > + * virFileReadAllQuiet(). > + */ > + > + ignore_value(virFileReadAllQuiet(sysfsFile, 1024, sysfsFileData)); > + return 0; > +} > + > /** > * virNetDevGetPhysPortID: > * > @@ -1130,20 +1153,29 @@ int > virNetDevGetPhysPortID(const char *ifname, > char **physPortID) > { > - g_autofree char *physPortIDFile = NULL; > - > - *physPortID = NULL; > - > - if (virNetDevSysfsFile(&physPortIDFile, ifname, "phys_port_id") < 0) > - return -1; > + return virNetDevGetSysfsFileValue(ifname, "phys_port_id", physPortID); > +} > > - /* a failure to read just means the driver doesn't support > - * phys_port_id, so set success now and ignore the return from > - * virFileReadAllQuiet(). > - */ > > - ignore_value(virFileReadAllQuiet(physPortIDFile, 1024, physPortID)); > - return 0; > +/** > + * virNetDevGetPhysPortName: > + * > + * @ifname: name of a netdev > + * > + * @physPortName: pointer to char* that will receive @ifname's > + * phys_port_name from sysfs (null terminated > + * string). Could be NULL if @ifname's net driver doesn't > + * support phys_port_name (most netdev drivers > + * don't). Caller is responsible for freeing the string > + * when finished. > + * > + * Returns 0 on success or -1 on failure. > + */ > +int > +virNetDevGetPhysPortName(const char *ifname, > + char **physPortName) > +{ > + return virNetDevGetSysfsFileValue(ifname, "phys_port_name", physPortName); > } > > > @@ -1200,7 +1232,7 @@ virNetDevGetVirtualFunctions(const char *pfname, > } > > if (virPCIGetNetName(pci_sysfs_device_link, 0, > - pfPhysPortID, &((*vfname)[i])) < 0) { > + pfPhysPortID, NULL, &((*vfname)[i])) < 0) { > goto cleanup; > } > > @@ -1295,7 +1327,8 @@ virNetDevGetPhysicalFunction(const char *ifname, char **pfname) > return -1; > > if (virPCIGetNetName(physfn_sysfs_path, 0, > - vfPhysPortID, pfname) < 0) { > + vfPhysPortID, > + VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) { > return -1; > } > > @@ -1358,7 +1391,7 @@ virNetDevPFGetVF(const char *pfname, int vf, char **vfname) > * isn't bound to a netdev driver, it won't have a netdev name, > * and vfname will be NULL). > */ > - return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, vfname); > + return virPCIGetNetName(virtfnSysfsPath, 0, pfPhysPortID, NULL, vfname); > } > > > @@ -1403,6 +1436,17 @@ virNetDevGetPhysPortID(const char *ifname G_GNUC_UNUSED, > } > > int > +virNetDevGetPhysPortName(const char *ifname G_GNUC_UNUSED, > + char **physPortName) > +{ > + /* this actually should never be called, and is just here to > + * satisfy the linker. > + */ > + *physPortName = NULL; > + return 0; > +} > + > +int > virNetDevGetVirtualFunctions(const char *pfname G_GNUC_UNUSED, > char ***vfname G_GNUC_UNUSED, > virPCIDeviceAddressPtr **virt_fns G_GNUC_UNUSED, > diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h > index 55e3948..712421d 100644 > --- a/src/util/virnetdev.h > +++ b/src/util/virnetdev.h > @@ -229,6 +229,10 @@ int virNetDevGetPhysPortID(const char *ifname, > char **physPortID) > ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) > G_GNUC_WARN_UNUSED_RESULT; > +int virNetDevGetPhysPortName(const char *ifname, > + char **physPortName) > + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) > + G_GNUC_WARN_UNUSED_RESULT; > > int virNetDevGetVirtualFunctions(const char *pfname, > char ***vfname, > diff --git a/src/util/virpci.c b/src/util/virpci.c > index 47c671d..18b3f66 100644 > --- a/src/util/virpci.c > +++ b/src/util/virpci.c > @@ -2409,8 +2409,10 @@ virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr, > * virPCIGetNetName: > * @device_link_sysfs_path: sysfs path to the PCI device > * @idx: used to choose which netdev when there are several > - * (ignored if physPortID is set) > + * (ignored if physPortID or physPortNameRegex is set) > * @physPortID: match this string in the netdev's phys_port_id > + * (or NULL to ignore and use phys_port_name or idx instead) > + * @physPortNameRegex: match this regex with netdev's phys_port_name > * (or NULL to ignore and use idx instead) > * @netname: used to return the name of the netdev > * (set to NULL (but returns success) if there is no netdev) > @@ -2421,11 +2423,13 @@ int > virPCIGetNetName(const char *device_link_sysfs_path, > size_t idx, > char *physPortID, > + char *physPortNameRegex, > char **netname) > { > g_autofree char *pcidev_sysfs_net_path = NULL; > g_autofree char *firstEntryName = NULL; > g_autofree char *thisPhysPortID = NULL; > + g_autofree char *thisPhysPortName = NULL; > int ret = -1; > DIR *dir = NULL; > struct dirent *entry = NULL; > @@ -2466,6 +2470,41 @@ virPCIGetNetName(const char *device_link_sysfs_path, > > continue; > } > + } else if (physPortNameRegex) { > + /* Most switch devices use phys_port_name instead of > + * phys_port_id. > + * NOTE: VFs' representors net devices can be linked to PF's PCI > + * device, which mean that there'll be multiple net devices > + * instances and to get a proper net device need to match on > + * specific regex. > + * To get PF netdev, for ex., used following regex: > + * "(p[0-9]+$)|(p[0-9]+s[0-9]+$)" > + * or to get exact VF's netdev next regex is used: > + * "pf0vf1$" > + */ > + if (virNetDevGetPhysPortName(entry->d_name, &thisPhysPortName) < 0) > + goto cleanup; > + > + if (thisPhysPortName) { > + /* if this one doesn't match, keep looking */ > + if (!virStringMatch(thisPhysPortName, physPortNameRegex)) { > + VIR_FREE(thisPhysPortName); > + /* Save the first entry we find to use as a failsafe > + * in case we fail to match on regex. > + */ > + if (!firstEntryName) > + firstEntryName = g_strdup(entry->d_name); > + > + continue; > + } > + } else { > + /* Save the first entry we find to use as a failsafe in case > + * phys_port_name is not supported. > + */ > + if (!firstEntryName) > + firstEntryName = g_strdup(entry->d_name); > + continue; > + } > } else { > if (i++ < idx) > continue; > @@ -2494,6 +2533,22 @@ virPCIGetNetName(const char *device_link_sysfs_path, > "phys_port_id '%s' under PCI device at %s"), > physPortID, device_link_sysfs_path); > } > + } else if (physPortNameRegex) { > + if (firstEntryName) { > + /* We didn't match the provided phys_port_name regex, probably > + * because kernel or NIC driver doesn't support it, so just > + * return first netname we found. > + */ > + *netname = firstEntryName; > + firstEntryName = NULL; > + ret = 0; > + } else { > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Could not find network device with " > + "phys_port_name matching regex '%s' " > + "under PCI device at %s"), > + physPortNameRegex, device_link_sysfs_path); > + } > } else { > ret = 0; /* no netdev at the given index is *not* an error */ > } > @@ -2539,7 +2594,7 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, > * correct. > */ > if (pfNetDevIdx == -1) { > - if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0) > + if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, NULL, &vfname) < 0) > goto cleanup; > > if (vfname) { > @@ -2550,7 +2605,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, > } > > if (virPCIGetNetName(pf_sysfs_device_path, > - pfNetDevIdx, vfPhysPortID, pfname) < 0) { > + pfNetDevIdx, vfPhysPortID, > + VIR_PF_PHYS_PORT_NAME_REGEX, pfname) < 0) { > goto cleanup; > } > > @@ -2688,6 +2744,7 @@ int > virPCIGetNetName(const char *device_link_sysfs_path G_GNUC_UNUSED, > size_t idx G_GNUC_UNUSED, > char *physPortID G_GNUC_UNUSED, > + char *physPortNameScheme G_GNUC_UNUSED, > char **netname G_GNUC_UNUSED) > { > virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _(unsupported)); > diff --git a/src/util/virpci.h b/src/util/virpci.h > index b3322ba..6ea0873 100644 > --- a/src/util/virpci.h > +++ b/src/util/virpci.h > @@ -55,6 +55,11 @@ struct _virZPCIDeviceAddress { > > #define VIR_PCI_DEVICE_ADDRESS_FMT "%04x:%02x:%02x.%d" > > +/* Represents format of PF's phys_port_name in switchdev mode: > + * 'p%u' or 'p%us%u'. New line checked since value is readed from sysfs file. > + */ > +# define VIR_PF_PHYS_PORT_NAME_REGEX ((char *)"(p[0-9]+$)|(p[0-9]+s[0-9]+$)") > + > struct _virPCIDeviceAddress { > unsigned int domain; > unsigned int bus; > @@ -232,6 +237,7 @@ int virPCIDeviceAddressGetSysfsFile(virPCIDeviceAddressPtr addr, > int virPCIGetNetName(const char *device_link_sysfs_path, > size_t idx, > char *physPortID, > + char *physPortNameRegex, > char **netname); > > int virPCIGetSysfsFile(char *virPCIDeviceName, > -- > 1.8.3.1 >