Mellanox ConnectX-3 dual port SRIOV NICs present a bit of a challenge when assigning one of their VFs to a guest using VFIO device assignment. These NICs have only a single PCI PF device, and that single PF has two netdevs sharing the single PCI address - one for port 1 and one for port 2. When a VF is created it can also have 2 netdevs, or it can be set up in "single port" mode, where the VF has only a single netdev, and that netdev is connected either to port 1 or to port 2. When the VF is created in dual port mode, you get/set the MAC address/vlan tag for the port 1 VF by sending a netlink message to the PF's port 1 netdev, and you get/set the MAC address/vlan tag for the port 2 VF by sending a netlink message to the PF's port 2 netdev. (Of course libvirt doesn't have any way to describe MAC/vlan info for 2 ports in a single hostdev interface, so that's a bit of a moot point.) When the VF is created in single port mode, you can *set* the MAC/vlan info by sending a netlink message to *either* PF netdev - the driver is smart enough to understand that there's only a single netdev, and set the MAC/vlan for that netdev. When you want to *get* it, however, the driver is more accurate - it will return 00:00:00:00:00:00 for the MAC if you request it from the port 1 PF netdev when the VF was configured to be single port on port 2, or if you request it from the port 2 PF netdev when the VF was configured to be single port on port 1. Based on this information, when *getting* the MAC/vlan info (to save the original setting prior to assignment), we determine the correct PF netdev by matching phys_port_id between VF and PF. (IMPORTANT NOTE: this implies that to do PCI device assignment of the VFs on dual port Mellanox cards using <interface type='hostdev'> (i.e.
if you want the MAC address/vlan tag to be set), not only must the VFs be configured in single port mode, but also the VFs *must* be bound to the host VF net driver, and libvirt must use managed='yes') By the time libvirt is ready to set the new MAC/vlan tag, the VF has already been unbound from the host net driver and bound to vfio-pci. This isn't problematic though because, as stated earlier, when a VF is created in single port mode, commands to configure it can be sent to either the port 1 PF netdev or the port 2 PF netdev. When it is time to restore the original MAC/vlan tag, again the VF will *not* be bound to a host net driver, so it won't be possible to learn from sysfs whether to use the port 1 or port 2 PF netdev for the netlink commands. And again, it doesn't matter which netdev you use. However, we must keep in mind that we saved the original settings to a file called "${PF}_${VFNUM}". To solve this problem, we just check for the existence of ${PF1}_${VFNUM} and ${PF2}_${VFNUM}, and use whichever one we find (since we know that only one can be there) --- New in V2 src/util/virhostdev.c | 27 +++++++++++++++++++++------ src/util/virpci.c | 31 +++++++++++++++++++++++++++++-- src/util/virpci.h | 4 +++- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/util/virhostdev.c b/src/util/virhostdev.c index 580f0fac0..102fd85c1 100644 --- a/src/util/virhostdev.c +++ b/src/util/virhostdev.c @@ -307,7 +307,9 @@ virHostdevIsVirtualFunction(virDomainHostdevDefPtr hostdev) static int -virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev, +virHostdevNetDevice(virDomainHostdevDefPtr hostdev, + int pfNetDevIdx, + char **linkdev, int *vf) { int ret = -1; @@ -317,9 +319,10 @@ virHostdevNetDevice(virDomainHostdevDefPtr hostdev, char **linkdev, return ret; if (virPCIIsVirtualFunction(sysfs_path) == 1) { - if (virPCIGetVirtualFunctionInfo(sysfs_path, linkdev, - vf) < 0) + if (virPCIGetVirtualFunctionInfo(sysfs_path, pfNetDevIdx, + linkdev, vf) < 0) 
{ goto cleanup; + } } else { /* In practice this should never happen, since we currently * only support assigning SRIOV VFs via <interface @@ -444,7 +447,7 @@ virHostdevSaveNetConfig(virDomainHostdevDefPtr hostdev, goto cleanup; } - if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0) + if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0) goto cleanup; if (virNetDevSaveNetConfig(linkdev, vf, stateDir, true) < 0) @@ -482,7 +485,7 @@ virHostdevSetNetConfig(virDomainHostdevDefPtr hostdev, if (!virHostdevIsPCINetDevice(hostdev)) return 0; - if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0) + if (virHostdevNetDevice(hostdev, -1, &linkdev, &vf) < 0) goto cleanup; vlan = virDomainNetGetActualVlan(hostdev->parent.data.net); @@ -545,7 +548,7 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev, return ret; } - if (virHostdevNetDevice(hostdev, &linkdev, &vf) < 0) + if (virHostdevNetDevice(hostdev, 0, &linkdev, &vf) < 0) return ret; virtPort = virDomainNetGetActualVirtPortProfile( @@ -565,6 +568,18 @@ virHostdevRestoreNetConfig(virDomainHostdevDefPtr hostdev, ret = virNetDevReadNetConfig(linkdev, vf, oldStateDir, &adminMAC, &vlan, &MAC); + if (ret < 0) { + /* see if the config was saved using the PF's "port 2" + * netdev for the file name. + */ + VIR_FREE(linkdev); + + if (virHostdevNetDevice(hostdev, 1, &linkdev, &vf) >= 0) { + ret = virNetDevReadNetConfig(linkdev, vf, stateDir, + &adminMAC, &vlan, &MAC); + } + } + if (ret == 0) { /* if a MAC was stored for the VF, we should now restore * that as the adminMAC. 
We have to do it this way because diff --git a/src/util/virpci.c b/src/util/virpci.c index 62a36b380..5ded77087 100644 --- a/src/util/virpci.c +++ b/src/util/virpci.c @@ -2935,10 +2935,14 @@ virPCIGetNetName(const char *device_link_sysfs_path, int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, - char **pfname, int *vf_index) + int pfNetDevIdx, + char **pfname, + int *vf_index) { virPCIDeviceAddressPtr pf_config_address = NULL; char *pf_sysfs_device_path = NULL; + char *vfname = NULL; + char *vfPhysPortID = NULL; int ret = -1; if (virPCIGetPhysicalFunction(vf_sysfs_device_path, &pf_config_address) < 0) @@ -2957,8 +2961,28 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, goto cleanup; } - if (virPCIGetNetName(pf_sysfs_device_path, 0, NULL, pfname) < 0) + /* If the caller hasn't asked for a specific pfNetDevIdx, and VF + * is bound to a netdev, learn that netdev's phys_port_id (if + * available). This can be used to disambiguate when the PF has + * multiple netdevs. If the VF isn't bound to a netdev, then we + * return netdev[pfNetDevIdx] on the PF, which may or may not be + * correct. + */ + if (pfNetDevIdx == -1) { + if (virPCIGetNetName(vf_sysfs_device_path, 0, NULL, &vfname) < 0) + goto cleanup; + + if (vfname) { + if (virNetDevGetPhysPortID(vfname, &vfPhysPortID) < 0) + goto cleanup; + } + pfNetDevIdx = 0; + } + + if (virPCIGetNetName(pf_sysfs_device_path, + pfNetDevIdx, vfPhysPortID, pfname) < 0) { goto cleanup; + } if (!*pfname) { /* this shouldn't be possible. 
A VF can't exist unless its @@ -2974,6 +2998,8 @@ virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, cleanup: VIR_FREE(pf_config_address); VIR_FREE(pf_sysfs_device_path); + VIR_FREE(vfname); + VIR_FREE(vfPhysPortID); return ret; } @@ -3044,6 +3070,7 @@ virPCIGetNetName(const char *device_link_sysfs_path ATTRIBUTE_UNUSED, int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path ATTRIBUTE_UNUSED, + int pfNetDevIdx ATTRIBUTE_UNUSED, char **pfname ATTRIBUTE_UNUSED, int *vf_index ATTRIBUTE_UNUSED) { diff --git a/src/util/virpci.h b/src/util/virpci.h index adf336706..f1fbe39e6 100644 --- a/src/util/virpci.h +++ b/src/util/virpci.h @@ -226,7 +226,9 @@ int virPCIGetAddrString(unsigned int domain, int virPCIDeviceAddressParse(char *address, virPCIDeviceAddressPtr bdf); int virPCIGetVirtualFunctionInfo(const char *vf_sysfs_device_path, - char **pfname, int *vf_index); + int pfNetDevIdx, + char **pfname, + int *vf_index); int virPCIDeviceUnbind(virPCIDevicePtr dev); int virPCIDeviceRebind(virPCIDevicePtr dev); -- 2.13.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list