On 10/15/20 7:21 AM, zhenwei pi wrote:
Collect PCI passthrough net device stats from kernel by netlink
API.
Currently, libvirt can not get PCI passthrough net device stats,
run command:
#virsh domifstat instance --interface=52:54:00:2d:b2:35
error: Failed to get interface stats instance 52:54:00:2d:b2:35
error: internal error: Interface name not provided
The PCI device(usually SR-IOV virtual function device) is detached
(Actually it is *always* an SR-IOV VF - SR-IOV functionality is required
to set the MAC address and clan tag of the interface; without that
capability, it can only be assigned to the guest as a plain <hostdev>,
and in that case libvirt wouldn't even know that it was a network device.)
while it's used in PCI passthrough mode. And we can not parse this
device from /proc/net/dev any more.
In this patch, libvirt checks net device is VF of not firstly, then
query virNetDevVFInterfaceStats(new API).
virNetDevVFInterfaceStats gets PF name, then dumps VF stats by
netlink, compares with VF index and get stats(suggest by Laine,
original version is implemented by zhenwei which parses VFs info
of all PFs, compares MAC address until the two MAC addresses match).
address until the two MAC addresses match.
(you forgot to remove this bit - I'll clean it up before pushing)
'#ip -s link show' can get the same result. Instead of parsing the
output result, implement this feature by libnl API.
Notice that this feature depends on driver of PF.
Test on Mellanox ConnectX-4 Lx, it works well.
Also test on Intel Corporation 82599ES, it works, but only get 0.
(ip-link command get the same result).
IFLA_VF_STATS is supported since Linux-4.2, suggested by Laine,
just using defined(__linux__) && WITH_LIBNL is enough.
(I'll also remove these historical artifacts of the review/revision process)
Thanks to Laine, Daniel<berrange@xxxxxxxxxx> & DHB for suggestions.
Signed-off-by: zhenwei pi <pizhenwei@xxxxxxxxxxxxx>
---
src/libvirt_private.syms | 1 +
src/qemu/qemu_driver.c | 13 ++++++++
src/util/virnetdev.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++--
src/util/virnetdev.h | 4 +++
4 files changed, 98 insertions(+), 2 deletions(-)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 52e9c6313f..ae543589f1 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2584,6 +2584,7 @@ virNetDevSetRootQDisc;
virNetDevSetupControl;
virNetDevSysfsFile;
virNetDevValidateConfig;
+virNetDevVFInterfaceStats;
# util/virnetdevbandwidth.h
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 825bdd9119..519af71fc1 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -10132,6 +10132,19 @@ qemuDomainInterfaceStats(virDomainPtr dom,
if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_VHOSTUSER) {
if (virNetDevOpenvswitchInterfaceStats(net->ifname, stats) < 0)
goto cleanup;
+ } else if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_HOSTDEV) {
+ virDomainHostdevDefPtr hostdev = virDomainNetGetActualHostdev(net);
+ virPCIDeviceAddressPtr vfAddr;
+
+ if (!hostdev) {
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ "%s", _("hostdev interface missing hostdev data"));
+ goto cleanup;
+ }
+
+ vfAddr = &hostdev->source.subsys.u.pci.addr;
+ if (virNetDevVFInterfaceStats(vfAddr, stats) < 0)
+ goto cleanup;
} else {
And extra line below goto cleanup would improve readability
if (virNetDevTapInterfaceStats(net->ifname, stats,
!virDomainNetTypeSharesHostView(net)) < 0)
diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c
index 5c7660dab4..f53e1751b3 100644
--- a/src/util/virnetdev.c
+++ b/src/util/virnetdev.c
@@ -1514,6 +1514,17 @@ static struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
.maxlen = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .type = NLA_UNSPEC,
.maxlen = sizeof(struct ifla_vf_vlan) },
+ [IFLA_VF_STATS] = { .type = NLA_NESTED },
+};
+
+
+static struct nla_policy ifla_vfstats_policy[IFLA_VF_STATS_MAX+1] = {
+ [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 },
+ [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 },
};
@@ -1651,13 +1662,14 @@ virNetDevSetVfConfig(const char *ifname, int vf,
static int
virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
- int *vlanid)
+ int *vlanid, virDomainInterfaceStatsPtr stats)
{
int rc = -1;
struct ifla_vf_mac *vf_mac;
struct ifla_vf_vlan *vf_vlan;
struct nlattr *tb_vf_info = {NULL, };
struct nlattr *tb_vf[IFLA_VF_MAX+1];
+ struct nlattr *tb_vf_stats[IFLA_VF_STATS_MAX+1];
int rem;
if (!tb[IFLA_VFINFO_LIST]) {
@@ -1693,6 +1705,26 @@ virNetDevParseVfConfig(struct nlattr **tb, int32_t vf, virMacAddrPtr mac,
}
}
+ if (stats && tb_vf[IFLA_VF_STATS] && tb_vf[IFLA_VF_MAC]) {
+ vf_mac = RTA_DATA(tb_vf[IFLA_VF_MAC]);
Hmm. Looking at this (correct) code made me realize that there has been
a bug higher up in this function ever since it was first implemented in
2012 (commit 5095bf06f)! - it has been checking
if (mac && tb[IFLA_VF_MAC])
instead of
if (mac && tb_vf[IFLA_VF_MAC])
I'll send a patch for that as soon as I'm finished reviewing and pushing
your patches.
+ if (vf_mac && vf_mac->vf == vf) {
+ rc = nla_parse_nested(tb_vf_stats, IFLA_VF_STATS_MAX,
+ tb_vf[IFLA_VF_STATS],
+ ifla_vfstats_policy);
+ if (rc < 0) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("error parsing IFLA_VF_STATS"));
+ return rc;
+ }
+
+ stats->rx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_BYTES]);
+ stats->tx_bytes = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_BYTES]);
+ stats->rx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_RX_PACKETS]);
+ stats->tx_packets = nla_get_u64(tb_vf_stats[IFLA_VF_STATS_TX_PACKETS]);
+ rc = 0;
+ }
+ }
+
if (rc == 0)
break;
}
@@ -1714,7 +1746,43 @@ virNetDevGetVfConfig(const char *ifname, int vf, virMacAddrPtr mac,
if (virNetlinkDumpLink(ifname, ifindex, &nlData, tb, 0, 0) < 0)
return -1;
- return virNetDevParseVfConfig(tb, vf, mac, vlanid);
+ return virNetDevParseVfConfig(tb, vf, mac, vlanid, NULL);
+}
+
+
+/**
+ * virNetDevVFInterfaceStats:
+ * @vfAddr: PCI address of a VF
+ * @stats: returns stats of the VF interface
+ *
+ * Get the VF interface from kernel by netlink.
+ * Returns 0 on success, -1 on failure.
+ */
+int
+virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
+ virDomainInterfaceStatsPtr stats)
+{
+ g_autofree void *nlData = NULL;
+ struct nlattr *tb[IFLA_MAX + 1] = {NULL, };
+ g_autofree char *vfSysfsPath = NULL;
+ g_autofree char *pfname = NULL;
+ int vf = -1;
+
+ if (virPCIDeviceAddressGetSysfsFile(vfAddr, &vfSysfsPath) < 0)
+ return -1;
+
+ if (!virPCIIsVirtualFunction(vfSysfsPath)) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, _("'%s' is not a VF device"), vfSysfsPath);
+ return -1;
+ }
+
+ if (virPCIGetVirtualFunctionInfo(vfSysfsPath, -1, &pfname, &vf) < 0)
+ return -1;
+
+ if (virNetlinkDumpLink(pfname, -1, &nlData, tb, 0, 0) < 0)
+ return -1;
+
+ return virNetDevParseVfConfig(tb, vf, NULL, NULL, stats);
}
@@ -2330,6 +2398,16 @@ virNetDevSetNetConfig(const char *linkdev G_GNUC_UNUSED,
}
+int
+virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr G_GNUC_UNUSED,
+ virDomainInterfaceStatsPtr stats G_GNUC_UNUSED)
+{
+ virReportSystemError(ENOSYS, "%s",
+ _("Unable to get VF net device stats on this platform"));
+ return -1;
+}
+
+
#endif /* defined(WITH_LIBNL) */
VIR_ENUM_IMPL(virNetDevIfState,
diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h
index dfef49938f..53e606c61c 100644
--- a/src/util/virnetdev.h
+++ b/src/util/virnetdev.h
@@ -316,4 +316,8 @@ int virNetDevSetRootQDisc(const char *ifname,
const char *qdisc)
G_GNUC_NO_INLINE;
+int virNetDevVFInterfaceStats(virPCIDeviceAddressPtr vfAddr,
+ virDomainInterfaceStatsPtr stats)
+ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2);
+
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virNetDevRxFilter, virNetDevRxFilterFree);
I'm unable to test on a system that has interface stats, but am assuming
that you have :-).
This looks fine to me, so:
Reviewed-by: Laine Stump <laine@xxxxxxxxxx>
and pushed. Thanks for your contribution!