Adding functionality to libvirt that will allow it query the interface for the availability of switchdev Offloading NIC capabilities --- configure.ac | 13 ++ docs/formatnode.html.in | 1 + src/util/virnetdev.c | 204 +++++++++++++++++++++- src/util/virnetdev.h | 1 + tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml | 1 + tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml | 1 + 6 files changed, 220 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index afacf40..a050b99 100644 --- a/configure.ac +++ b/configure.ac @@ -627,6 +627,19 @@ if test "$with_linux" = "yes"; then AC_CHECK_HEADERS([linux/btrfs.h]) fi +dnl +dnl check for kernel headers required by devlink +dnl +if test "$with_linux" = "yes"; then + AC_CHECK_HEADERS([linux/devlink.h]) + AC_CHECK_DECLS([DEVLINK_GENL_VERSION, DEVLINK_GENL_NAME, DEVLINK_ATTR_MAX, DEVLINK_CMD_ESWITCH_GET, DEVLINK_ATTR_BUS_NAME, DEVLINK_ATTR_DEV_NAME, DEVLINK_ATTR_ESWITCH_MODE, DEVLINK_ESWITCH_MODE_SWITCHDEV], + [AC_DEFINE([HAVE_DECL_DEVLINK], + [1], + [whether devlink declarations is available])], + [], + [[#include <linux/devlink.h>]]) +fi + dnl Allow perl/python overrides AC_PATH_PROGS([PYTHON], [python2 python]) if test -z "$PYTHON"; then diff --git a/docs/formatnode.html.in b/docs/formatnode.html.in index 32451d5..e7b30ea 100644 --- a/docs/formatnode.html.in +++ b/docs/formatnode.html.in @@ -227,6 +227,7 @@ <dt><code>rxhash</code></dt><dd>receive-hashing</dd> <dt><code>rdma</code></dt><dd>remote-direct-memory-access</dd> <dt><code>txudptnl</code></dt><dd>tx-udp-tunnel-segmentation</dd> + <dt><code>switchdev</code></dt><dd>kernel-forward-plane-offload</dd> </dl> </dd> <dt><code>capability</code></dt> diff --git a/src/util/virnetdev.c b/src/util/virnetdev.c index 90b7bee..084fb41 100644 --- a/src/util/virnetdev.c +++ b/src/util/virnetdev.c @@ -59,6 +59,10 @@ # include <net/if_dl.h> #endif +#if HAVE_DECL_DEVLINK +# include <linux/devlink.h> +#endif + #ifndef IFNAMSIZ # define IFNAMSIZ 16 #endif @@ -95,6 +99,7 @@ VIR_LOG_INIT("util.netdev"); (FEATURE_WORD(blocks, index, field) & FEATURE_FIELD_FLAG(index)) #endif + typedef enum { VIR_MCAST_TYPE_INDEX_TOKEN, VIR_MCAST_TYPE_NAME_TOKEN, @@ -2396,7 +2401,8 @@ VIR_ENUM_IMPL(virNetDevFeature, "ntuple", "rxhash", "rdma", - "txudptnl") + "txudptnl", + "switchdev") #ifdef __linux__ int @@ -2851,6 +2857,199 @@ int virNetDevGetRxFilter(const char *ifname, return ret; } + + +#if HAVE_DECL_DEVLINK +/** + * virNetDevPutExtraHeader + * reserve and prepare room for an extra header + * This function sets to zero the room that is required to put the extra + * header after the initial Netlink header. This function also increases + * the nlmsg_len field. You have to invoke mnl_nlmsg_put_header() before + * you call this function. This function returns a pointer to the extra + * header. + * + * @nlh: pointer to Netlink header + * @size: size of the extra header that we want to put + * + * Returns pointer to the start of the extended header + */ +static void * +virNetDevPutExtraHeader(struct nlmsghdr *nlh, size_t size) +{ + char *ptr = (char *)nlh + nlh->nlmsg_len; + size_t len = NLMSG_ALIGN(size); + nlh->nlmsg_len += len; + memset(ptr, 0, len); + return ptr; +} + + +/** + * virNetDevGetFamilyId: + * This function supplies the devlink family id + * + * @family_name: the name of the family to query + * + * Returns family id or 0 on failure. + */ +static int +virNetDevGetFamilyId(const char *family_name) +{ + struct nl_msg *nl_msg = NULL; + struct nlmsghdr *resp = NULL; + struct genlmsghdr* gmsgh = NULL; + struct nlattr *tb[CTRL_ATTR_MAX + 1] = {NULL, }; + unsigned int recvbuflen; + uint32_t family_id = 0; + + if (!(nl_msg = nlmsg_alloc_simple(GENL_ID_CTRL, + NLM_F_REQUEST | NLM_F_ACK))) { + virReportOOMError(); + goto cleanup; + } + + gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr)); + if (!gmsgh) + goto cleanup; + + gmsgh->cmd = CTRL_CMD_GETFAMILY; + gmsgh->version = DEVLINK_GENL_VERSION; + + if (nla_put_string(nl_msg, CTRL_ATTR_FAMILY_NAME, family_name) < 0) + goto buffer_too_small; + + if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, NETLINK_GENERIC, 0) < 0) + goto cleanup; + + if (nlmsg_parse(resp, sizeof(struct nlmsghdr), tb, CTRL_CMD_MAX, NULL) < 0) + goto malformed_resp; + + if (tb[CTRL_ATTR_FAMILY_ID] == NULL) + goto cleanup; + + family_id = *(int *)RTA_DATA(tb[CTRL_ATTR_FAMILY_ID]); + + cleanup: + nlmsg_free(nl_msg); + VIR_FREE(resp); + return family_id; + + malformed_resp: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + goto cleanup; + + buffer_too_small: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + goto cleanup; +} + + +/** + * virNetDevSwitchdevFeature + * This function checks for the availability of Switchdev feature + * and add it to bitmap + * + * @ifname: name of the interface + * @out: add Switchdev feature if exist to bitmap + * + * Returns 0 on success, -1 on failure. + */ +static int +virNetDevSwitchdevFeature(const char *ifname, + virBitmapPtr *out) +{ + struct nl_msg *nl_msg = NULL; + struct nlmsghdr *resp = NULL; + unsigned int recvbuflen; + struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {NULL, }; + virPCIDevicePtr pci_device_ptr = NULL; + struct genlmsghdr* gmsgh = NULL; + const char *pci_name; + char *pfname = NULL; + int func_ret_val = -1; + int ret = -1; + + int family_id = virNetDevGetFamilyId(DEVLINK_GENL_NAME); + if (family_id == 0) + goto cleanup; + + func_ret_val = virNetDevIsVirtualFunction(ifname); + if (func_ret_val == 1) { + if (virNetDevGetPhysicalFunction(ifname, &pfname) < 0) + goto cleanup; + } + else if (func_ret_val < 0) + goto cleanup; + + if (!(nl_msg = nlmsg_alloc_simple(family_id, + NLM_F_REQUEST | NLM_F_ACK))) { + virReportOOMError(); + goto cleanup; + } + + pci_device_ptr = pfname ? virNetDevGetPCIDevice(pfname) : + virNetDevGetPCIDevice(ifname); + if (!pci_device_ptr) + goto cleanup; + + pci_name = virPCIDeviceGetName(pci_device_ptr); + + gmsgh = virNetDevPutExtraHeader(nlmsg_hdr(nl_msg), sizeof(struct genlmsghdr)); + if (!gmsgh) + goto cleanup; + + gmsgh->cmd = DEVLINK_CMD_ESWITCH_GET; + gmsgh->version = DEVLINK_GENL_VERSION; + + if (nla_put(nl_msg, DEVLINK_ATTR_BUS_NAME, strlen("pci")+1, "pci") < 0) + goto buffer_too_small; + + if (nla_put(nl_msg, DEVLINK_ATTR_DEV_NAME, strlen(pci_name)+1, pci_name) < 0) + goto buffer_too_small; + + if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0, + NETLINK_GENERIC, 0) < 0) + goto cleanup; + + if (nlmsg_parse(resp, sizeof(struct genlmsghdr), tb, DEVLINK_ATTR_MAX, NULL) < 0) + goto malformed_resp; + + if (tb[DEVLINK_ATTR_ESWITCH_MODE] && + *(int *)RTA_DATA(tb[DEVLINK_ATTR_ESWITCH_MODE]) == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + ignore_value(virBitmapSetBit(*out, VIR_NET_DEV_FEAT_SWITCHDEV)); + } + + ret = 0; + + cleanup: + nlmsg_free(nl_msg); + virPCIDeviceFree(pci_device_ptr); + VIR_FREE(resp); + return ret; + + malformed_resp: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + goto cleanup; + + buffer_too_small: + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + goto cleanup; +} +# else +static int +virNetDevSwitchdevFeature(const char *ifname, + virBitmapPtr *out) +{ + return 0; +} +# endif + + #if defined(SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ) /** @@ -3230,6 +3429,9 @@ virNetDevGetFeatures(const char *ifname, if (virNetDevRDMAFeature(ifname, out) < 0) goto cleanup; + if (virNetDevSwitchdevFeature(ifname, out) < 0) + goto cleanup; + ret = 0; cleanup: VIR_FORCE_CLOSE(fd); diff --git a/src/util/virnetdev.h b/src/util/virnetdev.h index 2e9a9c4..8fd6036 100644 --- a/src/util/virnetdev.h +++ b/src/util/virnetdev.h @@ -112,6 +112,7 @@ typedef enum { VIR_NET_DEV_FEAT_RXHASH, VIR_NET_DEV_FEAT_RDMA, VIR_NET_DEV_FEAT_TXUDPTNL, + VIR_NET_DEV_FEAT_SWITCHDEV, VIR_NET_DEV_FEAT_LAST } virNetDevFeature; diff --git a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml index d4c96e8..88252e6 100644 --- a/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml +++ b/tests/nodedevschemadata/net_00_13_02_b9_f9_d3.xml @@ -15,6 +15,7 @@ <feature name='rxhash'/> <feature name='rdma'/> <feature name='txudptnl'/> + <feature name='switchdev'/> <capability type='80211'/> </capability> </device> diff --git a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml index 71bf90e..f77dfcc 100644 --- a/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml +++ b/tests/nodedevschemadata/net_00_15_58_2f_e9_55.xml @@ -15,6 +15,7 @@ <feature name='rxhash'/> <feature name='rdma'/> <feature name='txudptnl'/> + <feature name='switchdev'/> <capability type='80203'/> </capability> </device> -- 2.1.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list