On Mon, 28 Oct 2019 21:07:04 +0000, Haiyang Zhang wrote: > This patch adds support of XDP in native mode for hv_netvsc driver, and > transparently sets the XDP program on the associated VF NIC as well. > > XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO > before running XDP: > ethtool -K eth0 lro off > > XDP actions not yet supported: > XDP_TX, XDP_REDIRECT I don't think we want to merge support without at least XDP_TX these days. And without the ability to prepend headers this may be the least complete initial XDP implementation we've seen :( > Signed-off-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c > index d22a36f..688487b 100644 > --- a/drivers/net/hyperv/netvsc.c > +++ b/drivers/net/hyperv/netvsc.c > @@ -122,8 +122,10 @@ static void free_netvsc_device(struct rcu_head *head) > vfree(nvdev->send_buf); > kfree(nvdev->send_section_map); > > - for (i = 0; i < VRSS_CHANNEL_MAX; i++) > + for (i = 0; i < VRSS_CHANNEL_MAX; i++) { > + xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq); > vfree(nvdev->chan_table[i].mrc.slots); > + } > > kfree(nvdev); > } > @@ -1370,6 +1372,10 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, > nvchan->net_device = net_device; > u64_stats_init(&nvchan->tx_stats.syncp); > u64_stats_init(&nvchan->rx_stats.syncp); > + > + xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i); > + xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq, > + MEM_TYPE_PAGE_SHARED, NULL); These can fail, so their return values need to be checked. > } > > /* Enable NAPI handler before init callbacks */ > diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c > new file mode 100644 > index 0000000..4d235ac > --- /dev/null > +++ b/drivers/net/hyperv/netvsc_bpf.c > @@ -0,0 +1,211 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* Copyright (c) 2019, Microsoft Corporation.
> + * > + * Author: > + * Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > + */ > + > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include <linux/netdevice.h> > +#include <linux/etherdevice.h> > +#include <linux/ethtool.h> > +#include <linux/bpf.h> > +#include <linux/bpf_trace.h> > +#include <linux/kernel.h> > +#include <net/xdp.h> > + > +#include <linux/mutex.h> > +#include <linux/rtnetlink.h> > + > +#include "hyperv_net.h" > + > +u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, > + void **p_pbuf) > +{ > + struct page *page = NULL; > + void *data = nvchan->rsc.data[0]; > + u32 len = nvchan->rsc.len[0]; > + void *pbuf = data; > + struct bpf_prog *prog; > + struct xdp_buff xdp; > + u32 act = XDP_PASS; > + > + *p_pbuf = NULL; > + > + rcu_read_lock(); > + prog = rcu_dereference(nvchan->bpf_prog); > + > + if (!prog || nvchan->rsc.cnt > 1) Can rsc.cnt == 1 not be ensured at setup time? This looks quite limiting if random frames could be forced to bypass the filter. > + goto out; > + > + /* copy to a new page buffer if data are not within a page */ > + if (virt_to_page(data) != virt_to_page(data + len - 1)) { > + page = alloc_page(GFP_ATOMIC); > + if (!page) > + goto out; Returning XDP_PASS on allocation failure seems highly questionable. 
> + pbuf = page_address(page); > + memcpy(pbuf, nvchan->rsc.data[0], len); > + > + *p_pbuf = pbuf; > + } > + > + xdp.data_hard_start = pbuf; > + xdp.data = xdp.data_hard_start; This patch also doesn't add any headroom for XDP to prepend data :( > + xdp_set_data_meta_invalid(&xdp); > + xdp.data_end = xdp.data + len; > + xdp.rxq = &nvchan->xdp_rxq; > + xdp.handle = 0; > + > + act = bpf_prog_run_xdp(prog, &xdp); > + > + switch (act) { > + case XDP_PASS: > + /* Pass to upper layers */ > + break; > + > + case XDP_ABORTED: > + trace_xdp_exception(ndev, prog, act); > + break; > + > + case XDP_DROP: > + break; > + > + default: > + bpf_warn_invalid_xdp_action(act); > + } > + > +out: > + rcu_read_unlock(); > + > + if (page && act != XDP_PASS) { > + *p_pbuf = NULL; > + __free_page(page); > + } > + > + return act; > +} > + > +unsigned int netvsc_xdp_fraglen(unsigned int len) > +{ > + return SKB_DATA_ALIGN(len) + > + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); > +} > + > +struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev) > +{ > + return rtnl_dereference(nvdev->chan_table[0].bpf_prog); > +} > + > +int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog, > + struct netvsc_device *nvdev) > +{ > + struct bpf_prog *old_prog; > + int frag_max, i; > + > + old_prog = netvsc_xdp_get(nvdev); > + > + if (!old_prog && !prog) > + return 0; I think this case is now handled by the core. > + frag_max = netvsc_xdp_fraglen(dev->mtu + ETH_HLEN); > + if (prog && frag_max > PAGE_SIZE) { > + netdev_err(dev, "XDP: mtu:%u too large, frag:%u\n", > + dev->mtu, frag_max); > + return -EOPNOTSUPP; > + } > + > + if (prog && (dev->features & NETIF_F_LRO)) { > + netdev_err(dev, "XDP: not support LRO\n"); Please report this via extack, that way users will see it in the console in which they're installing the program. 
> + return -EOPNOTSUPP; > + } > + > + if (prog) { > + prog = bpf_prog_add(prog, nvdev->num_chn); > + if (IS_ERR(prog)) > + return PTR_ERR(prog); > + } > + > + for (i = 0; i < nvdev->num_chn; i++) > + rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog); > + > + if (old_prog) > + for (i = 0; i < nvdev->num_chn; i++) > + bpf_prog_put(old_prog); > + > + return 0; > +} > + > +int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog) > +{ > + struct netdev_bpf xdp; > + bpf_op_t ndo_bpf; > + > + ASSERT_RTNL(); > + > + if (!vf_netdev) > + return 0; > + > + ndo_bpf = vf_netdev->netdev_ops->ndo_bpf; > + if (!ndo_bpf) > + return 0; > + > + memset(&xdp, 0, sizeof(xdp)); > + > + xdp.command = XDP_SETUP_PROG; > + xdp.prog = prog; > + > + return ndo_bpf(vf_netdev, &xdp); IMHO the automatic propagation is not a good idea, especially if the propagation doesn't make the entire installation fail when the VF doesn't have ndo_bpf. > +}