Add support of XDP for the MANA driver. Supported XDP actions: XDP_PASS, XDP_TX, XDP_DROP, XDP_ABORTED XDP actions not yet supported: XDP_REDIRECT Signed-off-by: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> --- drivers/net/ethernet/microsoft/mana/Makefile | 2 +- drivers/net/ethernet/microsoft/mana/mana.h | 13 ++ .../net/ethernet/microsoft/mana/mana_bpf.c | 162 ++++++++++++++++++ drivers/net/ethernet/microsoft/mana/mana_en.c | 69 ++++++-- 4 files changed, 235 insertions(+), 11 deletions(-) create mode 100644 drivers/net/ethernet/microsoft/mana/mana_bpf.c diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile index 0edd5bb685f3..e16a4221f571 100644 --- a/drivers/net/ethernet/microsoft/mana/Makefile +++ b/drivers/net/ethernet/microsoft/mana/Makefile @@ -3,4 +3,4 @@ # Makefile for the Microsoft Azure Network Adapter driver obj-$(CONFIG_MICROSOFT_MANA) += mana.o -mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o +mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o mana_bpf.o diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h index d047ee876f12..0c5553887b75 100644 --- a/drivers/net/ethernet/microsoft/mana/mana.h +++ b/drivers/net/ethernet/microsoft/mana/mana.h @@ -298,6 +298,9 @@ struct mana_rxq { struct mana_stats stats; + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; + /* MUST BE THE LAST MEMBER: * Each receive buffer has an associated mana_recv_buf_oob. */ @@ -353,6 +356,8 @@ struct mana_port_context { /* This points to an array of num_queues of RQ pointers. */ struct mana_rxq **rxqs; + struct bpf_prog *bpf_prog; + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ unsigned int max_queues; unsigned int num_queues; @@ -367,6 +372,7 @@ struct mana_port_context { struct mana_ethtool_stats eth_stats; }; +int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, bool update_hash, bool update_tab); @@ -377,6 +383,13 @@ int mana_detach(struct net_device *ndev, bool from_close); int mana_probe(struct gdma_dev *gd, bool resuming); void mana_remove(struct gdma_dev *gd, bool suspending); +void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); +u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len); +struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); +void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); +int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); + extern const struct ethtool_ops mana_ethtool_ops; struct mana_obj_spec { diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c new file mode 100644 index 000000000000..1bc8ff388341 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include <linux/inetdevice.h> +#include <linux/etherdevice.h> +#include <linux/mm.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <net/xdp.h> + +#include "mana.h" + +void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev) +{ + u16 txq_idx = skb_get_queue_mapping(skb); + struct netdev_queue *ndevtxq; + int rc; + + __skb_push(skb, ETH_HLEN); + + ndevtxq = netdev_get_tx_queue(ndev, txq_idx); + __netif_tx_lock(ndevtxq, smp_processor_id()); + + rc = mana_start_xmit(skb, ndev); + + __netif_tx_unlock(ndevtxq); + + if (dev_xmit_complete(rc)) + return; + + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; +} + +u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len) +{ + struct bpf_prog *prog; + u32 act = XDP_PASS; + + rcu_read_lock(); + prog = rcu_dereference(rxq->bpf_prog); + + if (!prog) + goto out; + + xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq); + xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false); + + act = bpf_prog_run_xdp(prog, xdp); + + switch (act) { + case XDP_PASS: + case XDP_TX: + case XDP_DROP: + break; + + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + break; + + default: + bpf_warn_invalid_xdp_action(act); + } + +out: + rcu_read_unlock(); + + return act; +} + +static unsigned int mana_xdp_fraglen(unsigned int len) +{ + return SKB_DATA_ALIGN(len) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +} + +struct bpf_prog *mana_xdp_get(struct mana_port_context *apc) +{ + ASSERT_RTNL(); + + return apc->bpf_prog; +} + +static struct bpf_prog *mana_chn_xdp_get(struct mana_port_context *apc) +{ + return rtnl_dereference(apc->rxqs[0]->bpf_prog); +} + +/* Set xdp program on channels */ +void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog = mana_chn_xdp_get(apc); + unsigned int num_queues = apc->num_queues; + int i; + + ASSERT_RTNL(); + + if (old_prog == prog) + return; + + if (prog) + bpf_prog_add(prog, num_queues); + + for (i = 0; i < num_queues; i++) + rcu_assign_pointer(apc->rxqs[i]->bpf_prog, prog); + + if (old_prog) + for (i = 0; i < num_queues; i++) + bpf_prog_put(old_prog); +} + +static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct bpf_prog *old_prog; + int buf_max; + + old_prog = mana_xdp_get(apc); + + if (!old_prog && !prog) + return 0; + + buf_max = XDP_PACKET_HEADROOM + mana_xdp_fraglen(ndev->mtu + ETH_HLEN); + if (prog && buf_max > PAGE_SIZE) { + netdev_err(ndev, "XDP: mtu:%u too large, buf_max:%u\n", + ndev->mtu, buf_max); + NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large"); + + return -EOPNOTSUPP; + } + + /* One refcnt of the prog is hold by the caller already, so + * don't increase refcnt for this one. + */ + apc->bpf_prog = prog; + + if (old_prog) + bpf_prog_put(old_prog); + + if (apc->port_is_up) + mana_chn_setxdp(apc, prog); + + return 0; +} + +int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct netlink_ext_ack *extack = bpf->extack; + int ret; + + switch (bpf->command) { + case XDP_SETUP_PROG: + return mana_xdp_set(ndev, bpf->prog, extack); + + default: + return -EOPNOTSUPP; + } + + return ret; +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 72cbf45c42d8..c1d5a374b967 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -125,7 +125,7 @@ static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, return -ENOMEM; } -static int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) +int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) { enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; struct mana_port_context *apc = netdev_priv(ndev); @@ -378,6 +378,7 @@ static const struct net_device_ops mana_devops = { .ndo_start_xmit = mana_start_xmit, .ndo_validate_addr = eth_validate_addr, .ndo_get_stats64 = mana_get_stats64, + .ndo_bpf = mana_bpf, }; static void mana_cleanup_port_context(struct mana_port_context *apc) @@ -906,6 +907,25 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq) WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); } +static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len, + struct xdp_buff *xdp) +{ + struct sk_buff *skb = build_skb(buf_va, PAGE_SIZE); + + if (!skb) + return NULL; + + if (xdp->data_hard_start) { + skb_reserve(skb, xdp->data - xdp->data_hard_start); + skb_put(skb, xdp->data_end - xdp->data); + } else { + skb_reserve(skb, XDP_PACKET_HEADROOM); + skb_put(skb, pkt_len); + } + + return skb; +} + static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) { @@ -914,8 +934,10 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, uint pkt_len = cqe->ppi[0].pkt_len; u16 rxq_idx = rxq->rxq_idx; struct napi_struct *napi; + struct xdp_buff xdp = {}; struct sk_buff *skb; u32 hash_value; + u32 act; rxq->rx_cq.work_done++; napi = &rxq->rx_cq.napi; @@ -925,15 +947,16 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, return; } - skb = build_skb(buf_va, PAGE_SIZE); + act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); - if (!skb) { - free_page((unsigned long)buf_va); - ++ndev->stats.rx_dropped; - return; - } + if (act != XDP_PASS && act != XDP_TX) + goto drop; + + skb = mana_build_skb(buf_va, pkt_len, &xdp); + + if (!skb) + goto drop; - skb_put(skb, pkt_len); skb->dev = napi->dev; skb->protocol = eth_type_trans(skb, ndev); @@ -954,12 +977,24 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); } + if (act == XDP_TX) { + skb_set_queue_mapping(skb, rxq_idx); + mana_xdp_tx(skb, ndev); + return; + } + napi_gro_receive(napi, skb); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += pkt_len; u64_stats_update_end(&rx_stats->syncp); + return; + +drop: + free_page((unsigned long)buf_va); + ++ndev->stats.rx_dropped; + return; } static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, @@ -1016,7 +1051,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, new_page = alloc_page(GFP_ATOMIC); if (new_page) { - da = dma_map_page(dev, new_page, 0, rxq->datasize, + da = dma_map_page(dev, new_page, XDP_PACKET_HEADROOM, rxq->datasize, DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { @@ -1291,6 +1326,9 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi_synchronize(napi); napi_disable(napi); + + xdp_rxq_info_unreg(&rxq->xdp_rxq); + netif_napi_del(napi); mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); @@ -1342,7 +1380,8 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc, if (!page) return -ENOMEM; - da = dma_map_page(dev, page, 0, rxq->datasize, DMA_FROM_DEVICE); + da = dma_map_page(dev, page, XDP_PACKET_HEADROOM, rxq->datasize, + DMA_FROM_DEVICE); if (dma_mapping_error(dev, da)) { __free_page(page); @@ -1485,6 +1524,12 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; netif_napi_add(ndev, &cq->napi, mana_poll, 1); + + WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, + cq->napi.napi_id)); + WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, + MEM_TYPE_PAGE_SHARED, NULL)); + napi_enable(&cq->napi); mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); @@ -1650,6 +1695,8 @@ int mana_alloc_queues(struct net_device *ndev) if (err) goto destroy_vport; + mana_chn_setxdp(apc, mana_xdp_get(apc)); + return 0; destroy_vport: @@ -1698,6 +1745,8 @@ static int mana_dealloc_queues(struct net_device *ndev) if (apc->port_is_up) return -EINVAL; + mana_chn_setxdp(apc, NULL); + /* No packet can be transmitted now since apc->port_is_up is false. * There is still a tiny chance that mana_poll_tx_cq() can re-enable * a txq because it may not timely see apc->port_is_up being cleared -- 2.25.1