On Thu, Dec 08, 2022 at 06:40:45AM +0100, Gerhard Engleder wrote:
> If BPF program is set up, then run BPF program for every received frame
> and execute the selected action.
> 
> Test results with A53 1.2GHz:
> 
> XDP_DROP (samples/bpf/xdp1)
> proto 17:     883878 pkt/s
> 
> XDP_TX (samples/bpf/xdp2)
> proto 17:     255693 pkt/s
> 
> XDP_REDIRECT (samples/bpf/xdpsock)
>  sock0@eth2:0 rxdrop xdp-drv
>                    pps            pkts           1.00
> rx                 855,582        5,404,523
> tx                 0              0
> 
> XDP_REDIRECT (samples/bpf/xdp_redirect)
> eth2->eth1 613,267 rx/s 0 err,drop/s 613,272 xmit/s
> 
> Signed-off-by: Gerhard Engleder <gerhard@xxxxxxxxxxxxxxxxxxxxx>
> ---
>  drivers/net/ethernet/engleder/tsnep_main.c | 126 +++++++++++++++++++++
>  1 file changed, 126 insertions(+)
> 
> diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
> index 2b662a98b62a..d59cb714c8cd 100644
> --- a/drivers/net/ethernet/engleder/tsnep_main.c
> +++ b/drivers/net/ethernet/engleder/tsnep_main.c
> @@ -27,6 +27,7 @@
>  #include <linux/phy.h>
>  #include <linux/iopoll.h>
>  #include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>  
>  #define TSNEP_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
>  #define TSNEP_HEADROOM ALIGN(max(TSNEP_SKB_PAD, XDP_PACKET_HEADROOM), 4)
> @@ -44,6 +45,9 @@
>  #define TSNEP_COALESCE_USECS_MAX	((ECM_INT_DELAY_MASK >> ECM_INT_DELAY_SHIFT) * \
>  					 ECM_INT_DELAY_BASE_US + ECM_INT_DELAY_BASE_US - 1)
>  
> +#define TSNEP_XDP_TX		BIT(0)
> +#define TSNEP_XDP_REDIRECT	BIT(1)
> +
>  enum {
>  	__TSNEP_DOWN,
>  };
> @@ -626,6 +630,33 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
>  	iowrite32(TSNEP_CONTROL_TX_ENABLE, tx->addr + TSNEP_CONTROL);
>  }
>  
> +static int tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
> +			       struct xdp_buff *xdp)
> +{
> +	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
> +	int cpu = smp_processor_id();
> +	int queue;
> +	struct netdev_queue *nq;
> +	int retval;
> +
> +	if (unlikely(!xdpf))
> +		return -EFAULT;
> +
> +	queue = cpu % adapter->num_tx_queues;
> +	nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> +	__netif_tx_lock(nq, cpu);
> +
> +	/* Avoid transmit queue timeout since we share it with the slow path */
> +	txq_trans_cond_update(nq);
> +
> +	retval = tsnep_xdp_xmit_frame_ring(xdpf, &adapter->tx[queue], false);
> +
> +	__netif_tx_unlock(nq);
> +
> +	return retval;
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	unsigned long flags;
> @@ -792,6 +823,11 @@ static unsigned int tsnep_rx_offset(struct tsnep_rx *rx)
>  	return TSNEP_SKB_PAD;
>  }
>  
> +static unsigned int tsnep_rx_offset_xdp(void)
> +{
> +	return XDP_PACKET_HEADROOM;
> +}

I don't see much value in this func :P

> +
>  static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
>  {
>  	struct device *dmadev = rx->adapter->dmadev;
> @@ -997,6 +1033,67 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
>  	return i;
>  }
>  
> +static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
> +			       struct xdp_buff *xdp, int *status)
> +{
> +	unsigned int length;
> +	unsigned int sync;
> +	u32 act;
> +
> +	length = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();

could this be xdp->data_end - xdp->data - TSNEP_RX_INLINE_METADATA_SIZE?

Can you tell a bit more about that metadata macro that you have to handle
by yourself all the time? It would also be good to explain the impact on
data_meta, since you're not configuring it in xdp_prepare_buff().
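
Just so we are reading the buffer layout the same way, this is what I take
from the xdp_prepare_buff() call further down in tsnep_rx_poll() (only a
sketch of my understanding, the dma_len/bpf_len names are made up here):

	unsigned int dma_len;	/* inline metadata + frame, what HW wrote */
	unsigned int bpf_len;	/* frame only, what the program can see   */

	/* headroom handed to xdp_prepare_buff() is
	 * XDP_PACKET_HEADROOM + TSNEP_RX_INLINE_METADATA_SIZE, so xdp->data
	 * already points behind the inline metadata words
	 */
	dma_len = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
	bpf_len = xdp->data_end - xdp->data;

If I got that right, dma_len equals the 'length' taken from the descriptor
and bpf_len is the frame without the metadata; a comment spelling out which
of the two the sync length is supposed to be would answer most of my
question.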
> +
> +	act = bpf_prog_run_xdp(prog, xdp);
> +
> +	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
> +	sync = xdp->data_end - xdp->data_hard_start - tsnep_rx_offset_xdp();
> +	sync = max(sync, length);
> +
> +	switch (act) {
> +	case XDP_PASS:
> +		return false;
> +	case XDP_TX:
> +		if (tsnep_xdp_xmit_back(rx->adapter, xdp) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_TX;
> +		return true;
> +	case XDP_REDIRECT:
> +		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
> +			goto out_failure;
> +		*status |= TSNEP_XDP_REDIRECT;
> +		return true;
> +	default:
> +		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_ABORTED:
> +out_failure:
> +		trace_xdp_exception(rx->adapter->netdev, prog, act);
> +		fallthrough;
> +	case XDP_DROP:
> +		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
> +				   sync, true);
> +		return true;
> +	}
> +}
> +
> +static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status)
> +{
> +	int cpu = smp_processor_id();
> +	int queue;
> +	struct netdev_queue *nq;

do you care about RCT (reverse Christmas tree) ordering of the locals here,
or not?

> +
> +	if (status & TSNEP_XDP_TX) {
> +		queue = cpu % adapter->num_tx_queues;
> +		nq = netdev_get_tx_queue(adapter->netdev, queue);
> +
> +		__netif_tx_lock(nq, cpu);
> +		tsnep_xdp_xmit_flush(&adapter->tx[queue]);
> +		__netif_tx_unlock(nq);
> +	}
> +
> +	if (status & TSNEP_XDP_REDIRECT)
> +		xdp_do_flush();
> +}
> +
>  static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
>  				       int length)

Did you consider making tsnep_build_skb() work on the xdp_buff directly?
That will probably help you once you implement XDP multi-buffer support
here.

>  {
> @@ -1035,12 +1132,16 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  	int desc_available;
>  	int done = 0;
>  	enum dma_data_direction dma_dir;
> +	struct bpf_prog *prog;
>  	struct tsnep_rx_entry *entry;
> +	struct xdp_buff xdp;
> +	int xdp_status = 0;
>  	struct sk_buff *skb;
>  	int length;
>  
>  	desc_available = tsnep_rx_desc_available(rx);
>  	dma_dir = page_pool_get_dma_dir(rx->page_pool);
> +	prog = READ_ONCE(rx->adapter->xdp_prog);
>  
>  	while (likely(done < budget) && (rx->read != rx->write)) {
>  		entry = &rx->entry[rx->read];
> @@ -1084,6 +1185,28 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
>  		desc_available++;
>  
> +		if (prog) {
> +			bool consume;
> +
> +			xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
> +			xdp_prepare_buff(&xdp, page_address(entry->page),
> +					 tsnep_rx_offset_xdp() + TSNEP_RX_INLINE_METADATA_SIZE,
> +					 length - TSNEP_RX_INLINE_METADATA_SIZE,

Would it make sense to subtract TSNEP_RX_INLINE_METADATA_SIZE from length
right after dma_sync_single_range_for_cpu()?

> +					 false);
> +
> +			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
> +						     &xdp_status);
> +			if (consume) {
> +				rx->packets++;
> +				rx->bytes +=
> +					length - TSNEP_RX_INLINE_METADATA_SIZE;
> +
> +				entry->page = NULL;
> +
> +				continue;
> +			}
> +		}
> +
>  		skb = tsnep_build_skb(rx, entry->page, length);
>  		if (skb) {
>  			page_pool_release_page(rx->page_pool, entry->page);
> @@ -1102,6 +1225,9 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
>  		entry->page = NULL;
>  	}
>  
> +	if (xdp_status)
> +		tsnep_finalize_xdp(rx->adapter, xdp_status);
> +
>  	if (desc_available)
>  		tsnep_rx_refill(rx, desc_available, false);
> 
> -- 
> 2.30.2
> 
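
To be concrete about the length question above, what I have in mind is
roughly the following rearrangement of this hunk (untested sketch only; the
dma_sync_single_range_for_cpu() call itself is outside the diff context and
tsnep_build_skb() would need a matching adjustment that I'm leaving out):

		/* strip the inline metadata once, right after the DMA sync */
		length -= TSNEP_RX_INLINE_METADATA_SIZE;

		if (prog) {
			bool consume;

			xdp_init_buff(&xdp, PAGE_SIZE, &rx->xdp_rxq);
			xdp_prepare_buff(&xdp, page_address(entry->page),
					 XDP_PACKET_HEADROOM +
					 TSNEP_RX_INLINE_METADATA_SIZE,
					 length, false);

			consume = tsnep_xdp_run_prog(rx, prog, &xdp,
						     &xdp_status);
			if (consume) {
				rx->packets++;
				rx->bytes += length;

				entry->page = NULL;

				continue;
			}
		}

That way the '- TSNEP_RX_INLINE_METADATA_SIZE' would not have to be
repeated in every later user of 'length'.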