On 14.09.2021 11:31, Guilin Tang wrote: > Implement XDP_REDIRECT based on the r8169 XDP implementation. > Can use sample/bpf/xdpsock test. > > Signed-off-by: Guilin Tang <tangguilin@xxxxxxxxxxxxx> > --- > drivers/net/ethernet/realtek/r8169_main.c | 78 +++++++++++++++++++---- > 1 file changed, 65 insertions(+), 13 deletions(-) > > diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c > index 69bc3c68e73d..b56899530ed5 100644 > --- a/drivers/net/ethernet/realtek/r8169_main.c > +++ b/drivers/net/ethernet/realtek/r8169_main.c > @@ -69,6 +69,8 @@ > > #define R8169_REGS_SIZE 256 > #define R8169_RX_BUF_SIZE (SZ_16K - 1) > +/*Reserve space for XDP frame*/ > +#define R8169_RX_XDP_BUF_SIZE (R8169_RX_BUF_SIZE - XDP_PACKET_HEADROOM) > #define NUM_TX_DESC 256 /* Number of Tx descriptor registers */ > #define NUM_RX_DESC 256 /* Number of Rx descriptor registers */ > #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) > @@ -645,6 +647,7 @@ struct rtl8169_private { > u32 ocp_base; > /*xdp bpf*/ > struct bpf_prog *rtl_xdp; > + struct xdp_rxq_info rtl_rxq; > }; > > typedef void (*rtl_generic_fct)(struct rtl8169_private *tp); > @@ -2516,7 +2519,7 @@ static void rtl_set_tx_config_registers(struct rtl8169_private *tp) > static void rtl_set_rx_max_size(struct rtl8169_private *tp) > { > /* Low hurts. Let's disable the filtering. */ > - RTL_W16(tp, RxMaxSize, R8169_RX_BUF_SIZE + 1); > + RTL_W16(tp, RxMaxSize, R8169_RX_XDP_BUF_SIZE + 1); You shouldn't have to change this value. Max mtu is 9k and this 16k buffer is used due to hw bug on an older chip versions. > } > > static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp) > @@ -3866,7 +3869,7 @@ static void rtl8169_mark_to_asic(struct RxDesc *desc) > desc->opts2 = 0; > /* Force memory writes to complete before releasing descriptor */ > dma_wmb(); > - WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE)); > + WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_XDP_BUF_SIZE)); > } > > static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp, > @@ -3881,7 +3884,8 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp, > if (!data) > return NULL; > > - mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); > + mapping = dma_map_page(d, data, XDP_PACKET_HEADROOM, > + R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE); > if (unlikely(dma_mapping_error(d, mapping))) { > netdev_err(tp->dev, "Failed to map RX DMA!\n"); > __free_pages(data, get_order(R8169_RX_BUF_SIZE)); > @@ -3901,19 +3905,28 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp) > for (i = 0; i < NUM_RX_DESC && tp->Rx_databuff[i]; i++) { > dma_unmap_page(tp_to_dev(tp), > le64_to_cpu(tp->RxDescArray[i].addr), > - R8169_RX_BUF_SIZE, DMA_FROM_DEVICE); > + R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE); > __free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE)); > tp->Rx_databuff[i] = NULL; > tp->RxDescArray[i].addr = 0; > tp->RxDescArray[i].opts1 = 0; > } > tp->rtl_xdp = NULL; > + xdp_rxq_info_unreg(&tp->rtl_rxq); > } > > static int rtl8169_rx_fill(struct rtl8169_private *tp) > { > int i; > > + if (xdp_rxq_info_reg(&tp->rtl_rxq, tp->dev, 0, 0)) > + return -1; Again, if you need an errno, use an errno. > + > + if (xdp_rxq_info_reg_mem_model(&tp->rtl_rxq, MEM_TYPE_PAGE_SHARED, NULL)) { > + xdp_rxq_info_unreg(&tp->rtl_rxq); > + return -1; > + } > + > for (i = 0; i < NUM_RX_DESC; i++) { > struct page *data; > > @@ -4513,24 +4526,50 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1) > skb_checksum_none_assert(skb); > } > > +struct page *rtl8169_realloc_rx_data(struct rtl8169_private *tp, struct RxDesc *desc) > +{ > + struct page *data = NULL; > + > + do { > + data = rtl8169_alloc_rx_data(tp, desc); > + } while (data == NULL); > + > + return data; > +} > + > static struct sk_buff *rtl8619_run_xdp(struct rtl8169_private *tp, struct bpf_prog *xdp_prog, > - void *rx_buf, unsigned int pkt_size) > + void *rx_buf, unsigned int pkt_size, unsigned int entry) > { > - int result = R8169_XDP_PASS; > + int err, result = R8169_XDP_PASS; > struct xdp_buff xdp; > u32 act; > + struct RxDesc *desc = tp->RxDescArray + entry; > > - xdp.data = rx_buf; > - xdp.data_end = xdp.data + pkt_size; > - xdp_set_data_meta_invalid(&xdp); > + xdp_init_buff(&xdp, R8169_RX_XDP_BUF_SIZE, &tp->rtl_rxq); > + xdp_prepare_buff(&xdp, (unsigned char *)rx_buf, XDP_PACKET_HEADROOM, pkt_size, true); > + prefetchw(xdp.data_hard_start); > > act = bpf_prog_run_xdp(xdp_prog, &xdp); > switch (act) { > case XDP_PASS: > break; > case XDP_TX: > - case XDP_REDIRECT: > goto out_failure; > + case XDP_REDIRECT: > + /*unmap dma page*/ > + dma_unmap_page(tp_to_dev(tp), le64_to_cpu(desc->addr), > + R8169_RX_XDP_BUF_SIZE, DMA_FROM_DEVICE); > + err = xdp_do_redirect(tp->dev, &xdp, xdp_prog); > + if (unlikely(err)) { > + /*free page*/ > + __free_pages(tp->Rx_databuff[entry], get_order(R8169_RX_BUF_SIZE)); > + /*realloc*/ > + tp->Rx_databuff[entry] = rtl8169_realloc_rx_data(tp, desc); Why this reallocation? > + goto out_failure; > + } else { > + result = R8169_XDP_REDIR; > + } > + break; > default: > bpf_warn_invalid_xdp_action(act); > fallthrough; > @@ -4551,6 +4590,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget > struct device *d = tp_to_dev(tp); > int count; > struct bpf_prog *xdp_prog; > + unsigned int xdp_xmit = 0; > > for (count = 0; count < budget; count++, tp->cur_rx++) { > unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC; > @@ -4607,9 +4647,18 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget > //Determine whether to execute xdp > xdp_prog = READ_ONCE(tp->rtl_xdp); > if (xdp_prog) { > - skb = rtl8619_run_xdp(tp, xdp_prog, (void *)rx_buf, pkt_size); > + skb = rtl8619_run_xdp(tp, xdp_prog, (void *)rx_buf, pkt_size, entry); > if (IS_ERR(skb)) { > - dev->stats.rx_dropped++; > + unsigned int xdp_res = -PTR_ERR(skb); > + > + if (xdp_res & R8169_XDP_REDIR) { > + tp->Rx_databuff[entry] = rtl8169_realloc_rx_data(tp, desc); Why this reallocation? > + xdp_xmit |= xdp_res; > + dev->stats.rx_packets++; Why not use dev_sw_netstats_rx_add() like in standard path? > + dev->stats.rx_bytes += pkt_size; > + } else { > + dev->stats.rx_dropped++; > + } > goto release_descriptor; > } > } > @@ -4620,7 +4669,7 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget > goto release_descriptor; > } > > - skb_copy_to_linear_data(skb, rx_buf, pkt_size); > + skb_copy_to_linear_data(skb, rx_buf + XDP_PACKET_HEADROOM, pkt_size); > skb->tail += pkt_size; > skb->len = pkt_size; > dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); > @@ -4640,6 +4689,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget > rtl8169_mark_to_asic(desc); > } > > + if (xdp_xmit & R8169_XDP_REDIR) > + xdp_do_flush(); > + > return count; > } > >