On Mon, Feb 06, 2023 at 12:08:37PM +0200, Vladimir Oltean wrote:

Hey Vladimir,

> Schedule NAPI by hand from enetc_xsk_wakeup(), and send frames from the
> XSK TX queue from NAPI context. Add them to the completion queue from
> the enetc_clean_tx_ring() procedure which is common for all kinds of
> traffic.
>
> We reuse one of the TX rings for XDP (XDP_TX/XDP_REDIRECT) for XSK as
> well. They are already cropped from the TX rings that the network stack
> can use when XDP is enabled (with or without AF_XDP).
>
> As for XDP_REDIRECT calling enetc's ndo_xdp_xmit, I'm not sure if that
> can run simultaneously with enetc_poll() (on different CPUs, but towards
> the same TXQ). I guess it probably can, but idk what to do about it.
> The problem is that enetc_xdp_xmit() sends to
> priv->xdp_tx_ring[smp_processor_id()], while enetc_xsk_xmit() and XDP_TX
> send to priv->xdp_tx_ring[NAPI instance]. So when the NAPI instance runs

Why not use cpu id on the latter then?

> on a different CPU than the one it is numerically equal to, we should
> have a lock that serializes XDP_REDIRECT with the others.
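
And if the ring indexing does have to stay the way it is, I'd expect the
serialization you mention to be nothing fancier than a per-ring spinlock
taken by both producers around the BD enqueue and the tail update.
Completely untested sketch below, just to show the idea; the xdp_tx_lock
field and the _locked() wrapper are made up: the lock would have to be
added to struct enetc_bdr, spin_lock_init()ed at ring setup, and taken
by enetc_xdp_xmit() around its own enqueue loop and tail bump as well:

/* Untested illustration only: serialize ndo_xdp_xmit(), which picks the
 * ring by smp_processor_id(), against XSK TX / XDP_TX done from NAPI
 * context, which may run on a different CPU. All of these paths run in
 * softirq context, so a plain spin_lock() is enough.
 */
static bool enetc_xsk_xmit_locked(struct net_device *ndev,
				  struct xsk_buff_pool *pool, u32 queue_id)
{
	struct enetc_ndev_priv *priv = netdev_priv(ndev);
	struct enetc_bdr *tx_ring = priv->xdp_tx_ring[queue_id];
	bool done;

	spin_lock(&tx_ring->xdp_tx_lock);
	done = enetc_xsk_xmit(ndev, pool, queue_id);
	spin_unlock(&tx_ring->xdp_tx_lock);

	return done;
}
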
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@xxxxxxx>
> ---
>  drivers/net/ethernet/freescale/enetc/enetc.c | 102 ++++++++++++++++++-
>  drivers/net/ethernet/freescale/enetc/enetc.h |   2 +
>  2 files changed, 99 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
> index 3990c006c011..bc0db788afc7 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.c
> @@ -84,7 +84,7 @@ static void enetc_free_tx_swbd(struct enetc_bdr *tx_ring,
>  	struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
>  	struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
>
> -	if (tx_swbd->dma)
> +	if (!tx_swbd->is_xsk && tx_swbd->dma)
>  		enetc_unmap_tx_buff(tx_ring, tx_swbd);
>
>  	if (xdp_frame) {
> @@ -817,7 +817,8 @@ static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
>  	rx_ring->xdp.xdp_tx_in_flight--;
>  }
>
> -static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
> +static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget,
> +				int *xsk_confirmed)
>  {
>  	int tx_frm_cnt = 0, tx_byte_cnt = 0, tx_win_drop = 0;
>  	struct net_device *ndev = tx_ring->ndev;
> @@ -854,7 +855,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
>  			tx_win_drop++;
>  		}
>
> -		if (tx_swbd->is_xdp_tx)
> +		if (tx_swbd->is_xsk)
> +			(*xsk_confirmed)++;
> +		else if (tx_swbd->is_xdp_tx)
>  			enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd);
>  		else if (likely(tx_swbd->dma))
>  			enetc_unmap_tx_buff(tx_ring, tx_swbd);
> @@ -1465,6 +1468,58 @@ int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
>  }
>  EXPORT_SYMBOL_GPL(enetc_xdp_xmit);
>
> +static void enetc_xsk_map_tx_desc(struct enetc_tx_swbd *tx_swbd,
> +				  const struct xdp_desc *xsk_desc,
> +				  struct xsk_buff_pool *pool)
> +{
> +	dma_addr_t dma;
> +
> +	dma = xsk_buff_raw_get_dma(pool, xsk_desc->addr);
> +	xsk_buff_raw_dma_sync_for_device(pool, dma, xsk_desc->len);
> +
> +	tx_swbd->dma = dma;
> +	tx_swbd->len = xsk_desc->len;
> +	tx_swbd->is_xsk = true;
> +	tx_swbd->is_eof = true;
> +}
> +
> +static bool enetc_xsk_xmit(struct net_device *ndev, struct xsk_buff_pool *pool,
> +			   u32 queue_id)
> +{
> +	struct enetc_ndev_priv *priv = netdev_priv(ndev);
> +	struct xdp_desc *xsk_descs = pool->tx_descs;
> +	struct enetc_tx_swbd tx_swbd = {0};
> +	struct enetc_bdr *tx_ring;
> +	u32 budget, batch;
> +	int i, k;
> +
> +	tx_ring = priv->xdp_tx_ring[queue_id];
> +
> +	/* Shouldn't race with anyone because we are running in the softirq
> +	 * of the only CPU that sends packets to this TX ring
> +	 */
> +	budget = min(enetc_bd_unused(tx_ring) - 1, ENETC_XSK_TX_BATCH);
> +
> +	batch = xsk_tx_peek_release_desc_batch(pool, budget);
> +	if (!batch)
> +		return true;
> +
> +	i = tx_ring->next_to_use;
> +
> +	for (k = 0; k < batch; k++) {
> +		enetc_xsk_map_tx_desc(&tx_swbd, &xsk_descs[k], pool);
> +		enetc_xdp_map_tx_buff(tx_ring, i, &tx_swbd, tx_swbd.len);
> +		enetc_bdr_idx_inc(tx_ring, &i);
> +	}
> +
> +	tx_ring->next_to_use = i;
> +
> +	xsk_tx_release(pool);

xsk_tx_release() is not needed if you're using
xsk_tx_peek_release_desc_batch() above, it will do this for you at the
end of its job.
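
i.e. I think the tail of enetc_xsk_xmit() can be reduced to just this
(untested; the batch of descriptors has already been released to the
XSK ring by xsk_tx_peek_release_desc_batch() at this point):

	/* No xsk_tx_release() needed: xsk_tx_peek_release_desc_batch()
	 * already released the peeked descriptors, so just publish the
	 * new producer index and kick the hardware.
	 */
	tx_ring->next_to_use = i;

	enetc_update_tx_ring_tail(tx_ring);

	return budget != batch;
}
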
> +	enetc_update_tx_ring_tail(tx_ring);
> +
> +	return budget != batch;
> +}
> +
>  static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
>  				     struct xdp_buff *xdp_buff, u16 size)
>  {
> @@ -1881,6 +1936,7 @@ static int enetc_poll(struct napi_struct *napi, int budget)
>  	struct enetc_bdr *rx_ring = &v->rx_ring;
>  	struct xsk_buff_pool *pool;
>  	struct bpf_prog *prog;
> +	int xsk_confirmed = 0;
>  	bool complete = true;
>  	int work_done;
>  	int i;
> @@ -1888,7 +1944,8 @@ static int enetc_poll(struct napi_struct *napi, int budget)
>  	enetc_lock_mdio();
>
>  	for (i = 0; i < v->count_tx_rings; i++)
> -		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
> +		if (!enetc_clean_tx_ring(&v->tx_ring[i], budget,
> +					 &xsk_confirmed))
>  			complete = false;
>
>  	prog = rx_ring->xdp.prog;
> @@ -1901,6 +1958,17 @@ static int enetc_poll(struct napi_struct *napi, int budget)
>  	else
>  		work_done = enetc_clean_rx_ring(rx_ring, napi, budget);
>
> +	if (pool) {
> +		if (xsk_confirmed)
> +			xsk_tx_completed(pool, xsk_confirmed);
> +
> +		if (xsk_uses_need_wakeup(pool))
> +			xsk_set_tx_need_wakeup(pool);
> +
> +		if (!enetc_xsk_xmit(rx_ring->ndev, pool, rx_ring->index))
> +			complete = false;
> +	}
> +
>  	if (work_done == budget)
>  		complete = false;
>  	if (work_done)
> @@ -3122,7 +3190,31 @@ static int enetc_setup_xsk_pool(struct net_device *ndev,
>
>  int enetc_xsk_wakeup(struct net_device *ndev, u32 queue_id, u32 flags)
>  {
> -	/* xp_assign_dev() wants this; nothing needed for RX */
> +	struct enetc_ndev_priv *priv = netdev_priv(ndev);
> +	struct enetc_int_vector *v;
> +	struct enetc_bdr *rx_ring;
> +
> +	if (!netif_running(ndev) || !netif_carrier_ok(ndev))
> +		return -ENETDOWN;
> +
> +	if (queue_id >= priv->bdr_int_num)
> +		return -ERANGE;
> +
> +	v = priv->int_vector[queue_id];
> +	rx_ring = &v->rx_ring;
> +
> +	if (!rx_ring->xdp.xsk_pool || !rx_ring->xdp.prog)
> +		return -EINVAL;
> +
> +	/* No way to kick TX by triggering a hardirq right away =>
> +	 * raise softirq. This might schedule NAPI on a CPU different than the
> +	 * smp_affinity of its IRQ would suggest, but that would happen anyway
> +	 * if, say, we change that affinity under heavy traffic.
> +	 * So enetc_poll() has to be prepared for it anyway.
> +	 */
> +	if (!napi_if_scheduled_mark_missed(&v->napi))
> +		napi_schedule(&v->napi);
> +
>  	return 0;
>  }
>
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
> index e1a746e37c9a..403f40473b52 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.h
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.h
> @@ -36,6 +36,7 @@ struct enetc_tx_swbd {
>  	u8 is_eof:1;
>  	u8 is_xdp_tx:1;
>  	u8 is_xdp_redirect:1;
> +	u8 is_xsk:1;
>  	u8 qbv_en:1;
>  };
>
> @@ -86,6 +87,7 @@ struct enetc_xdp_data {
>  #define ENETC_RX_RING_DEFAULT_SIZE	2048
>  #define ENETC_TX_RING_DEFAULT_SIZE	2048
>  #define ENETC_DEFAULT_TX_WORK		(ENETC_TX_RING_DEFAULT_SIZE / 2)
> +#define ENETC_XSK_TX_BATCH		ENETC_DEFAULT_TX_WORK
>
>  struct enetc_bdr_resource {
>  	/* Input arguments saved for teardown */
> --
> 2.34.1
>