The per-CPU variables used during bpf_prog_run_xdp() invocation and later during xdp_do_redirect() rely on disabled BH for their protection. Without locking in local_bh_disable() on PREEMPT_RT, these data structures require explicit locking. This is a follow-up on the previous change which introduced bpf_run_lock.redirect_lock and now uses it within drivers. The simple way is to acquire the lock before bpf_prog_run_xdp() is invoked and hold it until the end of the function. This does not always work because some drivers (cpsw, atlantic) invoke xdp_do_flush() in the same context. Acquiring the lock in bpf_prog_run_xdp() and dropping it in xdp_do_redirect() (without touching drivers) does not work because not all drivers which use bpf_prog_run_xdp() support XDP_REDIRECT (and invoke xdp_do_redirect()). Ideally the minimal locking scope would be bpf_prog_run_xdp() + xdp_do_redirect() and everything else (error recovery, DMA unmapping, free/alloc of memory, …) would happen outside of the locked section. 
Cc: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: Clark Wang <xiaoning.wang@xxxxxxx> Cc: Claudiu Manoil <claudiu.manoil@xxxxxxx> Cc: Ioana Ciornei <ioana.ciornei@xxxxxxx> Cc: Jesper Dangaard Brouer <hawk@xxxxxxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: Madalin Bucur <madalin.bucur@xxxxxxx> Cc: NXP Linux Team <linux-imx@xxxxxxx> Cc: Shenwei Wang <shenwei.wang@xxxxxxx> Cc: Vladimir Oltean <vladimir.oltean@xxxxxxx> Cc: Wei Fang <wei.fang@xxxxxxx> Cc: bpf@xxxxxxxxxxxxxxx Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 1 + .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 1 + .../net/ethernet/freescale/dpaa2/dpaa2-xsk.c | 30 ++++++++++--------- drivers/net/ethernet/freescale/enetc/enetc.c | 1 + drivers/net/ethernet/freescale/fec_main.c | 1 + 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dcbc598b11c6c..8adc766282fde 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2597,6 +2597,7 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr, } #endif + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); /* Update the length and the offset of the FD */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 888509cf1f210..08be35a3e3de7 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -442,6 +442,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, xdp_prepare_buff(&xdp, vaddr + offset, XDP_PACKET_HEADROOM, dpaa2_fd_get_len(fd), false); + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); /* xdp.data pointer may have changed */ diff --git 
a/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c index 051748b997f3f..e3ae9de6b0a34 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-xsk.c @@ -56,23 +56,25 @@ static u32 dpaa2_xsk_run_xdp(struct dpaa2_eth_priv *priv, xdp_buff->rxq = &ch->xdp_rxq; xsk_buff_dma_sync_for_cpu(xdp_buff, ch->xsk_pool); - xdp_act = bpf_prog_run_xdp(xdp_prog, xdp_buff); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + xdp_act = bpf_prog_run_xdp(xdp_prog, xdp_buff); - /* xdp.data pointer may have changed */ - dpaa2_fd_set_offset(fd, xdp_buff->data - vaddr); - dpaa2_fd_set_len(fd, xdp_buff->data_end - xdp_buff->data); + /* xdp.data pointer may have changed */ + dpaa2_fd_set_offset(fd, xdp_buff->data - vaddr); + dpaa2_fd_set_len(fd, xdp_buff->data_end - xdp_buff->data); - if (likely(xdp_act == XDP_REDIRECT)) { - err = xdp_do_redirect(priv->net_dev, xdp_buff, xdp_prog); - if (unlikely(err)) { - ch->stats.xdp_drop++; - dpaa2_eth_recycle_buf(priv, ch, addr); - } else { - ch->buf_count--; - ch->stats.xdp_redirect++; + if (likely(xdp_act == XDP_REDIRECT)) { + err = xdp_do_redirect(priv->net_dev, xdp_buff, xdp_prog); + if (unlikely(err)) { + ch->stats.xdp_drop++; + dpaa2_eth_recycle_buf(priv, ch, addr); + } else { + ch->buf_count--; + ch->stats.xdp_redirect++; + } + + goto xdp_redir; } - - goto xdp_redir; } switch (xdp_act) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index cffbf27c4656b..d516b28815af4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1578,6 +1578,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, rx_byte_cnt += VLAN_HLEN; rx_byte_cnt += xdp_get_buff_len(&xdp_buff); + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); switch (xdp_act) { diff --git 
a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c3b7694a74851..335b1e307d468 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1587,6 +1587,7 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog, int err; u32 act; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(prog, xdp); /* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover -- 2.43.0