The per-CPU variables used during bpf_prog_run_xdp() invocation and later during xdp_do_redirect() rely on disabled BH for their protection. Without locking in local_bh_disable() on PREEMPT_RT these data structure require explicit locking. This is a follow-up on the previous change which introduced bpf_run_lock.redirect_lock and uses it now within drivers. The simple way is to acquire the lock before bpf_prog_run_xdp() is invoked and hold it until the end of function. This does not always work because some drivers (cpsw, atlantic) invoke xdp_do_flush() in the same context. Acquiring the lock in bpf_prog_run_xdp() and dropping in xdp_do_redirect() (without touching drivers) does not work because not all driver, which use bpf_prog_run_xdp(), do support XDP_REDIRECT (and invoke xdp_do_redirect()). Ideally the minimal locking scope would be bpf_prog_run_xdp() + xdp_do_redirect() and everything else (error recovery, DMA unmapping, free/ alloc of memory, …) would happen outside of the locked section. Cc: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: Jesper Dangaard Brouer <hawk@xxxxxxxxxx> Cc: Jesse Brandeburg <jesse.brandeburg@xxxxxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: Tony Nguyen <anthony.l.nguyen@xxxxxxxxx> Cc: bpf@xxxxxxxxxxxxxxx (open list:XDP Cc: intel-wired-lan@xxxxxxxxxxxxxxxx Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 + drivers/net/ethernet/intel/i40e/i40e_xsk.c | 22 +++++++++-------- drivers/net/ethernet/intel/ice/ice_txrx.c | 1 + drivers/net/ethernet/intel/ice/ice_xsk.c | 21 ++++++++-------- drivers/net/ethernet/intel/igb/igb_main.c | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 5 +++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 24 ++++++++++--------- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- 9 files changed, 46 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index dd410b15000f7..76e069ae2183a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2326,6 +2326,7 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct prefetchw(xdp->data_hard_start); /* xdp_frame write */ + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e99fa854d17f1..2b0c0c1f3ddc8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -201,17 +201,19 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct i40e_ring *xdp_ring; u32 act; - act = bpf_prog_run_xdp(xdp_prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + act = bpf_prog_run_xdp(xdp_prog, xdp); - if (likely(act == XDP_REDIRECT)) { - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (!err) - return I40E_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) - result = I40E_XDP_EXIT; - else - result = I40E_XDP_CONSUMED; - goto out_failure; + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + if (!err) + return I40E_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = I40E_XDP_EXIT; + else + result = I40E_XDP_CONSUMED; + goto out_failure; + } } switch (act) { diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 9e97ea8630686..5d4cfa3455b37 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -571,6 +571,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (!xdp_prog) goto exit; + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 99954508184f9..02f89c22d19e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -762,17 +762,18 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, int err, result = ICE_XDP_PASS; u32 act; + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { act = bpf_prog_run_xdp(xdp_prog, xdp); - - if (likely(act == XDP_REDIRECT)) { - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (!err) - return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) - result = ICE_XDP_EXIT; - else - result = ICE_XDP_CONSUMED; - goto out_failure; + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + if (!err) + return ICE_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = ICE_XDP_EXIT; + else + result = ICE_XDP_CONSUMED; + goto out_failure; + } } switch (act) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b2295caa2f0ab..e01be809d030e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8621,6 +8621,7 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter, prefetchw(xdp->data_hard_start); /* xdp_frame write */ + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e9bb403bbacf9..8321419b3a307 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2485,7 +2485,10 @@ static int __igc_xdp_run_prog(struct igc_adapter *adapter, struct bpf_prog *prog, struct xdp_buff *xdp) { - u32 act = bpf_prog_run_xdp(prog, xdp); + u32 act; + + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); + act = bpf_prog_run_xdp(prog, xdp); switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 94bde2cad0f47..de564e8b83be2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2203,6 +2203,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, prefetchw(xdp->data_hard_start); /* xdp_frame write */ + guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock); act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 59798bc33298f..b988f758aad49 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -104,18 +104,20 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, struct xdp_frame *xdpf; u32 act; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - act = bpf_prog_run_xdp(xdp_prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) { + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + act = bpf_prog_run_xdp(xdp_prog, xdp); - if (likely(act == XDP_REDIRECT)) { - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - if (!err) - return IXGBE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) - result = IXGBE_XDP_EXIT; - else - result = IXGBE_XDP_CONSUMED; - goto out_failure; + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + if (!err) + return IXGBE_XDP_REDIR; + if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + result = IXGBE_XDP_EXIT; + else + result = IXGBE_XDP_CONSUMED; + goto out_failure; + } } switch (act) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a44e4bd561421..1c58c08aa15ff 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1059,7 +1059,8 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, if (!xdp_prog) goto xdp_out; - act = bpf_prog_run_xdp(xdp_prog, xdp); + scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: break; -- 2.43.0