[PATCH net-next 16/24] net: netkit, veth, tun, virt*: Use nested-BH locking for XDP redirect.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The per-CPU variables used during bpf_prog_run_xdp() invocation and
later during xdp_do_redirect() rely on disabled BH for their protection.
Without locking in local_bh_disable() on PREEMPT_RT these data structure
require explicit locking.

This is a follow-up on the previous change which introduced
bpf_run_lock.redirect_lock and uses it now within drivers.

The simple way is to acquire the lock before bpf_prog_run_xdp() is
invoked and hold it until the end of function.
This does not always work because some drivers (cpsw, atlantic) invoke
xdp_do_flush() in the same context.
Acquiring the lock in bpf_prog_run_xdp() and dropping in
xdp_do_redirect() (without touching drivers) does not work because not
all driver, which use bpf_prog_run_xdp(), do support XDP_REDIRECT (and
invoke xdp_do_redirect()).

Ideally the minimal locking scope would be bpf_prog_run_xdp() +
xdp_do_redirect() and everything else (error recovery, DMA unmapping,
free/ alloc of memory, …) would happen outside of the locked section.

Cc: "K. Y. Srinivasan" <kys@xxxxxxxxxxxxx>
Cc: "Michael S. Tsirkin" <mst@xxxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
Cc: Dexuan Cui <decui@xxxxxxxxxxxxx>
Cc: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>
Cc: Hao Luo <haoluo@xxxxxxxxxx>
Cc: Jesper Dangaard Brouer <hawk@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: John Fastabend <john.fastabend@xxxxxxxxx>
Cc: Juergen Gross <jgross@xxxxxxxx>
Cc: KP Singh <kpsingh@xxxxxxxxxx>
Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
Cc: Nikolay Aleksandrov <razor@xxxxxxxxxxxxx>
Cc: Song Liu <song@xxxxxxxxxx>
Cc: Stanislav Fomichev <sdf@xxxxxxxxxx>
Cc: Stefano Stabellini <sstabellini@xxxxxxxxxx>
Cc: Wei Liu <wei.liu@xxxxxxxxxx>
Cc: Willem de Bruijn <willemdebruijn.kernel@xxxxxxxxx>
Cc: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
Cc: Yonghong Song <yonghong.song@xxxxxxxxx>
Cc: bpf@xxxxxxxxxxxxxxx
Cc: virtualization@xxxxxxxxxxxxxxx
Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
 drivers/net/hyperv/netvsc_bpf.c |  1 +
 drivers/net/netkit.c            | 13 +++++++----
 drivers/net/tun.c               | 28 +++++++++++++----------
 drivers/net/veth.c              | 40 ++++++++++++++++++++-------------
 drivers/net/virtio_net.c        |  1 +
 drivers/net/xen-netfront.c      |  1 +
 6 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 4a9522689fa4f..55f8ca92ca199 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -58,6 +58,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 
 	memcpy(xdp->data, data, len);
 
+	guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
 	act = bpf_prog_run_xdp(prog, xdp);
 
 	switch (act) {
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 39171380ccf29..fbcf78477bda8 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -80,8 +80,15 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
 	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
 	skb->dev = peer;
 	entry = rcu_dereference(nk->active);
-	if (entry)
-		ret = netkit_run(entry, skb, ret);
+	if (entry) {
+		scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+			ret = netkit_run(entry, skb, ret);
+			if (ret == NETKIT_REDIRECT) {
+				dev_sw_netstats_tx_add(dev, 1, len);
+				skb_do_redirect(skb);
+			}
+		}
+	}
 	switch (ret) {
 	case NETKIT_NEXT:
 	case NETKIT_PASS:
@@ -95,8 +102,6 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		break;
 	case NETKIT_REDIRECT:
-		dev_sw_netstats_tx_add(dev, 1, len);
-		skb_do_redirect(skb);
 		break;
 	case NETKIT_DROP:
 	default:
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afa5497f7c35c..fe0d31f11e4b6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1708,16 +1708,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
 		xdp_prepare_buff(&xdp, buf, pad, len, false);
 
-		act = bpf_prog_run_xdp(xdp_prog, &xdp);
-		if (act == XDP_REDIRECT || act == XDP_TX) {
-			get_page(alloc_frag->page);
-			alloc_frag->offset += buflen;
-		}
-		err = tun_xdp_act(tun, xdp_prog, &xdp, act);
-		if (err < 0) {
-			if (act == XDP_REDIRECT || act == XDP_TX)
-				put_page(alloc_frag->page);
-			goto out;
+		scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+			act = bpf_prog_run_xdp(xdp_prog, &xdp);
+			if (act == XDP_REDIRECT || act == XDP_TX) {
+				get_page(alloc_frag->page);
+				alloc_frag->offset += buflen;
+			}
+			err = tun_xdp_act(tun, xdp_prog, &xdp, act);
+			if (err < 0) {
+				if (act == XDP_REDIRECT || act == XDP_TX)
+					put_page(alloc_frag->page);
+				goto out;
+			}
 		}
 
 		if (err == XDP_REDIRECT)
@@ -2460,8 +2462,10 @@ static int tun_xdp_one(struct tun_struct *tun,
 		xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
 		xdp_set_data_meta_invalid(xdp);
 
-		act = bpf_prog_run_xdp(xdp_prog, xdp);
-		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+		scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+			act = bpf_prog_run_xdp(xdp_prog, xdp);
+			ret = tun_xdp_act(tun, xdp_prog, xdp, act);
+		}
 		if (ret < 0) {
 			put_page(virt_to_head_page(xdp->data));
 			return ret;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 977861c46b1fe..c69e5ff9f8795 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -624,7 +624,18 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 		xdp->rxq = &rq->xdp_rxq;
 		vxbuf.skb = NULL;
 
-		act = bpf_prog_run_xdp(xdp_prog, xdp);
+		scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+			act = bpf_prog_run_xdp(xdp_prog, xdp);
+			if (act == XDP_REDIRECT) {
+				orig_frame = *frame;
+				xdp->rxq->mem = frame->mem;
+				if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+					frame = &orig_frame;
+					stats->xdp_drops++;
+					goto err_xdp;
+				}
+			}
+		}
 
 		switch (act) {
 		case XDP_PASS:
@@ -644,13 +655,6 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
-			orig_frame = *frame;
-			xdp->rxq->mem = frame->mem;
-			if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
-				frame = &orig_frame;
-				stats->rx_drops++;
-				goto err_xdp;
-			}
 			stats->xdp_redirect++;
 			rcu_read_unlock();
 			goto xdp_xmit;
@@ -857,7 +861,18 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	orig_data = xdp->data;
 	orig_data_end = xdp->data_end;
 
-	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+		act = bpf_prog_run_xdp(xdp_prog, xdp);
+		if (act == XDP_REDIRECT) {
+			veth_xdp_get(xdp);
+			consume_skb(skb);
+			xdp->rxq->mem = rq->xdp_mem;
+			if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
+				stats->rx_drops++;
+				goto err_xdp;
+			}
+		}
+	}
 
 	switch (act) {
 	case XDP_PASS:
@@ -875,13 +890,6 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
-		veth_xdp_get(xdp);
-		consume_skb(skb);
-		xdp->rxq->mem = rq->xdp_mem;
-		if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
-			stats->rx_drops++;
-			goto err_xdp;
-		}
 		stats->xdp_redirect++;
 		rcu_read_unlock();
 		goto xdp_xmit;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d16f592c2061f..5e362c4604239 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1010,6 +1010,7 @@ static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
 	int err;
 	u32 act;
 
+	guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	u64_stats_inc(&stats->xdp_packets);
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index ad29f370034e4..e3daa8cdeb84e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -978,6 +978,7 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata,
 	xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM,
 			 len, false);
 
+	guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_TX:
-- 
2.43.0






[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux