On Wed, Apr 14, 2021 at 08:26:07PM +0800, Hangbin Liu wrote: [ ... ] > diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c > index aa516472ce46..3980fb3bfb09 100644 > --- a/kernel/bpf/devmap.c > +++ b/kernel/bpf/devmap.c > @@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue { > struct list_head flush_node; > struct net_device *dev; > struct net_device *dev_rx; > + struct bpf_prog *xdp_prog; > unsigned int count; > }; > > @@ -326,22 +327,71 @@ bool dev_map_can_have_prog(struct bpf_map *map) > return false; > } > > +static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog, > + struct xdp_frame **frames, int n, > + struct net_device *dev) > +{ > + struct xdp_txq_info txq = { .dev = dev }; > + struct xdp_buff xdp; > + int i, nframes = 0; > + > + for (i = 0; i < n; i++) { > + struct xdp_frame *xdpf = frames[i]; > + u32 act; > + int err; > + > + xdp_convert_frame_to_buff(xdpf, &xdp); > + xdp.txq = &txq; > + > + act = bpf_prog_run_xdp(xdp_prog, &xdp); > + switch (act) { > + case XDP_PASS: > + err = xdp_update_frame_from_buff(&xdp, xdpf); > + if (unlikely(err < 0)) > + xdp_return_frame_rx_napi(xdpf); > + else > + frames[nframes++] = xdpf; > + break; > + default: > + bpf_warn_invalid_xdp_action(act); > + fallthrough; > + case XDP_ABORTED: > + trace_xdp_exception(dev, xdp_prog, act); > + fallthrough; > + case XDP_DROP: > + xdp_return_frame_rx_napi(xdpf); > + break; > + } > + } > + return nframes; /* sent frames count */ > +} > + > static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) > { > struct net_device *dev = bq->dev; > - int sent = 0, err = 0; > + int sent = 0, drops = 0, err = 0; > + unsigned int cnt = bq->count; > + int to_send = cnt; > int i; > > - if (unlikely(!bq->count)) > + if (unlikely(!cnt)) > return; > > - for (i = 0; i < bq->count; i++) { > + for (i = 0; i < cnt; i++) { > struct xdp_frame *xdpf = bq->q[i]; > > prefetch(xdpf); > } > > - sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags); > + if (bq->xdp_prog) { bq->xdp_prog is used here > + to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev); > + if (!to_send) > + goto out; > + > + drops = cnt - to_send; > + } > + [ ... ] > static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, > - struct net_device *dev_rx) > + struct net_device *dev_rx, struct bpf_prog *xdp_prog) > { > struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); > struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); > @@ -412,18 +466,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, > /* Ingress dev_rx will be the same for all xdp_frame's in > * bulk_queue, because bq stored per-CPU and must be flushed > * from net_device drivers NAPI func end. > + * > + * Do the same with xdp_prog and flush_list since these fields > + * are only ever modified together. > */ > - if (!bq->dev_rx) > + if (!bq->dev_rx) { > bq->dev_rx = dev_rx; > + bq->xdp_prog = xdp_prog; bp->xdp_prog is assigned here and could be used later in bq_xmit_all(). How is bq->xdp_prog protected? Are they all under one rcu_read_lock()? It is not very obvious after taking a quick look at xdp_do_flush[_map]. e.g. what if the devmap elem gets deleted. [ ... ] > static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, > - struct net_device *dev_rx) > + struct net_device *dev_rx, > + struct bpf_prog *xdp_prog) > { > struct xdp_frame *xdpf; > int err; > @@ -439,42 +497,14 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, > if (unlikely(!xdpf)) > return -EOVERFLOW; > > - bq_enqueue(dev, xdpf, dev_rx); > + bq_enqueue(dev, xdpf, dev_rx, xdp_prog); > return 0; > } > [ ... ] > @@ -482,12 +512,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, > { > struct net_device *dev = dst->dev; > > - if (dst->xdp_prog) { > - xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog); > - if (!xdp) > - return 0; > - } > - return __xdp_enqueue(dev, xdp, dev_rx); > + return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); > }