Storing a reference to the XDP socket in the netdev_rx_queue structure makes a single socket accessible without requiring a lookup in the XSKMAP. A future commit will introduce the XDP_REDIRECT_XSK action, which indicates that this reference should be used instead of performing the lookup. Since an rx ring is required for redirection, only store the reference if an rx ring is configured. When multiple sockets exist for a given context (netdev, qid), a reference is not stored because in this case we fall back to the default behavior of using the XSKMAP to redirect the packets. Signed-off-by: Ciara Loftus <ciara.loftus@xxxxxxxxx> --- include/linux/netdevice.h | 2 ++ net/xdp/xsk.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec42495a43a..1ad2491f0391 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -736,6 +736,8 @@ struct netdev_rx_queue { struct net_device *dev; #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; + struct xdp_sock *xsk; + refcount_t xsk_refcnt; #endif } ____cacheline_aligned_in_smp; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index f16074eb53c7..94ee524b9ca8 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -728,6 +728,30 @@ static void xsk_unbind_dev(struct xdp_sock *xs) /* Wait for driver to stop using the xdp socket. */ xp_del_xsk(xs->pool, xs); + if (xs->rx) { + if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 1) { + refcount_set(&dev->_rx[xs->queue_id].xsk_refcnt, 0); + WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, NULL); + } else { + refcount_dec(&dev->_rx[xs->queue_id].xsk_refcnt); + /* If the refcnt returns to one again store the reference to the + * remaining socket in the netdev_rx_queue. 
+ */ + if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 1) { + struct net *net = dev_net(dev); + struct xdp_sock *xsk; + struct sock *sk; + + mutex_lock(&net->xdp.lock); + sk = sk_head(&net->xdp.list); + xsk = xdp_sk(sk); + mutex_lock(&xsk->mutex); + WRITE_ONCE(xs->dev->_rx[xs->queue_id].xsk, xsk); + mutex_unlock(&xsk->mutex); + mutex_unlock(&net->xdp.lock); + } + } + } xs->dev = NULL; synchronize_net(); dev_put(dev); @@ -972,6 +996,16 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->queue_id = qid; xp_add_xsk(xs->pool, xs); + if (xs->rx) { + if (refcount_read(&dev->_rx[xs->queue_id].xsk_refcnt) == 0) { + WRITE_ONCE(dev->_rx[qid].xsk, xs); + refcount_set(&dev->_rx[qid].xsk_refcnt, 1); + } else { + refcount_inc(&dev->_rx[qid].xsk_refcnt); + WRITE_ONCE(dev->_rx[qid].xsk, NULL); + } + } + out_unlock: if (err) { dev_put(dev); -- 2.17.1