On 08/15, Tirthendu Sarkar wrote:
> xsk_build_skb_zerocopy() may return an error other than -EAGAIN and this
> is received as skb and used later in xsk_set_destructor_arg() and
> xsk_drop_skb() which must operate on a valid skb.
>
> Set -EOVERFLOW as error when MAX_SKB_FRAGS are exceeded and packet needs
> to be dropped and use this to distinguish against all other error cases
> where allocation needs to be retried.
>
> Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@xxxxxxxxx>
> Reported-by: kernel test robot <lkp@xxxxxxxxx>
> Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx>
> Closes: https://lore.kernel.org/r/202307210434.OjgqFcbB-lkp@xxxxxxxxx/
> Fixes: cf24f5a5feea ("xsk: add support for AF_XDP multi-buffer on Tx path")
>
> Changelog:
> v1 -> v2:
>   - Removed err as a parameter to xsk_build_skb_zerocopy()
>     [Stanislav Fomichev]
>   - use explicit error to distinguish packet drop vs retry
> ---
>  net/xdp/xsk.c | 22 +++++++++++++---------
>  1 file changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index fcfc8472f73d..55f8b9b0e06d 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -602,7 +602,7 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
>
>  	for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
>  		if (unlikely(i >= MAX_SKB_FRAGS))
> -			return ERR_PTR(-EFAULT);
> +			return ERR_PTR(-EOVERFLOW);
>
>  		page = pool->umem->pgs[addr >> PAGE_SHIFT];
>  		get_page(page);
> @@ -655,15 +655,17 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
>  		skb_put(skb, len);
>
>  		err = skb_store_bits(skb, 0, buffer, len);
> -		if (unlikely(err))
> +		if (unlikely(err)) {
> +			kfree_skb(skb);
>  			goto free_err;
> +		}
>  	} else {
>  		int nr_frags = skb_shinfo(skb)->nr_frags;
>  		struct page *page;
>  		u8 *vaddr;
>
>  		if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
> -			err = -EFAULT;
> +			err = -EOVERFLOW;
>  			goto free_err;
>  		}
>
> @@ -690,12 +692,14 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
>  		return skb;
>
>  free_err:
> -	if (err == -EAGAIN) {
> -		xsk_cq_cancel_locked(xs, 1);
> -	} else {
> -		xsk_set_destructor_arg(skb);
> -		xsk_drop_skb(skb);
> +	if (err == -EOVERFLOW) {

I don't think this will work: we have other error paths in
xsk_build_skb() that are not -EOVERFLOW but still need kfree_skb(),
right?

I feel like we are trying to share state between xsk_build_skb() and
xsk_build_skb_zerocopy() that we really shouldn't share.
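To make that concrete, the copy branch of xsk_build_skb() currently has
paths like these (paraphrasing from the current tree, so the details may
be slightly off):

	/* linear part: skb_store_bits() can fail with -EFAULT */
	err = skb_store_bits(skb, 0, buffer, len);
	if (unlikely(err))
		goto free_err;		/* valid skb, err != -EOVERFLOW */

	/* frag part: a mid-packet page allocation failure */
	page = alloc_page(xs->sk.sk_allocation);
	if (unlikely(!page)) {
		err = -EAGAIN;		/* genuine retry, handled later */
		goto free_err;
	}

With free_err keyed on -EOVERFLOW alone, anything that is neither
-EOVERFLOW nor a genuine retry has to remember to free the skb by hand
(as the skb_store_bits() hunk above now does), which is easy to get
wrong the next time an error path is added.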
So how about we give xsk_build_skb_zerocopy() a separate cleanup path
of its own? Will something like the following (untested / uncompiled)
work instead?

IOW, ideally, xsk_build_skb() should look like:

	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
		return xsk_build_skb_zerocopy(xs, desc);
	} else {
		return xsk_build_skb_copy(xs, desc);
		/* ^^ current path that should really be a separate func */
	}

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 79a96019b7ef..747dd012afdb 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -578,6 +578,20 @@ static void xsk_drop_skb(struct sk_buff *skb)
 	xsk_consume_skb(skb);
 }
 
+static struct sk_buff *xsk_cleanup_skb(int err, struct sk_buff *skb, struct xdp_sock *xs)
+{
+	/* !skb covers alloc failures where there is nothing to drop yet */
+	if (err == -EAGAIN || !skb) {
+		xsk_cq_cancel_locked(xs, 1);
+	} else {
+		xsk_set_destructor_arg(skb);
+		xsk_drop_skb(skb);
+		xskq_cons_release(xs->tx);
+	}
+
+	return ERR_PTR(err);
+}
+
 static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 					      struct xdp_desc *desc)
 {
@@ -593,8 +607,8 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 		hr = max(NET_SKB_PAD, L1_CACHE_ALIGN(xs->dev->needed_headroom));
 
 		skb = sock_alloc_send_skb(&xs->sk, hr, 1, &err);
 		if (unlikely(!skb))
-			return ERR_PTR(err);
+			goto free_err;
 
 		skb_reserve(skb, hr);
 	}
@@ -608,8 +622,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 	addr = buffer - pool->addrs;
 
 	for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
-		if (unlikely(i >= MAX_SKB_FRAGS))
-			return ERR_PTR(-EFAULT);
+		if (unlikely(i >= MAX_SKB_FRAGS)) {
+			err = -EFAULT;
+			goto free_err;
+		}
 
 		page = pool->umem->pgs[addr >> PAGE_SHIFT];
 		get_page(page);
@@ -629,6 +645,9 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 	refcount_add(ts, &xs->sk.sk_wmem_alloc);
 
 	return skb;
+
+free_err:
+	return xsk_cleanup_skb(err, skb, xs);
 }
 
 static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
@@ -641,11 +660,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	int err;
 
 	if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
-		skb = xsk_build_skb_zerocopy(xs, desc);
-		if (IS_ERR(skb)) {
-			err = PTR_ERR(skb);
-			goto free_err;
-		}
+		return xsk_build_skb_zerocopy(xs, desc);
 	} else {
 		u32 hr, tr, len;
 		void *buffer;
@@ -729,15 +744,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	return skb;
 
 free_err:
-	if (err == -EAGAIN) {
-		xsk_cq_cancel_locked(xs, 1);
-	} else {
-		xsk_set_destructor_arg(skb);
-		xsk_drop_skb(skb);
-		xskq_cons_release(xs->tx);
-	}
-
-	return ERR_PTR(err);
+	return xsk_cleanup_skb(err, skb, xs);
 }
 
 static int __xsk_generic_xmit(struct sock *sk)
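To spell out the other half (xsk_build_skb_copy() is a made-up name;
the body would be today's else branch moved over mostly verbatim), the
copy path would end up as something like:

	static struct sk_buff *xsk_build_skb_copy(struct xdp_sock *xs,
						  struct xdp_desc *desc)
	{
		struct sk_buff *skb = xs->skb;
		int err;

		/* ... today's copy-mode body, unchanged: allocate the skb
		 * for the first descriptor, then skb_store_bits() into the
		 * linear area or attach a freshly allocated frag page,
		 * setting err and jumping to free_err on failure ...
		 */

		xsk_set_destructor_arg(skb);
		return skb;

	free_err:
		return xsk_cleanup_skb(err, skb, xs);
	}

That way each builder owns its own error handling, and free_err can
never see an skb (or an ERR_PTR) that it didn't build itself.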