Re: [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 30, 2023 at 9:41 PM Alexei Starovoitov
<alexei.starovoitov@xxxxxxxxx> wrote:
>
> On Mon, Jan 30, 2023 at 1:51 PM Stanislav Fomichev <sdf@xxxxxxxxxx> wrote:
> >
> > Commit e04ce9f4040b ("selftests/bpf: Make crashes more debuggable in
> > test_progs") hasn't uncovered anything interesting besides
> > confirming that the test passes successfully, but crashes eventually [0].
> >
> > I'm assuming the crashes are coming from something overwriting
> > the stack/heap, probably due to xsk misuse. So I'm trying
> > a bunch of things to address that:
> >
> > - More debugging with real memory pointers for the queues/umem
> >   - To confirm that everything is sane
> > - Set proper tx/fill ring sizes
> >   - In particular, fill ring wasn't fully initialized, but I'm
> >     assuming no packets should be flowing there regardless
> >   - Do the same for xdp_hw_metadata
> > - Don't refill on tx completion; instead, only ack it
> >
> > 0: https://github.com/kernel-patches/bpf/actions/runs/4032162075/jobs/6931951300
> >
> > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> > ---
> >  .../selftests/bpf/prog_tests/xdp_metadata.c   | 36 +++++++++++++------
> >  tools/testing/selftests/bpf/xdp_hw_metadata.c |  4 +--
> >  2 files changed, 28 insertions(+), 12 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > index e033d48288c0..453b4045a9d1 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > @@ -54,11 +54,11 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> >         int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> >         const struct xsk_socket_config socket_config = {
> >                 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > -               .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > +               .tx_size = UMEM_NUM / 2,
>
> I'm not following. Is this a fix or just random debug code?

This chunk is a potential fix, but the patch overall is a mix of
potential fixes and debug code.
I can't reproduce the crash locally, so I'm trying a bunch of potential
fixes and adding more debugging in case they don't help.

> >                 .bind_flags = XDP_COPY,
> >         };
> >         const struct xsk_umem_config umem_config = {
> > -               .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > +               .fill_size = UMEM_NUM / 2,
> >                 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> >                 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> >                 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > @@ -88,13 +88,24 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> >         if (!ASSERT_OK(ret, "xsk_socket__create"))
> >                 return ret;
> >
> > +       printf("%p: umem=<%p..%p>\n", xsk, xsk->umem_area, xsk->umem_area + UMEM_SIZE);
> > +       printf("%p: fill=<%p..%p>\n", xsk, xsk->fill.ring,
> > +              xsk->fill.ring + xsk->fill.size * sizeof(__u64));
> > +       printf("%p: comp=<%p..%p>\n", xsk, xsk->comp.ring,
> > +              xsk->comp.ring + xsk->comp.size * sizeof(__u64));
> > +       printf("%p: rx=<%p..%p>\n", xsk, xsk->rx.ring,
> > +              xsk->rx.ring + xsk->rx.size * sizeof(struct xdp_desc));
> > +       printf("%p: tx=<%p..%p>\n", xsk, xsk->tx.ring,
> > +              xsk->tx.ring + xsk->tx.size * sizeof(struct xdp_desc));
> > +
>
> This is fine as debug.

Right. It should also be irrelevant when the test passes, since we
are writing this output to /dev/null.

> >         /* First half of umem is for TX. This way address matches 1-to-1
> >          * to the completion queue index.
> >          */
> >
> >         for (i = 0; i < UMEM_NUM / 2; i++) {
> >                 addr = i * UMEM_FRAME_SIZE;
> > -               printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
> > +               printf("%p: tx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > +                      xsk_umem__get_data(xsk->umem_area, addr));
> >         }
> >
> >         /* Second half of umem is for RX. */
> > @@ -107,7 +118,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> >
> >         for (i = 0; i < UMEM_NUM / 2; i++) {
> >                 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
> > -               printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
> > +               printf("%p: rx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > +                      xsk_umem__get_data(xsk->umem_area, addr));
> > +               printf("%p: fill %lx at %p\n", xsk, addr,
> > +                      xsk_ring_prod__fill_addr(&xsk->fill, i));
> >                 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
> >         }
> >         xsk_ring_prod__submit(&xsk->fill, ret);
> > @@ -159,6 +173,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
> >         tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> >         printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
> >         data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> > +       printf("%p: tx %llx (%p) at %p\n", xsk, tx_desc->addr, data, tx_desc);
> >
> >         eth = data;
> >         iph = (void *)(eth + 1);
> > @@ -205,9 +220,8 @@ static void complete_tx(struct xsk *xsk)
> >         if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
> >                 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
> >
> > -               printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
> > -               *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> > -               xsk_ring_prod__submit(&xsk->fill, 1);
> > +               printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
> > +               xsk_ring_cons__release(&xsk->comp, 1);
>
> What does this do?

I was incorrectly refilling the 'fill' ring on tx completion. This changes
it to "consume" the completion instead
(xsk_ring_cons__peek + xsk_ring_cons__release).

> >         }
> >  }
> >
> > @@ -216,7 +230,9 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
> >         __u32 idx;
> >
> >         if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
> > -               printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
> > +               printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
> > +               printf("%p: fill %llx at %p\n", xsk, addr,
> > +                      xsk_ring_prod__fill_addr(&xsk->fill, idx));
> >                 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> >                 xsk_ring_prod__submit(&xsk->fill, 1);
> >         }
> > @@ -253,8 +269,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
> >         rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
> >         comp_addr = xsk_umem__extract_addr(rx_desc->addr);
> >         addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
> > -       printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
> > -              xsk, idx, rx_desc->addr, addr, comp_addr);
> > +       printf("%p: rx_desc[%u]->addr=%llx (%p) addr=%llx comp_addr=%llx\n",
> > +              xsk, idx, rx_desc->addr, rx_desc, addr, comp_addr);
> >         data = xsk_umem__get_data(xsk->umem_area, addr);
> >
> >         /* Make sure we got the packet offset correctly. */
> > diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > index 3823b1c499cc..6d715f85ea20 100644
> > --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > @@ -59,11 +59,11 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
> >         int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> >         const struct xsk_socket_config socket_config = {
> >                 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > -               .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > +               .tx_size = UMEM_NUM / 2,
> >                 .bind_flags = XDP_COPY,
> >         };
> >         const struct xsk_umem_config umem_config = {
> > -               .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > +               .fill_size = UMEM_NUM / 2,
> >                 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> >                 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> >                 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > --
> > 2.39.1.456.gfc5497dd1b-goog
> >



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux