Re: [RFC PATCH v8 17/20] selftests: Add a basic fifo qdisc test

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, May 21, 2024 at 8:03 AM Amery Hung <ameryhung@xxxxxxxxx> wrote:
>
> On Mon, May 20, 2024 at 8:15 PM Stanislav Fomichev <sdf@xxxxxxxxxx> wrote:
> >
> > On 05/10, Amery Hung wrote:
> > > This selftest shows a bare minimum fifo qdisc, which simply enqueues skbs
> > > into the back of a bpf list and dequeues from the front of the list.
> > >
> > > Signed-off-by: Amery Hung <amery.hung@xxxxxxxxxxxxx>
> > > ---
> > >  .../selftests/bpf/prog_tests/bpf_qdisc.c      | 161 ++++++++++++++++++
> > >  .../selftests/bpf/progs/bpf_qdisc_common.h    |  23 +++
> > >  .../selftests/bpf/progs/bpf_qdisc_fifo.c      |  83 +++++++++
> > >  3 files changed, 267 insertions(+)
> > >  create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > >  create mode 100644 tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > > new file mode 100644
> > > index 000000000000..295d0216e70f
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
> > > @@ -0,0 +1,161 @@
> > > +#include <linux/pkt_sched.h>
> > > +#include <linux/rtnetlink.h>
> > > +#include <test_progs.h>
> > > +
> > > +#include "network_helpers.h"
> > > +#include "bpf_qdisc_fifo.skel.h"
> > > +
> > > +#ifndef ENOTSUPP
> > > +#define ENOTSUPP 524
> > > +#endif
> > > +
> > > +#define LO_IFINDEX 1
> > > +
> > > +static const unsigned int total_bytes = 10 * 1024 * 1024;
> > > +static int stop;
> > > +
> > > +static void *server(void *arg)
> > > +{
> > > +     int lfd = (int)(long)arg, err = 0, fd;
> > > +     ssize_t nr_sent = 0, bytes = 0;
> > > +     char batch[1500];
> > > +
> > > +     fd = accept(lfd, NULL, NULL);
> > > +     while (fd == -1) {
> > > +             if (errno == EINTR)
> > > +                     continue;
> > > +             err = -errno;
> > > +             goto done;
> > > +     }
> > > +
> > > +     if (settimeo(fd, 0)) {
> > > +             err = -errno;
> > > +             goto done;
> > > +     }
> > > +
> > > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > > +             nr_sent = send(fd, &batch,
> > > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > > +             if (nr_sent == -1 && errno == EINTR)
> > > +                     continue;
> > > +             if (nr_sent == -1) {
> > > +                     err = -errno;
> > > +                     break;
> > > +             }
> > > +             bytes += nr_sent;
> > > +     }
> > > +
> > > +     ASSERT_EQ(bytes, total_bytes, "send");
> > > +
> > > +done:
> > > +     if (fd >= 0)
> > > +             close(fd);
> > > +     if (err) {
> > > +             WRITE_ONCE(stop, 1);
> > > +             return ERR_PTR(err);
> > > +     }
> > > +     return NULL;
> > > +}
> > > +
> > > +static void do_test(char *qdisc)
> > > +{
> > > +     DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
> > > +                         .attach_point = BPF_TC_QDISC,
> > > +                         .parent = TC_H_ROOT,
> > > +                         .handle = 0x8000000,
> > > +                         .qdisc = qdisc);
> > > +     struct sockaddr_in6 sa6 = {};
> > > +     ssize_t nr_recv = 0, bytes = 0;
> > > +     int lfd = -1, fd = -1;
> > > +     pthread_t srv_thread;
> > > +     socklen_t addrlen = sizeof(sa6);
> > > +     void *thread_ret;
> > > +     char batch[1500];
> > > +     int err;
> > > +
> > > +     WRITE_ONCE(stop, 0);
> > > +
> > > +     err = bpf_tc_hook_create(&hook);
> > > +     if (!ASSERT_OK(err, "attach qdisc"))
> > > +             return;
> > > +
> > > +     lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
> > > +     if (!ASSERT_NEQ(lfd, -1, "socket")) {
> > > +             bpf_tc_hook_destroy(&hook);
> > > +             return;
> > > +     }
> > > +
> > > +     fd = socket(AF_INET6, SOCK_STREAM, 0);
> > > +     if (!ASSERT_NEQ(fd, -1, "socket")) {
> > > +             bpf_tc_hook_destroy(&hook);
> > > +             close(lfd);
> > > +             return;
> > > +     }
> > > +
> > > +     if (settimeo(lfd, 0) || settimeo(fd, 0))
> > > +             goto done;
> > > +
> > > +     err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
> > > +     if (!ASSERT_NEQ(err, -1, "getsockname"))
> > > +             goto done;
> > > +
> > > +     /* connect to server */
> > > +     err = connect(fd, (struct sockaddr *)&sa6, addrlen);
> > > +     if (!ASSERT_NEQ(err, -1, "connect"))
> > > +             goto done;
> > > +
> > > +     err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
> > > +     if (!ASSERT_OK(err, "pthread_create"))
> > > +             goto done;
> > > +
> > > +     /* recv total_bytes */
> > > +     while (bytes < total_bytes && !READ_ONCE(stop)) {
> > > +             nr_recv = recv(fd, &batch,
> > > +                            MIN(total_bytes - bytes, sizeof(batch)), 0);
> > > +             if (nr_recv == -1 && errno == EINTR)
> > > +                     continue;
> > > +             if (nr_recv == -1)
> > > +                     break;
> > > +             bytes += nr_recv;
> > > +     }
> > > +
> > > +     ASSERT_EQ(bytes, total_bytes, "recv");
> > > +
> > > +     WRITE_ONCE(stop, 1);
> > > +     pthread_join(srv_thread, &thread_ret);
> > > +     ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
> > > +
> > > +done:
> > > +     close(lfd);
> > > +     close(fd);
> > > +
> > > +     bpf_tc_hook_destroy(&hook);
> > > +     return;
> > > +}
> > > +
> > > +static void test_fifo(void)
> > > +{
> > > +     struct bpf_qdisc_fifo *fifo_skel;
> > > +     struct bpf_link *link;
> > > +
> > > +     fifo_skel = bpf_qdisc_fifo__open_and_load();
> > > +     if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
> > > +             return;
> > > +
> > > +     link = bpf_map__attach_struct_ops(fifo_skel->maps.fifo);
> > > +     if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
> > > +             bpf_qdisc_fifo__destroy(fifo_skel);
> > > +             return;
> > > +     }
> > > +
> > > +     do_test("bpf_fifo");
> > > +
> > > +     bpf_link__destroy(link);
> > > +     bpf_qdisc_fifo__destroy(fifo_skel);
> > > +}
> > > +
> > > +void test_bpf_qdisc(void)
> > > +{
> > > +     if (test__start_subtest("fifo"))
> > > +             test_fifo();
> > > +}
> > > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > > new file mode 100644
> > > index 000000000000..96ab357de28e
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
> > > @@ -0,0 +1,23 @@
> > > +#ifndef _BPF_QDISC_COMMON_H
> > > +#define _BPF_QDISC_COMMON_H
> > > +
> > > +#define NET_XMIT_SUCCESS        0x00
> > > +#define NET_XMIT_DROP           0x01    /* skb dropped                  */
> > > +#define NET_XMIT_CN             0x02    /* congestion notification      */
> > > +
> > > +#define TC_PRIO_CONTROL  7
> > > +#define TC_PRIO_MAX      15
> > > +
> > > +void bpf_skb_set_dev(struct sk_buff *skb, struct Qdisc *sch) __ksym;
> > > +u32 bpf_skb_get_hash(struct sk_buff *p) __ksym;
> > > +void bpf_skb_release(struct sk_buff *p) __ksym;
> > > +void bpf_qdisc_skb_drop(struct sk_buff *p, struct bpf_sk_buff_ptr *to_free) __ksym;
> > > +void bpf_qdisc_watchdog_schedule(struct Qdisc *sch, u64 expire, u64 delta_ns) __ksym;
> > > +bool bpf_qdisc_find_class(struct Qdisc *sch, u32 classid) __ksym;
> > > +int bpf_qdisc_create_child(struct Qdisc *sch, u32 min,
> > > +                        struct netlink_ext_ack *extack) __ksym;
> > > +int bpf_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, u32 classid,
> > > +                   struct bpf_sk_buff_ptr *to_free_list) __ksym;
> > > +struct sk_buff *bpf_qdisc_dequeue(struct Qdisc *sch, u32 classid) __ksym;
> > > +
> > > +#endif
> > > diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > > new file mode 100644
> > > index 000000000000..433fd9c3639c
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
> > > @@ -0,0 +1,83 @@
> > > +#include <vmlinux.h>
> > > +#include "bpf_experimental.h"
> > > +#include "bpf_qdisc_common.h"
> > > +
> > > +char _license[] SEC("license") = "GPL";
> > > +
> > > +#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
> > > +
> > > +private(B) struct bpf_spin_lock q_fifo_lock;
> > > +private(B) struct bpf_list_head q_fifo __contains_kptr(sk_buff, bpf_list);
> > > +
> > > +unsigned int q_limit = 1000;
> > > +unsigned int q_qlen = 0;
> > > +
> > > +SEC("struct_ops/bpf_fifo_enqueue")
> > > +int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
> > > +          struct bpf_sk_buff_ptr *to_free)
> > > +{
> > > +     q_qlen++;
> > > +     if (q_qlen > q_limit) {
> > > +             bpf_qdisc_skb_drop(skb, to_free);
> > > +             return NET_XMIT_DROP;
> > > +     }
> >
> > [..]
> >
> > > +     bpf_spin_lock(&q_fifo_lock);
> > > +     bpf_list_excl_push_back(&q_fifo, &skb->bpf_list);
> > > +     bpf_spin_unlock(&q_fifo_lock);
> >
> > Can you also expand a bit on the locking here and elsewhere? And how it
> > interplays with TCQ_F_NOLOCK?
> >
> > As I mentioned at lsfmmbpf, I don't think there is a lot of similar
> > locking in the existing C implementations? So why do we need it here?
>
> The locks are required to prevent catastrophic concurrent accesses to
> bpf graphs. The verifier will check 1) if there is a spin_lock in the
> same struct with a list head or rbtree root, and 2) the lock is held
> when accessing the list or rbtree.
>
> Since we have the safety guarantee provided by the verifier, I think
> there is an opportunity to allow qdisc users to set TCQ_F_NOLOCK. I will
> check if qdisc kfuncs are TCQ_F_NOLOCK safe though. Let me know if I
> missed anything.

Ah, so these locking constraints come from the verifier....
In this case, yes, it would be nice to have special treatment for bpf
qdisc (or maybe allow passing TCQ_F_NOLOCK somehow). If the verifier
enforces the locking for the underlying data structures, we should try
to remove the ones from the generic qdisc layer.





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux