On Thu, 3 Sep 2020 at 21:36, Andrii Nakryiko <andriin@xxxxxx> wrote:
>
> As one of the most complicated and close-to-real-world programs, cls_redirect
> is a good candidate to exercise libbpf's logic of handling bpf2bpf calls. So
> add a variant using explicit __noinline for the majority of functions, except
> for a few of the most basic ones. If those few functions are not inlined, the
> verifier starts to complain about the program instruction limit of 1 million
> instructions being exceeded, most probably due to the instruction overhead of
> doing a sub-program call.
> Convert the user-space part of the selftest to have two sub-tests: with and
> without inlining.

Acked-by: Lorenz Bauer <lmb@xxxxxxxxxxxxxx>

>
> Cc: Lorenz Bauer <lmb@xxxxxxxxxxxxxx>
> Signed-off-by: Andrii Nakryiko <andriin@xxxxxx>
> ---
>  .../selftests/bpf/prog_tests/cls_redirect.c   |  72 +++++++++---
>  .../selftests/bpf/progs/test_cls_redirect.c   | 105 ++++++++++--------
>  .../bpf/progs/test_cls_redirect_subprogs.c    |   2 +
>  3 files changed, 115 insertions(+), 64 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
> index f259085cca6a..9781d85cb223 100644
> --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
> +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
> @@ -12,10 +12,13 @@
>
>  #include "progs/test_cls_redirect.h"
>  #include "test_cls_redirect.skel.h"
> +#include "test_cls_redirect_subprogs.skel.h"
>
>  #define ENCAP_IP INADDR_LOOPBACK
>  #define ENCAP_PORT (1234)
>
> +static int duration = 0;
> +
>  struct addr_port {
>  	in_port_t port;
>  	union {
> @@ -361,30 +364,18 @@ static void close_fds(int *fds, int n)
>  		close(fds[i]);
>  }
>
> -void test_cls_redirect(void)
> +static void test_cls_redirect_common(struct bpf_program *prog)
>  {
> -	struct test_cls_redirect *skel = NULL;
>  	struct bpf_prog_test_run_attr tattr = {};
>  	int families[] = { AF_INET, AF_INET6 };
>  	struct sockaddr_storage ss;
>  	struct sockaddr *addr;
>  	socklen_t slen;
>  	int i, j, err;
> -
>  	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
>  	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
>  	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
>
> -	skel = test_cls_redirect__open();
> -	if (CHECK_FAIL(!skel))
> -		return;
> -
> -	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
> -	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
> -
> -	if (CHECK_FAIL(test_cls_redirect__load(skel)))
> -		goto cleanup;
> -
>  	addr = (struct sockaddr *)&ss;
>  	for (i = 0; i < ARRAY_SIZE(families); i++) {
>  		slen = prepare_addr(&ss, families[i]);
> @@ -402,7 +393,7 @@ void test_cls_redirect(void)
>  			goto cleanup;
>  	}
>
> -	tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
> +	tattr.prog_fd = bpf_program__fd(prog);
>  	for (i = 0; i < ARRAY_SIZE(tests); i++) {
>  		struct test_cfg *test = &tests[i];
>
> @@ -450,7 +441,58 @@ void test_cls_redirect(void)
>  	}
>
>  cleanup:
> -	test_cls_redirect__destroy(skel);
>  	close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
>  	close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
>  }
> +
> +static void test_cls_redirect_inlined(void)
> +{
> +	struct test_cls_redirect *skel;
> +	int err;
> +
> +	skel = test_cls_redirect__open();
> +	if (CHECK(!skel, "skel_open", "failed\n"))
> +		return;
> +
> +	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
> +	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
> +
> +	err = test_cls_redirect__load(skel);
> +	if (CHECK(err, "skel_load", "failed: %d\n", err))
> +		goto cleanup;
> +
> +	test_cls_redirect_common(skel->progs.cls_redirect);
> +
> +cleanup:
> +	test_cls_redirect__destroy(skel);
> +}
> +
> +static void test_cls_redirect_subprogs(void)
> +{
> +	struct test_cls_redirect_subprogs *skel;
> +	int err;
> +
> +	skel = test_cls_redirect_subprogs__open();
> +	if (CHECK(!skel, "skel_open", "failed\n"))
> +		return;
> +
> +	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
> +	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
> +
> +	err = test_cls_redirect_subprogs__load(skel);
> +	if (CHECK(err, "skel_load", "failed: %d\n", err))
> +		goto cleanup;
> +
> +	test_cls_redirect_common(skel->progs.cls_redirect);
> +
> +cleanup:
> +	test_cls_redirect_subprogs__destroy(skel);
> +}
> +
> +void test_cls_redirect(void)
> +{
> +	if (test__start_subtest("cls_redirect_inlined"))
> +		test_cls_redirect_inlined();
> +	if (test__start_subtest("cls_redirect_subprogs"))
> +		test_cls_redirect_subprogs();
> +}
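
A side note for anyone skimming the thread: the whole trick is to compile the
same C source twice and let a single macro decide whether the helper functions
are forced inline or emitted as bpf2bpf subprograms. A stripped-down sketch of
the pattern (file and function names below are made up for illustration, this
is not the selftest code):

  /* cls_foo.c -- hypothetical minimal example of the dual-variant pattern */
  #include <linux/bpf.h>
  #include <linux/pkt_cls.h>
  #include <bpf/bpf_helpers.h>

  #ifdef SUBPROGS
  #define INLINING __noinline		/* emit real bpf2bpf calls */
  #else
  #define INLINING __always_inline	/* collapse helpers into the entry prog */
  #endif

  /* The same helper becomes either inlined code or a separate subprogram,
   * depending on which variant is being compiled.
   */
  static INLINING int drop_oversized(struct __sk_buff *skb)
  {
  	return skb->len > 1500 ? TC_ACT_SHOT : TC_ACT_OK;
  }

  SEC("classifier")
  int cls_foo(struct __sk_buff *skb)
  {
  	return drop_oversized(skb);
  }

  char _license[] SEC("license") = "GPL";

  /* cls_foo_subprogs.c -- the entire second variant is just: */
  #define SUBPROGS
  #include "cls_foo.c"

The new test_cls_redirect_subprogs.c at the end of the patch is the same kind
of two-line wrapper. Both variants then run under ./test_progs -t cls_redirect;
if I'm remembering the filter syntax correctly, -t
cls_redirect/cls_redirect_subprogs picks out just the __noinline flavour.
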
> diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
> index f0b72e86bee5..c9f8464996ea 100644
> --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
> +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
> @@ -22,6 +22,12 @@
>
>  #include "test_cls_redirect.h"
>
> +#ifdef SUBPROGS
> +#define INLINING __noinline
> +#else
> +#define INLINING __always_inline
> +#endif
> +
>  #define offsetofend(TYPE, MEMBER) \
>  	(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
>
> @@ -125,7 +131,7 @@ typedef struct buf {
>  	uint8_t *const tail;
>  } buf_t;
>
> -static size_t buf_off(const buf_t *buf)
> +static __always_inline size_t buf_off(const buf_t *buf)
>  {
>  	/* Clang seems to optimize constructs like
>  	 *    a - b + c
> @@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf)
>  	return off;
>  }
>
> -static bool buf_copy(buf_t *buf, void *dst, size_t len)
> +static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
>  {
>  	if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
>  		return false;
> @@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
>  	return true;
>  }
>
> -static bool buf_skip(buf_t *buf, const size_t len)
> +static __always_inline bool buf_skip(buf_t *buf, const size_t len)
>  {
>  	/* Check whether off + len is valid in the non-linear part. */
>  	if (buf_off(buf) + len > buf->skb->len) {
> @@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
>   * If scratch is not NULL, the function will attempt to load non-linear
>   * data via bpf_skb_load_bytes. On success, scratch is returned.
>   */
> -static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
> +static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
>  {
>  	if (buf->head + len > buf->tail) {
>  		if (scratch == NULL) {
> @@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
>  	return ptr;
>  }
>
> -static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
> +static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
>  {
>  	if (ipv4->ihl <= 5) {
>  		return true;
> @@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
>  	return buf_skip(buf, (ipv4->ihl - 5) * 4);
>  }
>
> -static bool ipv4_is_fragment(const struct iphdr *ip)
> +static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
>  {
>  	uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
>  	return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
>  }
>
> -static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
> +static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
>  {
>  	struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
>  	if (ipv4 == NULL) {
> @@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
>  }
>
>  /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
> -static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
> +static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
>  {
>  	if (!buf_copy(pkt, ports, sizeof(*ports))) {
>  		return false;
> @@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
>  	return true;
>  }
>
> -static uint16_t pkt_checksum_fold(uint32_t csum)
> +static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
>  {
>  	/* The highest reasonable value for an IPv4 header
>  	 * checksum requires two folds, so we just do that always.
> @@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
>  	return (uint16_t)~csum;
>  }
>
> -static void pkt_ipv4_checksum(struct iphdr *iph)
> +static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
>  {
>  	iph->check = 0;
>
> @@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
>  	iph->check = pkt_checksum_fold(acc);
>  }
>
> -static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
> -					    const struct ipv6hdr *ipv6,
> -					    uint8_t *upper_proto,
> -					    bool *is_fragment)
> +static INLINING
> +bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
> +				     const struct ipv6hdr *ipv6,
> +				     uint8_t *upper_proto,
> +				     bool *is_fragment)
>  {
>  	/* We understand five extension headers.
>  	 * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
> @@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
>   * scratch is allocated on the stack. However, this usage should be safe since
>   * it's the callers stack after all.
>   */
> -static inline __attribute__((__always_inline__)) struct ipv6hdr *
> +static __always_inline struct ipv6hdr *
>  pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
>  	       bool *is_fragment)
>  {
> @@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
>
>  /* Global metrics, per CPU
>   */
> -struct bpf_map_def metrics_map SEC("maps") = {
> -	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
> -	.key_size = sizeof(unsigned int),
> -	.value_size = sizeof(metrics_t),
> -	.max_entries = 1,
> -};
> -
> -static metrics_t *get_global_metrics(void)
> +struct {
> +	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
> +	__uint(max_entries, 1);
> +	__type(key, unsigned int);
> +	__type(value, metrics_t);
> +} metrics_map SEC(".maps");
> +
> +static INLINING metrics_t *get_global_metrics(void)
>  {
>  	uint64_t key = 0;
>  	return bpf_map_lookup_elem(&metrics_map, &key);
>  }
>
> -static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
> +static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
>  {
>  	const int payload_off =
>  		sizeof(*encap) +
> @@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
>  	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
>  }
>
> -static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
> -			      struct in_addr *next_hop, metrics_t *metrics)
> +static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
> +				       struct in_addr *next_hop, metrics_t *metrics)
>  {
>  	metrics->forwarded_packets_total_gre++;
>
> @@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
>  	return bpf_redirect(skb->ifindex, 0);
>  }
>
> -static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
> -				 struct in_addr *next_hop, metrics_t *metrics)
> +static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
> +					  struct in_addr *next_hop, metrics_t *metrics)
>  {
>  	/* swap L2 addresses */
>  	/* This assumes that packets are received from a router.
> @@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
>  	return bpf_redirect(skb->ifindex, 0);
>  }
>
> -static ret_t skip_next_hops(buf_t *pkt, int n)
> +static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
>  {
>  	switch (n) {
>  	case 1:
> @@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
>   * pkt is positioned just after the variable length GLB header
>   * iff the call is successful.
>   */
> -static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
> -			  struct in_addr *next_hop)
> +static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
> +				   struct in_addr *next_hop)
>  {
>  	if (encap->unigue.next_hop > encap->unigue.hop_count) {
>  		return TC_ACT_SHOT;
> @@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
>   * return value, and calling code works while still being "generic" to
>   * IPv4 and IPv6.
>   */
> -static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
> -			   uint64_t iphlen, uint16_t sport, uint16_t dport)
> +static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
> +				    uint64_t iphlen, uint16_t sport, uint16_t dport)
>  {
>  	switch (iphlen) {
>  	case sizeof(struct iphdr): {
> @@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
>  	}
>  }
>
> -static verdict_t classify_tcp(struct __sk_buff *skb,
> -			      struct bpf_sock_tuple *tuple, uint64_t tuplen,
> -			      void *iph, struct tcphdr *tcp)
> +static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
> +				       struct bpf_sock_tuple *tuple, uint64_t tuplen,
> +				       void *iph, struct tcphdr *tcp)
>  {
>  	struct bpf_sock *sk =
>  		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
> @@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
>  	return UNKNOWN;
>  }
>
> -static verdict_t classify_udp(struct __sk_buff *skb,
> -			      struct bpf_sock_tuple *tuple, uint64_t tuplen)
> +static INLINING verdict_t classify_udp(struct __sk_buff *skb,
> +				       struct bpf_sock_tuple *tuple, uint64_t tuplen)
>  {
>  	struct bpf_sock *sk =
>  		bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
> @@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
>  	return UNKNOWN;
>  }
>
> -static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
> -			       struct bpf_sock_tuple *tuple, uint64_t tuplen,
> -			       metrics_t *metrics)
> +static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
> +					struct bpf_sock_tuple *tuple, uint64_t tuplen,
> +					metrics_t *metrics)
>  {
>  	switch (proto) {
>  	case IPPROTO_TCP:
> @@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
>  	}
>  }
>
> -static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
> +static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
>  {
>  	struct icmphdr icmp;
>  	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
> @@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
>  			     sizeof(tuple.ipv4), metrics);
>  }
>
> -static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
> +static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
>  {
>  	struct icmp6hdr icmp6;
>  	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
> @@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
>  			     metrics);
>  }
>
> -static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
> -			     metrics_t *metrics)
> +static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
> +				      metrics_t *metrics)
>  {
>  	metrics->l4_protocol_packets_total_tcp++;
>
> @@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
>  	return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
>  }
>
> -static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
> -			     metrics_t *metrics)
> +static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
> +				      metrics_t *metrics)
>  {
>  	metrics->l4_protocol_packets_total_udp++;
>
> @@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
>  	return classify_udp(pkt->skb, &tuple, tuplen);
>  }
>
> -static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
> +static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
>  {
>  	metrics->l3_protocol_packets_total_ipv4++;
>
> @@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
>  	}
>  }
>
> -static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
> +static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
>  {
>  	metrics->l3_protocol_packets_total_ipv6++;
>
> diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
> new file mode 100644
> index 000000000000..eed26b70e3a2
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
> @@ -0,0 +1,2 @@
> +#define SUBPROGS
> +#include "test_cls_redirect.c"
> --
> 2.24.1
>

--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK

www.cloudflare.com