Re: [PATCH bpf-next v2 1/2] selftests/bpf: Support checks against a regular expression.

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Thu, 6 Jun 2024 10:47:51 -0700

On Thu, Jun 6, 2024 at 6:30 AM Cupertino Miranda
<cupertino.miranda@xxxxxxxxxx> wrote:
>
> Add support for __regex and __regex_unpriv macros to check the test
> execution output against a regular expression. This is similar to __msg
> and __msg_unpriv, however those expect full text matching.
>
> Signed-off-by: Cupertino Miranda <cupertino.miranda@xxxxxxxxxx>
> Cc: jose.marchesi@xxxxxxxxxx
> Cc: david.faust@xxxxxxxxxx
> Cc: Yonghong Song <yonghong.song@xxxxxxxxx>
> Cc: Eduard Zingerman <eddyz87@xxxxxxxxx>
> Cc: Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx>
> ---
>  tools/testing/selftests/bpf/progs/bpf_misc.h |  11 +-
>  tools/testing/selftests/bpf/test_loader.c    | 143 +++++++++++++++----
>  2 files changed, 121 insertions(+), 33 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
> index fb2f5513e29e..c0280bd2f340 100644
> --- a/tools/testing/selftests/bpf/progs/bpf_misc.h
> +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
> @@ -7,9 +7,9 @@
>   *
>   * The test_loader sequentially loads each program in a skeleton.
>   * Programs could be loaded in privileged and unprivileged modes.
> - * - __success, __failure, __msg imply privileged mode;
> - * - __success_unpriv, __failure_unpriv, __msg_unpriv imply
> - *   unprivileged mode.
> + * - __success, __failure, __msg, __regex imply privileged mode;
> + * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv
> + *   imply unprivileged mode.
>   * If combination of privileged and unprivileged attributes is present
>   * both modes are used. If none are present privileged mode is implied.
>   *
> @@ -24,6 +24,9 @@
>   *                   Multiple __msg attributes could be specified.
>   * __msg_unpriv      Same as __msg but for unprivileged mode.
>   *
> + * __regex           Same as __msg, but using a regular expression.
> + * __regex_unpriv    Same as __msg_unpriv but using a regular expression.
> + *
>   * __success         Expect program load success in privileged mode.
>   * __success_unpriv  Expect program load success in unprivileged mode.
>   *
> @@ -59,10 +62,12 @@
>   * __auxiliary_unpriv  Same, but load program in unprivileged mode.
>   */
>  #define __msg(msg)             __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
> +#define __regex(regex)         __attribute__((btf_decl_tag("comment:test_expect_regex=" regex)))
>  #define __failure              __attribute__((btf_decl_tag("comment:test_expect_failure")))
>  #define __success              __attribute__((btf_decl_tag("comment:test_expect_success")))
>  #define __description(desc)    __attribute__((btf_decl_tag("comment:test_description=" desc)))
>  #define __msg_unpriv(msg)      __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg)))
> +#define __regex_unpriv(regex)  __attribute__((btf_decl_tag("comment:test_expect_regex_unpriv=" regex)))
>  #define __failure_unpriv       __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
>  #define __success_unpriv       __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
>  #define __log_level(lvl)       __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
> diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
> index 524c38e9cde4..a9a7f5f55855 100644
> --- a/tools/testing/selftests/bpf/test_loader.c
> +++ b/tools/testing/selftests/bpf/test_loader.c
> @@ -2,6 +2,7 @@
>  /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
>  #include <linux/capability.h>
>  #include <stdlib.h>
> +#include <regex.h>
>  #include <test_progs.h>
>  #include <bpf/btf.h>
>
> @@ -17,9 +18,11 @@
>  #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
>  #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
>  #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
> +#define TEST_TAG_EXPECT_REGEX_PFX "comment:test_expect_regex="
>  #define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
>  #define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
>  #define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
> +#define TEST_TAG_EXPECT_REGEX_PFX_UNPRIV "comment:test_expect_regex_unpriv="
>  #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
>  #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
>  #define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
> @@ -46,10 +49,26 @@ enum mode {
>         UNPRIV = 2
>  };
>
> +enum message_type {
> +       SUBSTRING = 0,
> +       REGEX
> +};
> +struct expect_msg {
> +       union {
> +               const char *substring;
> +               struct {
> +                       const char *expr;
> +                       regex_t regex;
> +                       bool failed_to_compile;

you are not even using this field

> +               } regex;
> +       };
> +       enum message_type type;

I think this is an overkill. We are unlikely to have third type of
"match message" spec, so let's do just simple:

struct expect_msg {
    const char *substr; /* substring match */

    const char *regex_str; /* regex-based match */
    regex_t regex;
};

No unions, no nesting. substr is NULL for regex, otherwise regex_str
is NULL (for substring case). Simple.

> +};
> +
>  struct test_subspec {
>         char *name;
>         bool expect_failure;
> -       const char **expect_msgs;
> +       struct expect_msg *expect_msg;

it's still messages (plural), why renaming the field?

>         size_t expect_msg_cnt;
>         int retval;
>         bool execute;
> @@ -89,28 +108,58 @@ void test_loader_fini(struct test_loader *tester)
>
>  static void free_test_spec(struct test_spec *spec)
>  {
> +       int i;
> +
> +       /* Delalocate regex from expect_msg array. */

typo: deallocate

> +       for (i = 0; i < spec->priv.expect_msg_cnt; i++)
> +               if (spec->priv.expect_msg[i].type == REGEX)
> +                       regfree(&spec->priv.expect_msg[i].regex.regex);

and then:

if (spec->priv.expect_msg[i].regex_str)
    regfree(...);

Also, where is unpriv handling?

> +
>         free(spec->priv.name);
>         free(spec->unpriv.name);
> -       free(spec->priv.expect_msgs);
> -       free(spec->unpriv.expect_msgs);
> +       free(spec->priv.expect_msg);
> +       free(spec->unpriv.expect_msg);
>
>         spec->priv.name = NULL;
>         spec->unpriv.name = NULL;
> -       spec->priv.expect_msgs = NULL;
> -       spec->unpriv.expect_msgs = NULL;
> +       spec->priv.expect_msg = NULL;
> +       spec->unpriv.expect_msg = NULL;
>  }
>
> -static int push_msg(const char *msg, struct test_subspec *subspec)
> +static int push_msg(const char *match, enum message_type msg_type, struct test_subspec *subspec)

here we can just pass two mutually exclusive strings to specify what case it is:

static int push_msg(const char *substr, const char *regex_str, struct
test_subspec *subspec)

>  {
>         void *tmp;
> +       int regcomp_res;
> +       char error_msg[100];
> +       struct expect_msg *em;

we don't need both tmp and em, let's keep just

struct expect_msg *msg;

>
> -       tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *));
> +       tmp = realloc(subspec->expect_msg,
> +                     (1 + subspec->expect_msg_cnt) * sizeof(struct expect_msg));

then `msg = realloc(..)` here

>         if (!tmp) {
>                 ASSERT_FAIL("failed to realloc memory for messages\n");
>                 return -ENOMEM;
>         }
> -       subspec->expect_msgs = tmp;
> -       subspec->expect_msgs[subspec->expect_msg_cnt++] = msg;
> +       subspec->expect_msg = tmp;
> +       em = &subspec->expect_msg[subspec->expect_msg_cnt];

and reuse msg = &subspec->... here after assigning `subspec->expected_msg = msg`

> +       subspec->expect_msg_cnt += 1;
> +
> +       em->type = msg_type;
> +       switch (msg_type) {
> +       case SUBSTRING:
> +               em->substring = match;
> +               break;
> +       case REGEX:
> +               em->regex.expr = match;
> +               regcomp_res = regcomp(&em->regex.regex, match, REG_EXTENDED|REG_NEWLINE);
> +               if (regcomp_res != 0) {
> +                       regerror(regcomp_res, &em->regex.regex, error_msg, 100);
> +                       fprintf(stderr, "Regexp compilation error in '%s': '%s'\n",
> +                               match, error_msg);
> +                       ASSERT_FAIL("failed to compile regex\n");
> +                       return -EINVAL;
> +               }
> +               break;
> +       }

and then

if (substr) {
    msg->substr = substr;
} else {
    char err_msg[100];
    int err;

    err = regcomp(...);
    if (err) {
        ...
    }
}

Note I moved err_msg/err closer to the only place where they are being
used (and kept names less verbose).

>
>         return 0;
>  }
> @@ -233,13 +282,25 @@ static int parse_test_spec(struct test_loader *tester,
>                         spec->mode_mask |= UNPRIV;
>                 } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
>                         msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
> -                       err = push_msg(msg, &spec->priv);
> +                       err = push_msg(msg, SUBSTRING, &spec->priv);
>                         if (err)
>                                 goto cleanup;
>                         spec->mode_mask |= PRIV;
>                 } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) {
>                         msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1;
> -                       err = push_msg(msg, &spec->unpriv);
> +                       err = push_msg(msg, SUBSTRING, &spec->unpriv);
> +                       if (err)
> +                               goto cleanup;
> +                       spec->mode_mask |= UNPRIV;
> +               } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX)) {
> +                       msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX) - 1;
> +                       err = push_msg(msg, REGEX, &spec->priv);
> +                       if (err)
> +                               goto cleanup;
> +                       spec->mode_mask |= PRIV;
> +               } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX_UNPRIV)) {
> +                       msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX_UNPRIV) - 1;
> +                       err = push_msg(msg, REGEX, &spec->unpriv);
>                         if (err)
>                                 goto cleanup;
>                         spec->mode_mask |= UNPRIV;
> @@ -336,16 +397,16 @@ static int parse_test_spec(struct test_loader *tester,
>                         spec->unpriv.execute = spec->priv.execute;
>                 }
>
> -               if (!spec->unpriv.expect_msgs) {
> -                       size_t sz = spec->priv.expect_msg_cnt * sizeof(void *);
> +               if (!spec->unpriv.expect_msg) {
> +                       size_t sz = spec->priv.expect_msg_cnt * sizeof(struct expect_msg);
>
> -                       spec->unpriv.expect_msgs = malloc(sz);
> -                       if (!spec->unpriv.expect_msgs) {
> -                               PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n");
> +                       spec->unpriv.expect_msg = malloc(sz);
> +                       if (!spec->unpriv.expect_msg) {
> +                               PRINT_FAIL("failed to allocate memory for unpriv.expect\n");
>                                 err = -ENOMEM;
>                                 goto cleanup;
>                         }
> -                       memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz);
> +                       memcpy(spec->unpriv.expect_msg, spec->priv.expect_msg, sz);

hmm.. this is problematic given regcomp() and regfree(). We don't know
what sort of internal state libc will maintain in regex_t, so it's
unsafe to just memcpy() priv into unpriv like this. We probably need
to refactor this into an explicit push_msg() calls for spec->unpriv,
which is, luckily, trivial to do.

>                         spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt;
>                 }
>         }
> @@ -402,27 +463,49 @@ static void validate_case(struct test_loader *tester,
>                           struct bpf_program *prog,
>                           int load_err)
>  {
> -       int i, j;
> +       int i, j, reg_error;

"err" is totally fine, no need to be so verbose here

> +       char *match;
> +       regmatch_t reg_match[1];
>
>         for (i = 0; i < subspec->expect_msg_cnt; i++) {
> -               char *match;
> -               const char *expect_msg;
> -
> -               expect_msg = subspec->expect_msgs[i];
> +               struct expect_msg *em = &subspec->expect_msg[i];

msg to say consistent?

> +
> +               match = NULL;
> +               switch (em->type) {
> +               case SUBSTRING:
> +                       match = strstr(tester->log_buf + tester->next_match_pos, em->substring);
> +                       tester->next_match_pos = match - tester->log_buf + strlen(em->substring);

match will be NULL if strstr() doesn't find substring

> +                       break;
> +               case REGEX:
> +                       reg_error = regexec(&em->regex.regex,
> +                                           tester->log_buf + tester->next_match_pos,
> +                                           1, reg_match, 0);
> +                       if (reg_error == 0)
> +                               match = tester->log_buf + tester->next_match_pos + reg_match[0].rm_so;
> +                       tester->next_match_pos += reg_match[0].rm_eo;

Similarly, if we didn't find a match, we should not update position

> +                       break;
> +               }
>
> -               match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
>                 if (!ASSERT_OK_PTR(match, "expect_msg")) {
> -                       /* if we are in verbose mode, we've already emitted log */
>                         if (env.verbosity == VERBOSE_NONE)
>                                 emit_verifier_log(tester->log_buf, true /*force*/);
> -                       for (j = 0; j < i; j++)
> -                               fprintf(stderr,
> -                                       "MATCHED  MSG: '%s'\n", subspec->expect_msgs[j]);
> -                       fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
> +                       for (j = 0; j <= i; j++) {
> +                               const char *header = (j < i) ? "MATCHED" : "EXPECTED";
> +                               struct expect_msg *tmp = &subspec->expect_msg[j];

reuse msg variable?

> +
> +                               switch (tmp->type) {
> +                               case SUBSTRING:
> +                                       fprintf(stderr,
> +                                               "%s  MSG: '%s'\n", header, tmp->substring);
> +                                       break;
> +                               case REGEX:
> +                                       fprintf(stderr,
> +                                               "%s  REGEX: '%s'\n", header, tmp->regex.expr);
> +                                       break;
> +                               }

it's probably better to have just one fprintf handling everything, it
will be easier to follow

fprintf(stderr, "%s %s: '%s'\n",
        j < i ? "MATCHED " : "EXPECTED",
        msg->substr ? "SUBSTR" : " REGEX",
        msg->substr ?: msg->regex_str);

(note spaces for alignment)

pw-bot: cr

> +                       }
>                         return;
>                 }
> -
> -               tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
>         }
>  }
>
> --
> 2.39.2
>