Re: [PATCH bpf-next] selftests/bpf: add a verifier scale test with unknown bounded loop

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Fri, 26 Feb 2021 13:08:55 -0800

On Fri, Feb 26, 2021 at 9:50 AM Yonghong Song <yhs@xxxxxx> wrote:
>
> The orignal bcc pull request
>   https://github.com/iovisor/bcc/pull/3270
> exposed a verifier failure with Clang 12/13 while
> Clang 4 works fine. Further investigation exposed two issues.
>   Issue 1: LLVM may generate code which uses less refined
>      value. The issue is fixed in llvm patch
>      https://reviews.llvm.org/D97479
>   Issue 2: Spills with initial value 0 are marked as precise
>      which makes later state pruning less effective.
>      This is my rough initial analysis and further investigation
>      is needed to find how to improve verifier pruning
>      in such cases.
>
> With the above llvm patch, for the new loop6.c test, which has
> smaller loop bound compared to original test, I got
>   $ test_progs -s -n 10/16
>   ...
>   stack depth 64
>   processed 405099 insns (limit 1000000) max_states_per_insn 92
>       total_states 8866 peak_states 889 mark_read 6
>   #10/16 loop6.o:OK
>
> Use the original loop bound, i.e., commenting out "#define WORKAROUND",
> I got
>   $ test_progs -s -n 10/16
>   ...
>   BPF program is too large. Processed 1000001 insn
>   stack depth 64
>   processed 1000001 insns (limit 1000000) max_states_per_insn 91
>       total_states 23176 peak_states 5069 mark_read 6
>   ...
>   #10/16 loop6.o:FAIL
>
> The purpose of this patch is to provide a regression
> test for the above llvm fix and also provide a test
> case for further analyzing the verifier pruning issue.
>
> Cc: zhenwei pi <pizhenwei@xxxxxxxxxxxxx>
> Signed-off-by: Yonghong Song <yhs@xxxxxx>
> ---
>  tools/testing/selftests/bpf/README.rst        |  39 +++++++
>  .../bpf/prog_tests/bpf_verif_scale.c          |   1 +
>  tools/testing/selftests/bpf/progs/loop6.c     | 101 ++++++++++++++++++
>  3 files changed, 141 insertions(+)
>  create mode 100644 tools/testing/selftests/bpf/progs/loop6.c
>
> diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
> index fd148b8410fa..dbc8f6cc5c67 100644
> --- a/tools/testing/selftests/bpf/README.rst
> +++ b/tools/testing/selftests/bpf/README.rst
> @@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
>
>  __  https://reviews.llvm.org/D78466
>
> +bpf_verif_scale/loop6.o test failure with Clang 12
> +==================================================
> +
> +With Clang 12, the following bpf_verif_scale test failed:
> +  * ``bpf_verif_scale/loop6.o``
> +
> +The verifier output looks like
> +
> +.. code-block:: c
> +
> +  R1 type=ctx expected=fp
> +  The sequence of 8193 jumps is too complex.
> +
> +The reason is compiler generating the following code
> +
> +.. code-block:: c
> +
> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> +      14:       16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
> +      15:       bc 51 00 00 00 00 00 00 w1 = w5
> +      16:       04 01 00 00 ff ff ff ff w1 += -1
> +      17:       67 05 00 00 20 00 00 00 r5 <<= 32
> +      18:       77 05 00 00 20 00 00 00 r5 >>= 32
> +      19:       a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
> +      20:       b7 05 00 00 06 00 00 00 r5 = 6
> +  00000000000000a8 <LBB0_4>:
> +      21:       b7 02 00 00 00 00 00 00 r2 = 0
> +      22:       b7 01 00 00 00 00 00 00 r1 = 0
> +  ;       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
> +      23:       7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
> +      24:       7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
> +
> +Note that insn #15 has w1 = w5 and w1 is refined later but
> +r5(w5) is eventually saved on stack at insn #24 for later use.
> +This cause later verifier failure. The bug has been `fixed`__ in
> +Clang 13.
> +
> +__  https://reviews.llvm.org/D97479
> +
>  BPF CO-RE-based tests and Clang version
>  =======================================
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> index e698ee6bb6c2..3d002c245d2b 100644
> --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
> @@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
>                 { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
>                 { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
>                 { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
> +               { "loop6.o", BPF_PROG_TYPE_KPROBE },
>
>                 /* partial unroll. 19k insn in a loop.
>                  * Total program size 20.8k insn.
> diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
> new file mode 100644
> index 000000000000..fe535922bed8
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/loop6.c
> @@ -0,0 +1,101 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include <linux/ptrace.h>
> +#include <stddef.h>
> +#include <linux/bpf.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +char _license[] SEC("license") = "GPL";
> +
> +/* typically virtio scsi has max SGs of 6 */
> +#define VIRTIO_MAX_SGS 6
> +
> +/* Verifier will fail with SG_MAX = 128. The failure can be
> + * workarounded with a smaller SG_MAX, e.g. 10.
> + */
> +#define WORKAROUND
> +#ifdef WORKAROUND
> +#define SG_MAX         10
> +#else
> +/* typically virtio blk has max SEG of 128 */
> +#define SG_MAX         128
> +#endif
> +
> +#define SG_CHAIN       0x01UL
> +#define SG_END         0x02UL
> +
> +struct scatterlist {
> +       unsigned long   page_link;
> +       unsigned int    offset;
> +       unsigned int    length;
> +};
> +
> +#define sg_is_chain(sg)                ((sg)->page_link & SG_CHAIN)
> +#define sg_is_last(sg)         ((sg)->page_link & SG_END)
> +#define sg_chain_ptr(sg)       \
> +       ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
> +
> +static inline struct scatterlist *__sg_next(struct scatterlist *sgp)

nit: here and below, it doesn't have to be inline, does it?

> +{
> +       struct scatterlist sg;
> +
> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> +       if (sg_is_last(&sg))
> +               return NULL;
> +
> +       sgp++;
> +
> +       bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
> +       if (sg_is_chain(&sg))
> +               sgp = sg_chain_ptr(&sg);
> +
> +       return sgp;
> +}
> +
> +static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
> +{
> +       struct scatterlist *sgp;
> +
> +       bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
> +       return sgp;
> +}
> +
> +int config = 0;
> +int result = 0;
> +
> +SEC("kprobe/virtqueue_add_sgs")
> +int nested_loops(volatile struct pt_regs* ctx)

libbpf provides BPF_KPROBE macro, similar to BPF_PROG for
fentry/fexit. Can you please use that instead? You won't need
PT_REGS_PARM macroses below, which will lead to nicer and shorter
code.

> +{
> +       struct scatterlist **sgs = PT_REGS_PARM2(ctx);
> +       unsigned int num1 = PT_REGS_PARM3(ctx);
> +       unsigned int num2 = PT_REGS_PARM4(ctx);
> +       struct scatterlist *sgp = NULL;
> +       __u64 length1 = 0, length2 = 0;
> +       unsigned int i, n, len;
> +
> +       if (config != 0)
> +               return 0;
> +
> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num1); i++) {
> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> +                    sgp = __sg_next(sgp)) {
> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> +                       length1 += len;
> +                       n++;
> +               }
> +       }
> +
> +       for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num2); i++) {
> +               for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
> +                    sgp = __sg_next(sgp)) {
> +                       bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
> +                       length2 += len;
> +                       n++;
> +               }
> +       }
> +
> +       config = 1;
> +       result = length2 - length1;
> +       return 0;
> +}
> --
> 2.24.1
>