Re: [PATCH bpf-next v1 1/2] bpf: force checkpoint when jmp history is too long

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2024-10-21 at 19:53 -0700, Alexei Starovoitov wrote:

[...]

> Instead of tweaking force_new_state earlier, it's better to do:
> if (!force_new_state && cur->jmp_history_cnt < N &&
>     env->jmps_processed - env->prev_jmps_processed < 20 && ..)
>     add_new_state = false;
>
> You're essentially proposing N == 40.
> Just add your existing comment next to the check.
> # define MAX_JMPS_PER_STATE is imo overkill.

This condition is only triggered when verifier processes loops,
but this corner case is also possible w/o loops,
e.g. consider the following program:

    func#0 @0
    0: R1=ctx() R10=fp0
    0: (79) r2 = *(u64 *)(r1 +0)          ; R1=ctx() R2_w=scalar()
    1: (b7) r0 = 0                        ; R0_w=0
    2: (35) if r2 >= 0x0 goto pc+5
    mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1 
    mark_precise: frame0: regs=r2 stack= before 1: (b7) r0 = 0
    mark_precise: frame0: regs=r2 stack= before 0: (79) r2 = *(u64 *)(r1 +0)
    2: R2_w=scalar()
    8: (35) if r2 >= 0x1 goto pc+6        ; R2_w=0
    9: (05) goto pc+0
    10: (05) goto pc+0
    11: (05) goto pc+0
    12: (05) goto pc+0
    13: (05) goto pc+0
    14: (95) exit
    
    from 8 to 15: R0_w=0 R1=ctx() R2_w=scalar(umin=1) R10=fp0
    15: R0_w=0 R1=ctx() R2_w=scalar(umin=1) R10=fp0
    15: (35) if r2 >= 0x2 goto pc+6       ; R2_w=1
    16: (05) goto pc+0
    17: (05) goto pc+0
    18: (05) goto pc+0
    19: (05) goto pc+0
    20: (05) goto pc+0
    21: (95) exit
    
    from 15 to 22: R0_w=0 R1=ctx() R2_w=scalar(umin=2) R10=fp0
    22: R0_w=0 R1=ctx() R2_w=scalar(umin=2) R10=fp0
    22: (35) if r2 >= 0x3 goto pc+6       ; R2_w=2
    23: (05) goto pc+0
    24: (05) goto pc+0
    25: (05) goto pc+0
    26: (05) goto pc+0
    27: (05) goto pc+0
    28: (95) exit

    ... and so on ...

Here amount of "goto +0;" instructions before each exit is chosen
specifically to force checkpoint before "exit;" is processed.

This takes ~10 minutes to verify on master.
Surprisingly current patch does not seem to help,
I'll investigate this tomorrow.
Full example is in the end of the email.

> I see. Thanks for explaining.
> So the bug is actually in reset logic of jmps/insns_processed
> coupled with push/pop stack.
> I think let's add cur->jmp_history_cnt < 40 check for now
> and target bpf tree (assuming no veristat regressions),
> but for bpf-next we probably need to follow up.

You'd like different fixes for bpf and bpf-next?

> We can probably remove jmps_processed counter
> and replace it with jmp_history_cnt.

I tried that :)
Results are a bit surprising, when the following patch is used:

    diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
    index 4513372c5bc8..4565c0c3e5b1 100644
    --- a/include/linux/bpf_verifier.h
    +++ b/include/linux/bpf_verifier.h
    @@ -468,6 +468,10 @@ struct bpf_verifier_state {
            u32 dfs_depth;
            u32 callback_unroll_depth;
            u32 may_goto_depth;
    +       /* number of jmps, calls, exits analyzed within this state */
    +       u32 jmps_processed;
    +       /* total number of instructions processed within this state */
    +       u32 insn_processed;
     }
    ...
    diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
    index f514247ba8ba..d61648d4c905 100644
    --- a/kernel/bpf/verifier.c
    +++ b/kernel/bpf/verifier.c
    ...
    @@ -17891,8 +17893,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
             * In tests that amounts to up to 50% reduction into total verifier
             * memory consumption and 20% verifier time speedup.
             */
    -       if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
    -           env->insn_processed - env->prev_insn_processed >= 8)
    +       if (cur->jmps_processed >= 2 &&
    +           cur->insn_processed >= 8)
                    add_new_state = true;

File                              Program           Insns       (DIFF)  States   (DIFF)
--------------------------------  ----------------  ------------------  ---------------
bpf_xdp.o                         tail_lb_ipv4         +8003 (+18.27%)   -394 (-14.18%)
bpf_xdp.o                         tail_lb_ipv6        +19757 (+24.71%)   -431 (-11.23%)
loop1.bpf.o                       nested_loops             +0 (+0.00%)      +0 (+0.00%)
loop3.bpf.o                       while_true               +0 (+0.00%)      +0 (+0.00%)
pyperf100.bpf.o                   on_event               -213 (-0.32%)  -1128 (-19.26%)
pyperf180.bpf.o                   on_event              +7863 (+6.25%)  -1801 (-17.23%)
pyperf600.bpf.o                   on_event            -79267 (-14.44%)  -5090 (-17.24%)
pyperf600_nounroll.bpf.o          on_event             +39203 (+7.78%)  -6055 (-17.69%)
strobemeta.bpf.o                  on_event          +587119 (+246.59%)  -1909 (-37.09%)
strobemeta_nounroll1.bpf.o        on_event            +49553 (+91.96%)   -667 (-42.24%)
strobemeta_nounroll2.bpf.o        on_event           +109673 (+92.35%)  -1732 (-46.19%)
strobemeta_subprogs.bpf.o         on_event               -413 (-0.76%)   -582 (-38.77%)
test_cls_redirect.bpf.o           cls_redirect        +20875 (+58.96%)     -73 (-3.35%)
test_cls_redirect_subprogs.bpf.o  cls_redirect        +12337 (+20.37%)     +52 (+1.25%)
test_verif_scale1.bpf.o           balancer_ingress   +224652 (+41.09%)    -670 (-7.76%)
test_verif_scale2.bpf.o           balancer_ingress      +7145 (+0.92%)  +2983 (+97.87%)
test_verif_scale3.bpf.o           balancer_ingress   +162514 (+19.40%)  -1959 (-22.68%)
verifier_search_pruning.bpf.o     short_loop1                      N/A              N/A

Plus timer_mim.c starts to fail because we do not force checkpoint at
entry to async callback :)

> Based on the above explanation insns_processed counter is
> also bogus.

My reasoning is that jmp_history_cnt is what matters for
mark_chain_precision(), so it should be a marker for state cut-off,
insns_processed does not seem to add such constraints.

---

diff --git a/tools/testing/selftests/bpf/prog_tests/jumpjump.c b/tools/testing/selftests/bpf/prog_tests/jumpjump.c
new file mode 100644
index 000000000000..f78675ba0341
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jumpjump.c
@@ -0,0 +1,62 @@
+#include <test_progs.h>
+
+#define MAX_INSNS (512 * 1024)
+
+static char log_buf[16 * 1024* 1024];
+
+void test_jumpjump(void)
+{
+	/* Generate a series of jumps that would take a long time to verify. */
+	struct bpf_insn *insns = NULL;
+	int jmps_processed = 0;
+	int insn_processed = 0;
+	int jmp_idx = -1;
+	int prog_fd = -1;
+	int cnt = 0;
+	int i = 0;
+
+	insns = calloc(MAX_INSNS + 32, sizeof(*insns));
+	if (!ASSERT_OK_PTR(insns, "insns = calloc()"))
+		return;
+	insns[i++] = BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0);
+	insn_processed++;
+	insns[i++] = BPF_MOV64_IMM(BPF_REG_0, 0);
+	insn_processed++;
+	while (i < MAX_INSNS) {
+		if (jmp_idx > 0)
+			insns[jmp_idx].off = i - jmp_idx - 1;
+		jmp_idx = i;
+		insns[i++] = BPF_JMP_IMM(BPF_JGE, BPF_REG_2, cnt++, 0);
+		jmps_processed++;
+		insn_processed++;
+		while (insn_processed < 7 || jmps_processed < 2) {
+			insns[i++] = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
+			jmps_processed++;
+			insn_processed++;
+		}
+		insns[i++] = BPF_EXIT_INSN();
+		jmps_processed = 1;
+		insn_processed = 1;
+	}
+	if (jmp_idx > 0)
+		insns[jmp_idx].off = i - jmp_idx - 1;
+	insns[i++] = BPF_EXIT_INSN();
+
+	LIBBPF_OPTS(bpf_prog_load_opts, opts);
+	opts.log_buf = log_buf;
+	opts.log_size = sizeof(log_buf);
+	opts.log_level = 0;
+	if (env.verbosity >= VERBOSE_VERY)
+		opts.log_level = 1 | 2 | 4;
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, NULL, "GPL", insns, i, &opts);
+	if (prog_fd < 0)
+		PRINT_FAIL("Can't load program, errno %d (%s)\n", errno, strerror(errno));
+	if (env.verbosity >= VERBOSE_VERY || prog_fd < 0) {
+		printf("Verifier log:\n%s\n", log_buf);
+	}
+	if (prog_fd < 0)
+		goto out;
+out:
+	close(prog_fd);
+	free(insns);
+}






[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux