Liveness analysis DFA computes a set of registers that are live before each instruction. Leverage this information to skip the comparison of dead registers in `func_states_equal()`. This helps with the convergence of iterator-based loops, as `bpf_reg_state->live` marks can't be used when loops are processed. For now, enable this only in privileged mode. Verification performance impact on selftests and sched_ext is listed below. (Using `veristat -C -f "insns_pct>5" -f "!insns<200"` filters). selftests: File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) -------------------- ----------------------------- --------- --------- ---------------- ---------- ---------- -------------- arena_htab_asm.bpf.o arena_htab_asm 445 413 -32 (-7.19%) 37 33 -4 (-10.81%) arena_list.bpf.o arena_list_add 1822 833 -989 (-54.28%) 37 22 -15 (-40.54%) dynptr_success.bpf.o test_dynptr_copy 267 203 -64 (-23.97%) 22 16 -6 (-27.27%) dynptr_success.bpf.o test_dynptr_copy_xdp 719 615 -104 (-14.46%) 68 58 -10 (-14.71%) iters.bpf.o checkpoint_states_deletion 22154 1211 -20943 (-94.53%) 918 40 -878 (-95.64%) iters.bpf.o clean_live_states 1348 588 -760 (-56.38%) 136 66 -70 (-51.47%) iters.bpf.o iter_nested_deeply_iters 367 300 -67 (-18.26%) 43 37 -6 (-13.95%) iters.bpf.o iter_nested_iters 772 632 -140 (-18.13%) 72 62 -10 (-13.89%) iters.bpf.o iter_pass_iter_ptr_to_subprog 285 243 -42 (-14.74%) 30 26 -4 (-13.33%) iters.bpf.o iter_subprog_iters 808 664 -144 (-17.82%) 68 59 -9 (-13.24%) iters.bpf.o loop_state_deps2 356 321 -35 (-9.83%) 35 32 -3 (-8.57%) iters_css.bpf.o iter_css_for_each 296 267 -29 (-9.80%) 32 29 -3 (-9.38%) pyperf600_iter.bpf.o on_event 6379 2591 -3788 (-59.38%) 286 192 -94 (-32.87%) test_usdt.bpf.o usdt12 1983 1803 -180 (-9.08%) 143 136 -7 (-4.90%) sched_ext: File Program Insns (A) Insns (B) Insns (DIFF) States (A) States (B) States (DIFF) ----------------- ---------------------- --------- --------- ---------------- ---------- ---------- --------------- bpf.bpf.o lavd_dispatch 154608 120590 -34018 (-22.00%) 8950 7065 -1885 (-21.06%) bpf.bpf.o lavd_init 7330 6935 -395 (-5.39%) 516 480 -36 (-6.98%) bpf.bpf.o layered_dispatch 9039 5590 -3449 (-38.16%) 662 501 -161 (-24.32%) bpf.bpf.o layered_dump 5022 3669 -1353 (-26.94%) 298 237 -61 (-20.47%) bpf.bpf.o layered_init 5549 4298 -1251 (-22.54%) 523 423 -100 (-19.12%) bpf.bpf.o layered_init_task 270 234 -36 (-13.33%) 24 22 -2 (-8.33%) bpf.bpf.o layered_runnable 1899 1635 -264 (-13.90%) 151 125 -26 (-17.22%) bpf.bpf.o p2dq_dispatch 659 533 -126 (-19.12%) 66 53 -13 (-19.70%) bpf.bpf.o p2dq_init 1936 1560 -376 (-19.42%) 170 142 -28 (-16.47%) bpf.bpf.o refresh_layer_cpumasks 1285 785 -500 (-38.91%) 120 78 -42 (-35.00%) bpf.bpf.o rustland_init 476 413 -63 (-13.24%) 37 34 -3 (-8.11%) bpf.bpf.o rustland_init 476 413 -63 (-13.24%) 37 34 -3 (-8.11%) bpf.bpf.o rusty_select_cpu 1386 1110 -276 (-19.91%) 125 108 -17 (-13.60%) bpf.bpf.o rusty_set_cpumask 4558 4276 -282 (-6.19%) 323 313 -10 (-3.10%) scx_central.bpf.o central_dispatch 600 422 -178 (-29.67%) 59 43 -16 (-27.12%) scx_central.bpf.o central_init 632 318 -314 (-49.68%) 39 28 -11 (-28.21%) scx_nest.bpf.o nest_init 601 519 -82 (-13.64%) 58 51 -7 (-12.07%) scx_pair.bpf.o pair_dispatch 1914 1376 -538 (-28.11%) 142 111 -31 (-21.83%) scx_qmap.bpf.o qmap_dispatch 2187 1703 -484 (-22.13%) 174 141 -33 (-18.97%) scx_qmap.bpf.o qmap_init 22777 18458 -4319 (-18.96%) 768 654 -114 (-14.84%) Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx> --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8c23958bc471..39097835b326 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -733,6 +733,7 @@ struct bpf_verifier_env { * to writes with variable offset and to indirect (helper) accesses. */ bool allow_uninit_stack; + bool allow_liveregs_dfa; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ac7dc58d9b1..b6ab49ee31e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18358,15 +18358,20 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c * the current state will reach 'bpf_exit' instruction safely */ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, enum exact_level exact) + struct bpf_func_state *cur, u32 insn_idx, enum exact_level exact) { - int i; + u16 live_regs; + u16 i; if (old->callback_depth > cur->callback_depth) return false; + live_regs = env->allow_liveregs_dfa + ? env->insn_aux_data[insn_idx].live_regs_before + : 0xffff; for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(env, &old->regs[i], &cur->regs[i], + if ((BIT(i) & live_regs) && + !regsafe(env, &old->regs[i], &cur->regs[i], &env->idmap_scratch, exact)) return false; @@ -18387,6 +18392,7 @@ static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, enum exact_level exact) { + u32 insn_idx; int i; if (old->curframe != cur->curframe) @@ -18410,9 +18416,12 @@ static bool states_equal(struct bpf_verifier_env *env, * and all frame states need to be equivalent */ for (i = 0; i <= old->curframe; i++) { + insn_idx = i == old->curframe + ? env->insn_idx + : old->frame[i + 1]->callsite; if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(env, old->frame[i], cur->frame[i], exact)) + if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact)) return false; } return true; @@ -23647,6 +23656,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 ret = compute_live_registers(env); if (ret < 0) goto skip_full_check; + env->allow_liveregs_dfa = true; } ret = mark_fastcall_patterns(env); -- 2.48.1