On Thu, Jun 13, 2019 at 9:49 AM Alexei Starovoitov <ast@xxxxxxxxxx> wrote: > > Allow the verifier to validate the loops by simulating their execution. > Exisiting programs have used '#pragma unroll' to unroll the loops > by the compiler. Instead let the verifier simulate all iterations > of the loop. > In order to do that introduce parentage chain of bpf_verifier_state and > 'branches' counter for the number of branches left to explore. > See more detailed algorithm description in bpf_verifier.h > > This algorithm borrows the key idea from Edward Cree approach: > https://patchwork.ozlabs.org/patch/877222/ > Additional state pruning heuristics make such brute force loop walk > practical even for large loops. > > Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> > --- LGTM, few suggestions below. Acked-by: Andrii Nakryiko <andriin@xxxxxx> > include/linux/bpf_verifier.h | 51 +++++++++++++- > kernel/bpf/verifier.c | 133 ++++++++++++++++++++++++++++++++--- > 2 files changed, 175 insertions(+), 9 deletions(-) > > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h > index 704ed7971472..03037373b447 100644 > --- a/include/linux/bpf_verifier.h > +++ b/include/linux/bpf_verifier.h > @@ -194,6 +194,53 @@ struct bpf_func_state { > struct bpf_verifier_state { > /* call stack tracking */ > struct bpf_func_state *frame[MAX_CALL_FRAMES]; > + struct bpf_verifier_state *parent; > + /* > + * 'branches' field is the number of branches left to explore: > + * 0 - all possible paths from this state reached bpf_exit or > + * were safely pruned > + * 1 - at least one path is being explored. > + * This state hasn't reached bpf_exit > + * 2 - at least two paths are being explored. > + * This state is an immediate parent of two children. > + * One is fallthrough branch with branches==1 and another > + * state is pushed into stack (to be explored later) also with > + * branches==1. The parent of this state has branches==1. > + * The verifier state tree connected via 'parent' pointer looks like: > + * 1 > + * 1 > + * 2 -> 1 (first 'if' pushed into stack) > + * 1 > + * 2 -> 1 (second 'if' pushed into stack) > + * 1 > + * 1 > + * 1 bpf_exit. > + * > + * Once do_check() reaches bpf_exit, it calls update_branch_counts() > + * and the verifier state tree will look: > + * 1 > + * 1 > + * 2 -> 1 (first 'if' pushed into stack) > + * 1 > + * 1 -> 1 (second 'if' pushed into stack) > + * 0 > + * 0 > + * 0 bpf_exit. > + * After pop_stack() the do_check() will resume at second 'if'. > + * > + * If is_state_visited() sees a state with branches > 0 it means > + * there is a loop. If such state is exactly equal to the current state > + * it's an infinite loop. Note states_equal() checks for states > + * equvalency, so two states being 'states_equal' does not mean > + * infinite loop. The exact comparison is provided by > + * states_maybe_looping() function. It's a stronger pre-check and > + * much faster than states_equal(). > + * > + * This algorithm may not find all possible infinite loops or > + * loop iteration count may be too high. > + * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in. > + */ > + u32 branches; > u32 insn_idx; > u32 curframe; > u32 active_spin_lock; > @@ -312,7 +359,9 @@ struct bpf_verifier_env { > } cfg; > u32 subprog_cnt; > /* number of instructions analyzed by the verifier */ > - u32 insn_processed; > + u32 prev_insn_processed, insn_processed; > + /* number of jmps, calls, exits analyzed so far */ > + u32 prev_jmps_processed, jmps_processed; > /* total verification time */ > u64 verification_time; > /* maximum number of verifier states kept in 'branching' instructions */ > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index c79c09586a9e..55d5ab4ab83e 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -721,6 +721,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, > dst_state->speculative = src->speculative; > dst_state->curframe = src->curframe; > dst_state->active_spin_lock = src->active_spin_lock; > + dst_state->branches = src->branches; > + dst_state->parent = src->parent; > for (i = 0; i <= src->curframe; i++) { > dst = dst_state->frame[i]; > if (!dst) { > @@ -736,6 +738,23 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, > return 0; > } > > +static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) > +{ > + while (st) { > + u32 br = --st->branches; > + > + /* WARN_ON(br > 1) technically makes sense here, > + * but see comment in push_stack(), hence: > + */ > + WARN_ONCE((int)br < 0, > + "BUG update_branch_counts:branches_to_explore=%d\n", > + br); > + if (br) > + break; > + st = st->parent; > + } > +} > + > static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, > int *insn_idx) > { > @@ -789,6 +808,18 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, > env->stack_size); > goto err; > } > + if (elem->st.parent) { > + ++elem->st.parent->branches; > + /* WARN_ON(branches > 2) technically makes sense here, > + * but > + * 1. speculative states will bump 'branches' for non-branch > + * instructions > + * 2. is_state_visited() heuristics may decide not to create > + * a new state for a sequence of branches and all such current > + * and cloned states will be pointing to a single parent state > + * which might have large 'branches' count. > + */ > + } > return &elem->st; > err: > free_verifier_state(env->cur_state, true); > @@ -5685,7 +5716,8 @@ static void init_explored_state(struct bpf_verifier_env *env, int idx) > * w - next instruction > * e - edge > */ > -static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) > +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, > + bool loop_ok) > { > int *insn_stack = env->cfg.insn_stack; > int *insn_state = env->cfg.insn_state; > @@ -5715,6 +5747,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) > insn_stack[env->cfg.cur_stack++] = w; > return 1; > } else if ((insn_state[w] & 0xF0) == DISCOVERED) { > + if (loop_ok && env->allow_ptr_leaks) allow_ptr_leaks is used as a proxy for SYS_CAP_ADMIN, right? Maybe have explicit env->allow_loops instead. That would make more sense when reading code. Plus, it would decouple loop enablement from SYS_CAP_ADMIN, so would allow more fine-grained control, if necessary. > + return 0; > verbose_linfo(env, t, "%d: ", t); > verbose_linfo(env, w, "%d: ", w); > verbose(env, "back-edge from insn %d to %d\n", t, w); > @@ -5766,7 +5800,7 @@ static int check_cfg(struct bpf_verifier_env *env) > if (opcode == BPF_EXIT) { > goto mark_explored; > } else if (opcode == BPF_CALL) { > - ret = push_insn(t, t + 1, FALLTHROUGH, env); > + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > @@ -5775,7 +5809,8 @@ static int check_cfg(struct bpf_verifier_env *env) > init_explored_state(env, t + 1); > if (insns[t].src_reg == BPF_PSEUDO_CALL) { > init_explored_state(env, t); > - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); > + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, > + env, false); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > @@ -5788,7 +5823,7 @@ static int check_cfg(struct bpf_verifier_env *env) > } > /* unconditional jump with single edge */ > ret = push_insn(t, t + insns[t].off + 1, > - FALLTHROUGH, env); > + FALLTHROUGH, env, true); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > @@ -5801,13 +5836,13 @@ static int check_cfg(struct bpf_verifier_env *env) > } else { > /* conditional jump with two edges */ > init_explored_state(env, t); > - ret = push_insn(t, t + 1, FALLTHROUGH, env); > + ret = push_insn(t, t + 1, FALLTHROUGH, env, true); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > goto err_free; > > - ret = push_insn(t, t + insns[t].off + 1, BRANCH, env); > + ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > @@ -5817,7 +5852,7 @@ static int check_cfg(struct bpf_verifier_env *env) > /* all other non-branch instructions with single > * fall-through edge > */ > - ret = push_insn(t, t + 1, FALLTHROUGH, env); > + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); > if (ret == 1) > goto peek_stack; > else if (ret < 0) > @@ -6250,6 +6285,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn, > > sl = *explored_state(env, insn); > while (sl) { > + if (sl->state.branches) > + goto next; > if (sl->state.insn_idx != insn || > sl->state.curframe != cur->curframe) > goto next; > @@ -6614,12 +6651,32 @@ static int propagate_liveness(struct bpf_verifier_env *env, > return 0; > } > > +static bool states_maybe_looping(struct bpf_verifier_state *old, > + struct bpf_verifier_state *cur) > +{ > + struct bpf_func_state *fold, *fcur; > + int i, fr = cur->curframe; > + > + if (old->curframe != fr) > + return false; > + > + fold = old->frame[fr]; > + fcur = cur->frame[fr]; > + for (i = 0; i < MAX_BPF_REG; i++) > + if (memcmp(&fold->regs[i], &fcur->regs[i], > + offsetof(struct bpf_reg_state, parent))) > + return false; > + return true; > +} > + > + > static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > { > struct bpf_verifier_state_list *new_sl; > struct bpf_verifier_state_list *sl, **pprev; > struct bpf_verifier_state *cur = env->cur_state, *new; > int i, j, err, states_cnt = 0; > + bool add_new_state = false; > > if (!env->insn_aux_data[insn_idx].prune_point) > /* this 'insn_idx' instruction wasn't marked, so we will not > @@ -6627,6 +6684,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > */ > return 0; > > + /* bpf progs typically have pruning point every 4 instructions > + * http://vger.kernel.org/bpfconf2019.html#session-1 > + * Do not add new state for future pruning if the verifier hasn't seen > + * at least 2 jumps and at least 8 instructions. > + * This heuristics helps decrease 'total_states' and 'peak_states' metric. > + * In tests that amounts to up to 50% reduction into total verifier > + * memory consumption and 20% verifier time speedup. > + */ > + if (env->jmps_processed - env->prev_jmps_processed >= 2 && > + env->insn_processed - env->prev_insn_processed >= 8) > + add_new_state = true; nit: trivial if, why not: add_new_state = env->jmps_processed - env->prev_jmps_processed >= 2 && env->insn_processed - env->prev_insn_processed >= 8; > + > pprev = explored_state(env, insn_idx); > sl = *pprev; > > @@ -6636,6 +6705,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > states_cnt++; > if (sl->state.insn_idx != insn_idx) > goto next; > + if (sl->state.branches) { > + if (states_maybe_looping(&sl->state, cur) && > + states_equal(env, &sl->state, cur)) { > + verbose_linfo(env, insn_idx, "; "); > + verbose(env, "infinite loop detected at insn %d\n", insn_idx); > + return -EINVAL; > + } > + /* if the verifier is processing a loop, avoid adding new state > + * too often, since different loop iterations have distinct > + * states and may not help future pruning. > + * This threshold shouldn't be too low to make sure that > + * a loop with large bound will be rejected quickly. > + * The most abusive loop will be: > + * r1 += 1 > + * if r1 < 1000000 goto pc-2 > + * 1M insn_procssed limit / 100 == 10k peak states. > + * This threshold shouldn't be too high either, since states > + * at the end of the loop are likely to be useful in pruning. > + */ > + if (env->jmps_processed - env->prev_jmps_processed < 20 && > + env->insn_processed - env->prev_insn_processed < 100) > + add_new_state = false; same as above > + goto miss; > + } > if (states_equal(env, &sl->state, cur)) { > sl->hit_cnt++; > /* reached equivalent register/stack state, > @@ -6653,7 +6746,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > return err; > return 1; > } > - sl->miss_cnt++; > +miss: > + /* when new state is not going to be added do not increase miss count. > + * Otherwise several loop iterations will remove the state > + * recorded earlier. The goal of these heuristics is to have > + * states from some iterations of the loop (some in the beginning > + * and some at the end) to help pruning. > + */ > + if (add_new_state) > + sl->miss_cnt++; > /* heuristic to determine whether this state is beneficial > * to keep checking from state equivalence point of view. > * Higher numbers increase max_states_per_insn and verification time, > @@ -6665,6 +6766,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > */ > *pprev = sl->next; > if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { > + u32 br = sl->state.branches; > + > + WARN_ONCE(br, > + "BUG live_done but branches_to_explore %d\n", > + br); > free_verifier_state(&sl->state, false); > kfree(sl); > env->peak_states--; > @@ -6690,6 +6796,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) > return 0; > > + if (!add_new_state) > + return 0; > + > /* there were no equivalent states, remember current one. > * technically the current state is not proven to be safe yet, > * but it will either reach outer most bpf_exit (which means it's safe) > @@ -6702,6 +6811,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) > return -ENOMEM; > env->total_states++; > env->peak_states++; > + env->prev_jmps_processed = env->jmps_processed; > + env->prev_insn_processed = env->insn_processed; > > /* add new state to the head of linked list */ > new = &new_sl->state; > @@ -6712,6 +6823,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) Few lines above this there is comment stating " Since there are no loops ...", can you please fix it? > return err; > } > new->insn_idx = insn_idx; > + WARN_ONCE(new->branches != 1, > + "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx); > + cur->parent = new; > new_sl->next = *explored_state(env, insn_idx); > *explored_state(env, insn_idx) = new_sl; > /* connect new state to parentage chain. Current frame needs all > @@ -6798,6 +6912,7 @@ static int do_check(struct bpf_verifier_env *env) > return -ENOMEM; > state->curframe = 0; > state->speculative = false; > + state->branches = 1; > state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); > if (!state->frame[0]) { > kfree(state); > @@ -7004,6 +7119,7 @@ static int do_check(struct bpf_verifier_env *env) > } else if (class == BPF_JMP || class == BPF_JMP32) { > u8 opcode = BPF_OP(insn->code); > > + env->jmps_processed++; > if (opcode == BPF_CALL) { > if (BPF_SRC(insn->code) != BPF_K || > insn->off != 0 || > @@ -7089,6 +7205,7 @@ static int do_check(struct bpf_verifier_env *env) > if (err) > return err; > process_bpf_exit: > + update_branch_counts(env, env->cur_state); > err = pop_stack(env, &env->prev_insn_idx, > &env->insn_idx); > if (err < 0) { > -- > 2.20.0 >