Instead of allocating and copying jump history each time we enqueue a child verifier state, switch to a model where we use one common dynamically sized array of instruction jumps across all states. The key observation for proving this correct is that jmp_history is only relevant while a state is active, which means either it is the current state (and thus we are actively modifying jump history and no other state can interfere with us) or it is a checkpointed state with some children still active (either enqueued or being the current state). In the latter case its portion of jump history is finalized and won't change or grow, so as long as we keep it immutable until the state is finalized, we are good. Now, when a state is finalized and is put into the state hash for potential future pruning lookups, jump history is not used anymore. This is because jump history is only used by precision marking logic, and we never modify precision markings for finalized states. So, instead of each state having its own small jump history, we keep a global dynamically-sized jump history, where each state in the current DFS path from root to the active state remembers its portion of jump history. The current state can append to this history, but cannot modify any of its parents' histories. Because the jmp_history array can be grown through realloc, states don't keep pointers; they instead maintain two indexes, [start, end), into the global jump history array. End is an exclusive index, so start == end means there is no relevant jump history. This should eliminate a lot of allocations and minimize overall memory usage (but I haven't benchmarked on real hardware, and QEMU benchmarking is too noisy). Also, in the next patch we'll extend jump history to maintain additional markings for some instructions even if there was no jump, so in preparation for that we call this thing a more generic "instruction history". 
Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx> --- include/linux/bpf_verifier.h | 8 +++-- kernel/bpf/verifier.c | 68 ++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 41 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 24213a99cc79..b57696145111 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -309,7 +309,7 @@ struct bpf_func_state { struct bpf_stack_state *stack; }; -struct bpf_idx_pair { +struct bpf_insn_hist_entry { u32 prev_idx; u32 idx; }; @@ -397,8 +397,8 @@ struct bpf_verifier_state { * For most states jmp_history_cnt is [0-3]. * For loops can go up to ~40. */ - struct bpf_idx_pair *jmp_history; - u32 jmp_history_cnt; + u32 insn_hist_start; + u32 insn_hist_end; u32 dfs_depth; }; @@ -666,6 +666,8 @@ struct bpf_verifier_env { * e.g., in reg_type_str() to generate reg_type string */ char tmp_str_buf[TMP_STR_BUF_LEN]; + struct bpf_insn_hist_entry *insn_hist; + u32 insn_hist_cap; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 857d76694517..2905ce2e8b34 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1737,13 +1737,6 @@ static void free_func_state(struct bpf_func_state *state) kfree(state); } -static void clear_jmp_history(struct bpf_verifier_state *state) -{ - kfree(state->jmp_history); - state->jmp_history = NULL; - state->jmp_history_cnt = 0; -} - static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { @@ -1753,7 +1746,6 @@ static void free_verifier_state(struct bpf_verifier_state *state, free_func_state(state->frame[i]); state->frame[i] = NULL; } - clear_jmp_history(state); if (free_self) kfree(state); } @@ -1779,13 +1771,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, struct bpf_func_state *dst; int i, err; - dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, - 
src->jmp_history_cnt, sizeof(struct bpf_idx_pair), - GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - dst_state->jmp_history_cnt = src->jmp_history_cnt; - /* if dst has more stack frames then src frame, free them, this is also * necessary in case of exceptional exits using bpf_throw. */ @@ -1802,6 +1787,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; + dst_state->insn_hist_start = src->insn_hist_start; + dst_state->insn_hist_end = src->insn_hist_end; dst_state->dfs_depth = src->dfs_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; for (i = 0; i <= src->curframe; i++) { @@ -3495,40 +3482,44 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur) { - u32 cnt = cur->jmp_history_cnt; - struct bpf_idx_pair *p; + struct bpf_insn_hist_entry *p; size_t alloc_size; if (!is_jmp_point(env, env->insn_idx)) return 0; - cnt++; - alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); - p = krealloc(cur->jmp_history, alloc_size, GFP_USER); - if (!p) - return -ENOMEM; - p[cnt - 1].idx = env->insn_idx; - p[cnt - 1].prev_idx = env->prev_insn_idx; - cur->jmp_history = p; - cur->jmp_history_cnt = cnt; + if (cur->insn_hist_end + 1 > env->insn_hist_cap) { + alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p)); + alloc_size = kmalloc_size_roundup(alloc_size); + p = krealloc(env->insn_hist, alloc_size, GFP_USER); + if (!p) + return -ENOMEM; + env->insn_hist = p; + env->insn_hist_cap = alloc_size / sizeof(*p); + } + + p = &env->insn_hist[cur->insn_hist_end]; + p->idx = env->insn_idx; + p->prev_idx = env->prev_insn_idx; + cur->insn_hist_end++; return 0; } /* Backtrack one insn at a time. If idx is not at the top of recorded * history then previous instruction came from straight line execution. 
*/ -static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, - u32 *history) +static int get_prev_insn_idx(const struct bpf_verifier_env *env, int insn_idx, + u32 hist_start, u32 *hist_endp) { - u32 cnt = *history; + u32 hist_end = *hist_endp; - if (cnt && st->jmp_history[cnt - 1].idx == i) { - i = st->jmp_history[cnt - 1].prev_idx; - (*history)--; + if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx) { + insn_idx = env->insn_hist[hist_end - 1].prev_idx; + (*hist_endp)--; } else { - i--; + insn_idx--; } - return i; + return insn_idx; } static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) @@ -4200,7 +4191,7 @@ static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_veri * SCALARS, as well as any other registers and slots that contribute to * a tracked state of given registers/stack slots, depending on specific BPF * assembly instructions (see backtrack_insns() for exact instruction handling - * logic). This backtracking relies on recorded jmp_history and is able to + * logic). This backtracking relies on recorded insn_history and is able to * traverse entire chain of parent states. This process ends only when all the * necessary registers/slots and their transitive dependencies are marked as * precise. 
@@ -4317,7 +4308,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); - u32 history = st->jmp_history_cnt; + u32 hist_end = st->insn_hist_end; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", @@ -4399,7 +4390,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) if (i == first_idx) break; subseq_idx = i; - i = get_prev_insn_idx(st, i, &history); + i = get_prev_insn_idx(env, i, st->insn_hist_start, &hist_end); if (i >= env->prog->len) { /* This can happen if backtracking reached insn 0 * and there are still reg_mask or stack_mask @@ -17109,8 +17100,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) cur->parent = new; cur->first_insn_idx = insn_idx; + cur->insn_hist_start = cur->insn_hist_end; cur->dfs_depth = new->dfs_depth + 1; - clear_jmp_history(cur); new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all @@ -20807,6 +20798,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (!is_priv) mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); + kvfree(env->insn_hist); err_free_env: kfree(env); return ret; -- 2.34.1