On Wed, Jun 8, 2022 at 12:27 PM Eduard Zingerman <eddyz87@xxxxxxxxx> wrote:
> [...]
>
> Signed-off-by: Eduard Zingerman <eddyz87@xxxxxxxxx>
> ---
>  include/linux/bpf.h          |   3 +
>  include/linux/bpf_verifier.h |  12 +++
>  kernel/bpf/bpf_iter.c        |   9 +-
>  kernel/bpf/verifier.c        | 168 +++++++++++++++++++++++++++++++++--
>  4 files changed, 183 insertions(+), 9 deletions(-)

[...]

> +struct bpf_loop_inline_state {
> +	bool initialized; /* set to true upon first entry */
> +	bool fit_for_inline; /* true if callback function is the same
> +			      * at each call and flags are always zero
> +			      */
> +	u32 callback_subprogno; /* valid when fit_for_inline is true */
> +};

nit: We only need one bit each for initialized and fit_for_inline
(see the bitfield sketch at the end of this mail).

> +
>  /* Possible states for alu_state member. */
>  #define BPF_ALU_SANITIZE_SRC		(1U << 0)
>  #define BPF_ALU_SANITIZE_DST		(1U << 1)
> @@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
>  		u32 mem_size;	/* mem_size for non-struct typed var */
>  	};

[...]

> +
> +void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)

static void ...

> +{
> +	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
> +	struct bpf_reg_state *regs = cur_regs(env);
> +	struct bpf_reg_state *flags_reg = &regs[BPF_REG_4];
> +

nit: we usually don't have empty lines here.

> +	int flags_is_zero =
> +		register_is_const(flags_reg) && flags_reg->var_off.value == 0;

If we replace "fit_for_inline" with "not_fit_for_inline", we can make
the cannot-inline case faster with:

	if (state->not_fit_for_inline)
		return;

(a fuller sketch at the end of this mail)

> +
> +	if (state->initialized) {
> +		state->fit_for_inline &=
> +			flags_is_zero &&
> +			state->callback_subprogno == subprogno;
> +	} else {
> +		state->initialized = 1;
> +		state->fit_for_inline = flags_is_zero;
> +		state->callback_subprogno = subprogno;
> +	}
> +}
> +

[...]

>
> +struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
> +				 int position,
> +				 s32 stack_base,
> +				 u32 callback_subprogno,
> +				 u32 *cnt)

missing static

> +{
> +	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
> +	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
> +	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
> +	int reg_loop_max = BPF_REG_6;
> +	int reg_loop_cnt = BPF_REG_7;
> +	int reg_loop_ctx = BPF_REG_8;
> +

[...]
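
To illustrate the one-bit nit, a minimal sketch (untested; just what I
have in mind, not a requirement):

	struct bpf_loop_inline_state {
		unsigned int initialized:1; /* set to true upon first entry */
		unsigned int fit_for_inline:1; /* true if callback function is
						* the same at each call and
						* flags are always zero
						*/
		u32 callback_subprogno; /* valid when fit_for_inline is true */
	};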
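
And combining the two suggestions, update_loop_inline_state() could look
roughly like this (untested sketch; it assumes the field is renamed to
not_fit_for_inline and turned into a one-bit bitfield as above):

	static void update_loop_inline_state(struct bpf_verifier_env *env,
					     u32 subprogno)
	{
		struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
		struct bpf_reg_state *regs = cur_regs(env);
		struct bpf_reg_state *flags_reg = &regs[BPF_REG_4];
		int flags_is_zero;

		/* once inlining is ruled out, it stays ruled out */
		if (state->not_fit_for_inline)
			return;

		flags_is_zero = register_is_const(flags_reg) &&
				flags_reg->var_off.value == 0;

		if (!state->initialized) {
			state->initialized = 1;
			state->not_fit_for_inline = !flags_is_zero;
			state->callback_subprogno = subprogno;
			return;
		}

		/* inline only if flags stay zero and the callback is the
		 * same at every call site
		 */
		state->not_fit_for_inline |= !flags_is_zero ||
					     state->callback_subprogno != subprogno;
	}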