If we attach fexit/fmod_ret to __noreturn functions, it will cause an issue that the bpf trampoline image will be left over even if the bpf link has been destroyed. Take attaching do_exit() with fexit for example. The fexit works as follows, bpf_trampoline + __bpf_tramp_enter + percpu_ref_get(&tr->pcref); + call do_exit() + __bpf_tramp_exit + percpu_ref_put(&tr->pcref); Since do_exit() never returns, the refcnt of the trampoline image is never decremented, preventing it from being freed. That can be verified as follows, $ bpftool link show <<<< no output $ grep "bpf_trampoline_[0-9]" /proc/kallsyms ffffffffc04cb000 t bpf_trampoline_6442526459 [bpf] <<<< leftover In this patch, all functions annotated with __noreturn are rejected, except for the following cases: - Functions that result in a system reboot, such as panic, machine_real_restart and rust_begin_unwind - Functions that are never executed by tasks, such as rest_init and cpu_startup_entry - Functions implemented in assembly, such as rewind_stack_and_make_dead and xen_cpu_bringup_again, lack an associated BTF ID. With this change, attaching fexit probes to functions like do_exit() will be rejected. $ ./fexit libbpf: prog 'fexit': BPF program load failed: -EINVAL libbpf: prog 'fexit': -- BEGIN PROG LOAD LOG -- Attaching fexit/fmod_ret to __noreturn functions is rejected. Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> --- kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9971c03adfd5..b7d7d5c4989f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -22841,6 +22841,49 @@ BTF_ID(func, __rcu_read_unlock) #endif BTF_SET_END(btf_id_deny) +/* fexit and fmod_ret can't be used to attach to __noreturn functions. + * Currently, we must manually list all __noreturn functions here. Once a more + * robust solution is implemented, this workaround can be removed.
+ */ +BTF_SET_START(noreturn_deny) +#define NORETURN(fn) BTF_ID(func, fn) +#ifdef CONFIG_IA32_EMULATION +NORETURN(__ia32_sys_exit) +NORETURN(__ia32_sys_exit_group) +#endif +#ifdef CONFIG_KUNIT +NORETURN(__kunit_abort) +NORETURN(kunit_try_catch_throw) +#endif +#ifdef CONFIG_MODULES +NORETURN(__module_put_and_kthread_exit) +#endif +#ifdef CONFIG_X86_64 +NORETURN(__x64_sys_exit) +NORETURN(__x64_sys_exit_group) +#endif +#ifdef CONFIG_XEN_PV_SMP +NORETURN(cpu_bringup_and_idle) +#endif +NORETURN(do_exit) +NORETURN(do_group_exit) +#if defined(CONFIG_X86) && defined(CONFIG_SMP) +NORETURN(hlt_play_dead) +#endif +#ifdef CONFIG_HYPERV +NORETURN(hv_ghcb_terminate) +#endif +NORETURN(kthread_complete_and_exit) +NORETURN(kthread_exit) +NORETURN(make_task_dead) +#ifdef CONFIG_AMD_MEM_ENCRYPT +NORETURN(sev_es_terminate) +NORETURN(snp_abort) +#endif +NORETURN(stop_this_cpu) +#undef NORETURN +BTF_SET_END(noreturn_deny) + static bool can_be_sleepable(struct bpf_prog *prog) { if (prog->type == BPF_PROG_TYPE_TRACING) { @@ -22929,6 +22972,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } else if (prog->type == BPF_PROG_TYPE_TRACING && btf_id_set_contains(&btf_id_deny, btf_id)) { return -EINVAL; + } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || + prog->expected_attach_type == BPF_MODIFY_RETURN) && + btf_id_set_contains(&noreturn_deny, btf_id)) { + verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n"); + return -EINVAL; } key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); -- 2.43.5