Arguments to a raw tracepoint are tagged as trusted, which carries the semantics that the pointer will be non-NULL. However, in certain cases, a raw tracepoint argument may end up being NULL. More context about this issue is available in [0]. Thus, there is a discrepancy between the reality, that raw_tp arguments can actually be NULL, and the verifier's knowledge, that they are never NULL, causing explicit NULL checks to be deleted, and accesses to such pointers potentially crashing the kernel. A previous attempt [1], i.e. the second fixed commit, was made to simulate symbolic execution as if in most accesses, the argument is a non-NULL raw_tp, except for conditional jumps. This tried to suppress branch prediction while preserving compatibility, but surfaced issues with production programs that were difficult to solve without increasing verifier complexity. A more complete discussion of issues and fixes is available at [2]. Fix this by maintaining an explicit, incomplete list of tracepoints where the arguments are known to be possibly NULL, and mark the positional arguments as PTR_MAYBE_NULL. Additionally, capture the tracepoints where arguments are known to be PTR_ERR, and mark these arguments as scalar values to prevent potential dereference. In the future, an automated pass will be used to produce such a list, or insert __nullable annotations automatically for tracepoints. Anyhow, this is an attempt to close the gap until the automation lands, and reflects the current best known list according to Jiri's analysis in [3]. 
[0]: https://lore.kernel.org/bpf/ZrCZS6nisraEqehw@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
[1]: https://lore.kernel.org/all/20241104171959.2938862-1-memxor@xxxxxxxxx
[2]: https://lore.kernel.org/bpf/20241206161053.809580-1-memxor@xxxxxxxxx
[3]: https://lore.kernel.org/bpf/Z1d-qbCdtJqg6Er4@krava

Reported-by: Juri Lelli <juri.lelli@xxxxxxxxxx> # original bug
Reported-by: Manu Bretelle <chantra@xxxxxxxx> # bugs in masking fix
Fixes: 3f00c5239344 ("bpf: Allow trusted pointers to be passed to KF_TRUSTED_ARGS kfuncs")
Fixes: cb4158ce8ec8 ("bpf: Mark raw_tp arguments with PTR_MAYBE_NULL")
Co-developed-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 kernel/bpf/btf.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ed3219da7181..cb72cbf04d12 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6439,6 +6439,100 @@ int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto,
 	return off;
 }
 
+/* Raw tracepoints with arguments that may be NULL at runtime. @func is the
+ * "btf_trace_"-prefixed name matched against the attach type's BTF name, and
+ * bit N of @mask is set when the Nth argument (1-based) may be NULL.
+ */
+struct bpf_raw_tp_null_args {
+	const char *func;
+	u64 mask;
+};
+
+#define RAW_TP_NULL_ARGS(str, arg) { .func = "btf_trace_" #str, .mask = (arg) }
+/* Use 1-based indexing for argno; 1ULL keeps the shift as wide as the u64 mask */
+#define NULL_ARG(argno) (1ULL << (argno))
+
+static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
+	/* sched */
+	RAW_TP_NULL_ARGS(sched_pi_setprio, NULL_ARG(2)),
+	/* ... from sched_numa_pair_template event class */
+	RAW_TP_NULL_ARGS(sched_stick_numa, NULL_ARG(3)),
+	RAW_TP_NULL_ARGS(sched_swap_numa, NULL_ARG(3)),
+	/* afs */
+	RAW_TP_NULL_ARGS(afs_make_fs_call, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(afs_make_fs_calli, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(afs_make_fs_call1, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(afs_make_fs_call2, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(afs_protocol_error, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(afs_flock_ev, NULL_ARG(2)),
+	/* cachefiles */
+	RAW_TP_NULL_ARGS(cachefiles_lookup, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_unlink, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_rename, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_prep_read, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_mark_active, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_mark_failed, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_mark_inactive, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_vfs_error, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_io_error, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_open, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_copen, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_close, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_read, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_cread, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_fd_write, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(cachefiles_ondemand_fd_release, NULL_ARG(1)),
+	/* ext4, from ext4__mballoc event class */
+	RAW_TP_NULL_ARGS(ext4_mballoc_discard, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(ext4_mballoc_free, NULL_ARG(2)),
+	/* fib */
+	RAW_TP_NULL_ARGS(fib_table_lookup, NULL_ARG(3)),
+	/* filelock */
+	/* ... from filelock_lock event class */
+	RAW_TP_NULL_ARGS(posix_lock_inode, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(fcntl_setlk, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(locks_remove_posix, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(flock_lock_inode, NULL_ARG(2)),
+	/* ... from filelock_lease event class */
+	RAW_TP_NULL_ARGS(break_lease_noblock, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(break_lease_block, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(break_lease_unblock, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(generic_delete_lease, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(time_out_leases, NULL_ARG(2)),
+	/* host1x */
+	RAW_TP_NULL_ARGS(host1x_cdma_push_gather, NULL_ARG(5)),
+	/* huge_memory */
+	RAW_TP_NULL_ARGS(mm_khugepaged_scan_pmd, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(mm_collapse_huge_page_isolate, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(mm_khugepaged_scan_file, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(mm_khugepaged_collapse_file, NULL_ARG(2)),
+	/* kmem */
+	RAW_TP_NULL_ARGS(mm_page_alloc, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(mm_page_pcpu_drain, NULL_ARG(1)),
+	/* ... from mm_page event class */
+	RAW_TP_NULL_ARGS(mm_page_alloc_zone_locked, NULL_ARG(1)),
+	/* netfs */
+	RAW_TP_NULL_ARGS(netfs_failure, NULL_ARG(2)),
+	/* power */
+	RAW_TP_NULL_ARGS(device_pm_callback_start, NULL_ARG(2)),
+	/* qdisc */
+	RAW_TP_NULL_ARGS(qdisc_dequeue, NULL_ARG(4)),
+	/* rxrpc */
+	RAW_TP_NULL_ARGS(rxrpc_recvdata, NULL_ARG(1)),
+	RAW_TP_NULL_ARGS(rxrpc_resend, NULL_ARG(2)),
+	/* sunrpc */
+	RAW_TP_NULL_ARGS(xs_stream_read_data, NULL_ARG(1)),
+	/* tcp */
+	RAW_TP_NULL_ARGS(tcp_send_reset, NULL_ARG(1) | NULL_ARG(2)),
+	/* tegra_apb_dma */
+	RAW_TP_NULL_ARGS(tegra_dma_tx_status, NULL_ARG(3)),
+	/* timer_migration */
+	RAW_TP_NULL_ARGS(tmigr_update_events, NULL_ARG(1)),
+	/* writeback, from writeback_folio_template event class */
+	RAW_TP_NULL_ARGS(writeback_dirty_folio, NULL_ARG(2)),
+	RAW_TP_NULL_ARGS(folio_wait_writeback, NULL_ARG(2)),
+};
+
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info)
@@ -6449,6 +6543,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	const char *tname = prog->aux->attach_func_name;
 	struct bpf_verifier_log *log = info->log;
 	const struct btf_param *args;
+	bool ptr_err_raw_tp = false;
 	const char *tag_value;
 	u32 nr_args, arg;
 	int i, ret;
@@ -6591,6 +6686,36 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
 		info->reg_type |= PTR_MAYBE_NULL;
 
+	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
+		struct btf *btf = prog->aux->attach_btf;
+		const struct btf_type *t;
+		const char *tname;
+
+		t = btf_type_by_id(btf, prog->aux->attach_btf_id);
+		if (!t)
+			goto done;
+		tname = btf_name_by_offset(btf, t->name_off);
+		if (!tname)
+			goto done;
+		for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) {
+			/* Is this a func with potential NULL args? */
+			if (strcmp(tname, raw_tp_null_args[i].func))
+				continue;
+			/* Can the current arg be NULL? */
+			if (raw_tp_null_args[i].mask & NULL_ARG(arg + 1))
+				info->reg_type |= PTR_MAYBE_NULL;
+			break;
+		}
+		/* Hardcode the only cases which have an IS_ERR pointer, i.e.
+		 * mr_integ_alloc's 4th argument (mr), and
+		 * cachefiles_lookup's 3rd argument (de).
+		 */
+		if (!strcmp(tname, "btf_trace_mr_integ_alloc") && (arg + 1) == 4)
+			ptr_err_raw_tp = true;
+		if (!strcmp(tname, "btf_trace_cachefiles_lookup") && (arg + 1) == 3)
+			ptr_err_raw_tp = true;
+	}
+done:
 	if (tgt_prog) {
 		enum bpf_prog_type tgt_type;
 
@@ -6635,6 +6760,14 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
 			tname, arg, info->btf_id, btf_type_str(t),
 			__btf_name_by_offset(btf, t->name_off));
+
+	/* Perform all checks on the validity of type for this argument, but if
+	 * we know it can be IS_ERR at runtime, scrub pointer type and mark as
+	 * scalar. We do not handle is_retval case as we hardcode ptr_err_raw_tp
+	 * handling for known tps.
+	 */
+	if (ptr_err_raw_tp)
+		info->reg_type = SCALAR_VALUE;
 	return true;
 }
 EXPORT_SYMBOL_GPL(btf_ctx_access);
-- 
2.43.5