On Mon, Nov 6, 2023 at 2:15 PM Jordan Rome <jordalgo@xxxxxxxx> wrote: > > Currently `get_perf_callchain` only supports user stack walking for > the current task. Passing the correct *crosstask* param will return > -EFAULT if the task passed to `__bpf_get_stack` isn't the current > one instead of a single incorrect frame/address. > > This issue was found using `bpf_get_task_stack` inside a BPF > iterator ("iter/task"), which iterates over all tasks. > `bpf_get_task_stack` works fine for fetching kernel stacks > but because `get_perf_callchain` relies on the caller to know > if the requested *task* is the current one (via *crosstask*) > it wasn't returning an error. > > It might be possible to get user stacks for all tasks utilizing > something like `access_process_vm` but that requires the bpf > program calling `bpf_get_task_stack` to be sleepable and would > therefore be a breaking change. > > Fixes: fa28dcb82a38 ("bpf: Introduce helper bpf_get_task_stack()") > Signed-off-by: Jordan Rome <jordalgo@xxxxxxxx> > --- > include/uapi/linux/bpf.h | 3 +++ > kernel/bpf/stackmap.c | 3 ++- > tools/include/uapi/linux/bpf.h | 3 +++ > tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 3 +++ > tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c | 5 +++++ > 5 files changed, 16 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 0f6cdf52b1da..da2871145274 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -4517,6 +4517,8 @@ union bpf_attr { > * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) > * Description > * Return a user or a kernel stack in bpf program provided buffer. > + * Note: the user stack will only be populated if the *task* is > + * the current task; all other tasks will return -EFAULT. I thought that you were not getting an error even for a non-current task with BPF_F_USER_STACK? Shouldn't we make sure to return error (-ENOTSUP?) for such cases? Taking a quick look at get_perf_callchain(), it doesn't seem to return NULL in such cases. > * To achieve this, the helper needs *task*, which is a valid > * pointer to **struct task_struct**. To store the stacktrace, the > * bpf program provides *buf* with a nonnegative *size*. > @@ -4528,6 +4530,7 @@ union bpf_attr { > * > * **BPF_F_USER_STACK** > * Collect a user space stack instead of a kernel stack. > + * The *task* must be the current task. > * **BPF_F_USER_BUILD_ID** > * Collect buildid+offset instead of ips for user stack, > * only valid if **BPF_F_USER_STACK** is also specified. > diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c > index d6b277482085..96641766e90c 100644 > --- a/kernel/bpf/stackmap.c > +++ b/kernel/bpf/stackmap.c > @@ -388,6 +388,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > { > u32 trace_nr, copy_len, elem_size, num_elem, max_depth; > bool user_build_id = flags & BPF_F_USER_BUILD_ID; > + bool crosstask = task && task != current; > u32 skip = flags & BPF_F_SKIP_FIELD_MASK; > bool user = flags & BPF_F_USER_STACK; > struct perf_callchain_entry *trace; > @@ -421,7 +422,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task, > trace = get_callchain_entry_for_task(task, max_depth); > else > trace = get_perf_callchain(regs, 0, kernel, user, max_depth, > - false, false); > + crosstask, false); > if (unlikely(!trace)) > goto err_fault; > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 0f6cdf52b1da..da2871145274 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -4517,6 +4517,8 @@ union bpf_attr { > * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) > * Description > * Return a user or a kernel stack in bpf program provided buffer. > + * Note: the user stack will only be populated if the *task* is > + * the current task; all other tasks will return -EFAULT. > * To achieve this, the helper needs *task*, which is a valid > * pointer to **struct task_struct**. To store the stacktrace, the > * bpf program provides *buf* with a nonnegative *size*. > @@ -4528,6 +4530,7 @@ union bpf_attr { > * > * **BPF_F_USER_STACK** > * Collect a user space stack instead of a kernel stack. > + * The *task* must be the current task. > * **BPF_F_USER_BUILD_ID** > * Collect buildid+offset instead of ips for user stack, > * only valid if **BPF_F_USER_STACK** is also specified. > diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > index 4e02093c2cbe..757635145510 100644 > --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c > @@ -332,6 +332,9 @@ static void test_task_stack(void) > do_dummy_read(skel->progs.dump_task_stack); > do_dummy_read(skel->progs.get_task_user_stacks); > > + ASSERT_EQ(skel->bss->num_user_stacks, 1, > + "num_user_stacks"); > + please split selftests into a separate patch > bpf_iter_task_stack__destroy(skel); > } > > diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > index f2b8167b72a8..442f4ca39fd7 100644 > --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c > @@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx) > return 0; > } > > +int num_user_stacks = 0; > + > SEC("iter/task") > int get_task_user_stacks(struct bpf_iter__task *ctx) > { > @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) > if (res <= 0) > return 0; > > + /* Only one task, the current one, should succeed */ > + ++num_user_stacks; > + > buf_sz += res; > > /* If the verifier doesn't refine bpf_get_task_stack res, and instead > -- > 2.39.3 >