On Tue, Mar 18, 2025 at 04:38:20PM -0700, Song Liu wrote:
> On Tue, Mar 18, 2025 at 4:00 PM Josh Poimboeuf <jpoimboe@xxxxxxxxxx> wrote:
> > - even in the -ENOENT case the unreliable bit has already been set
> > right before the call to kunwind_next_frame_record_meta().
>
> For this one, do you mean we set state->common.unreliable, but
> failed to propagate it to data.unreliable?

Hm, I hadn't noticed that. That code is quite the maze. It's unfortunate there are two separate 'unreliable' variables. It looks like consume_state() is the only way they get synced? How does that work if kunwind_next() returns an error and skips consume_state()? Or if kunwind_recover_return_address() returns an error to kunwind_next()?

What I actually meant was the following:

  do_kunwind()
    kunwind_next()
      kunwind_next_frame_record()
        state->common.unreliable = true;
        kunwind_next_frame_record_meta()
          return -ENOENT;

Notice that in the success case (-ENOENT), unreliable has already been set.

Actually I think it would be much simpler to just propagate -ENOENT back up the call chain. Then no 'unreliable' bits are needed. Like so (instead of original patch):

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c9fe3e7566a6..5713fad567c5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -276,6 +276,7 @@ config ARM64 select HAVE_SOFTIRQ_ON_OWN_STACK select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM + select HAVE_RELIABLE_STACKTRACE help ARM 64-bit (AArch64) Linux support. 
@@ -2509,4 +2510,3 @@ endmenu # "CPU Power Management" source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" - diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 1d9d51d7627f..e227da842bc3 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -277,22 +277,28 @@ kunwind_next(struct kunwind_state *state) typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie); -static __always_inline void +static __always_inline int do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, void *cookie) { - if (kunwind_recover_return_address(state)) - return; + int ret; + + ret = kunwind_recover_return_address(state); + if (ret) + return ret; while (1) { int ret; if (!consume_state(state, cookie)) - break; + return -EINVAL; + ret = kunwind_next(state); - if (ret < 0) - break; + if (ret) + return ret; } + + return -EINVAL; } /* @@ -324,7 +330,7 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state, : stackinfo_get_unknown(); \ }) -static __always_inline void +static __always_inline int kunwind_stack_walk(kunwind_consume_fn consume_state, void *cookie, struct task_struct *task, struct pt_regs *regs) @@ -352,7 +358,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, if (regs) { if (task != current) - return; + return -EINVAL; kunwind_init_from_regs(&state, regs); } else if (task == current) { kunwind_init_from_caller(&state); @@ -360,7 +366,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state, kunwind_init_from_task(&state, task); } - do_kunwind(&state, consume_state, cookie); + return do_kunwind(&state, consume_state, cookie); } struct kunwind_consume_entry_data { @@ -387,6 +393,25 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry, kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs); } +noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct 
task_struct *task) +{ + int ret; + struct kunwind_consume_entry_data data = { + .consume_entry = consume_entry, + .cookie = cookie, + }; + + ret = kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, NULL); + if (ret) { + if (ret == -ENOENT) + return 0; + return ret; + } + + return -EINVAL; +} + struct bpf_unwind_consume_entry_data { bool (*consume_entry)(void *cookie, u64 ip, u64 sp, u64 fp); void *cookie;