On Mon, 9 Sep 2024 16:16:46 -0400 Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx> wrote: > In preparation for allowing system call enter/exit instrumentation to > handle page faults, make sure that ftrace can handle this change by > explicitly disabling preemption within the ftrace system call tracepoint > probes to respect the current expectations within ftrace ring buffer > code. > > This change does not yet allow ftrace to take page faults per se within > its probe, but allows its existing probes to adapt to the upcoming > change. OK, this looks good to me. Acked-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> Thank you, > > Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx> > Cc: Michael Jeanson <mjeanson@xxxxxxxxxxxx> > Cc: Steven Rostedt <rostedt@xxxxxxxxxxx> > Cc: Masami Hiramatsu <mhiramat@xxxxxxxxxx> > Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > Cc: Alexei Starovoitov <ast@xxxxxxxxxx> > Cc: Yonghong Song <yhs@xxxxxx> > Cc: Paul E. McKenney <paulmck@xxxxxxxxxx> > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> > Cc: Mark Rutland <mark.rutland@xxxxxxx> > Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx> > Cc: Namhyung Kim <namhyung@xxxxxxxxxx> > Cc: Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> > Cc: bpf@xxxxxxxxxxxxxxx > Cc: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx> > --- > include/trace/trace_events.h | 38 ++++++++++++++++++++++++++++------- > kernel/trace/trace_syscalls.c | 12 +++++++++++ > 2 files changed, 43 insertions(+), 7 deletions(-) > > diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h > index 8bcbb9ee44de..0228d9ed94a3 100644 > --- a/include/trace/trace_events.h > +++ b/include/trace/trace_events.h > @@ -263,6 +263,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ > tstruct \ > {} }; > > +#undef DECLARE_EVENT_SYSCALL_CLASS > +#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS > + > #undef DEFINE_EVENT_PRINT > #define DEFINE_EVENT_PRINT(template, name, proto, args, print) > > @@ -396,11 +399,11 @@ static inline notrace int trace_event_get_offsets_##call( \ > > #include "stages/stage6_event_callback.h" > > -#undef DECLARE_EVENT_CLASS > -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ > - \ > + > +#undef __DECLARE_EVENT_CLASS > +#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ > static notrace void \ > -trace_event_raw_event_##call(void *__data, proto) \ > +do_trace_event_raw_event_##call(void *__data, proto) \ > { \ > struct trace_event_file *trace_file = __data; \ > struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ > @@ -425,15 +428,34 @@ trace_event_raw_event_##call(void *__data, proto) \ > \ > trace_event_buffer_commit(&fbuffer); \ > } > + > +#undef DECLARE_EVENT_CLASS > +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ > +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ > + PARAMS(assign), PARAMS(print)) \ > +static notrace void \ > +trace_event_raw_event_##call(void *__data, proto) \ > +{ \ > + do_trace_event_raw_event_##call(__data, args); \ > +} > + > +#undef DECLARE_EVENT_SYSCALL_CLASS > +#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \ > +__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \ > + PARAMS(assign), PARAMS(print)) \ > +static notrace void \ > +trace_event_raw_event_##call(void *__data, proto) \ > +{ \ > + guard(preempt_notrace)(); \ > + do_trace_event_raw_event_##call(__data, args); \ > +} > + > /* > * The ftrace_test_probe is compiled out, it is only here as a build time check > * to make sure that if the tracepoint handling changes, the ftrace probe will > * fail to compile unless it too is updated. > */ > > -#undef DECLARE_EVENT_SYSCALL_CLASS > -#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS > - > #undef DEFINE_EVENT > #define DEFINE_EVENT(template, call, proto, args) \ > static inline void ftrace_test_probe_##call(void) \ > @@ -443,6 +465,8 @@ static inline void ftrace_test_probe_##call(void) \ > > #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) > > +#undef __DECLARE_EVENT_CLASS > + > #include "stages/stage7_class_define.h" > > #undef DECLARE_EVENT_CLASS > diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c > index 067f8e2b930f..abf0e0b7cd0b 100644 > --- a/kernel/trace/trace_syscalls.c > +++ b/kernel/trace/trace_syscalls.c > @@ -299,6 +299,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) > int syscall_nr; > int size; > > + /* > + * Syscall probe called with preemption enabled, but the ring > + * buffer and per-cpu data require preemption to be disabled. > + */ > + guard(preempt_notrace)(); > + > syscall_nr = trace_get_syscall_nr(current, regs); > if (syscall_nr < 0 || syscall_nr >= NR_syscalls) > return; > @@ -338,6 +344,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) > struct trace_event_buffer fbuffer; > int syscall_nr; > > + /* > + * Syscall probe called with preemption enabled, but the ring > + * buffer and per-cpu data require preemption to be disabled. > + */ > + guard(preempt_notrace)(); > + > syscall_nr = trace_get_syscall_nr(current, regs); > if (syscall_nr < 0 || syscall_nr >= NR_syscalls) > return; > -- > 2.39.2 > -- Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>