From: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> Skip recording calltime and rettime if the fgraph_ops does not need it. This is a kind of performance optimization for fprobe. Since fprobe users (e.g. eBPF, ftrace) do not use these entries, recording timestamps in fgraph is just overhead. So introduce the skip_timestamp flag, and if all fgraph_ops set this flag, skip recording calltime and rettime. Here are the performance results measured by tools/testing/selftests/bpf/benchs/run_bench_trigger.sh Without this: kprobe-multi : 5.700 ± 0.065M/s kretprobe-multi: 4.239 ± 0.006M/s With skip-timestamp: kprobe-multi : 6.265 ± 0.033M/s +9.91% kretprobe-multi: 4.758 ± 0.009M/s +12.24% Suggested-by: Jiri Olsa <olsajiri@xxxxxxxxx> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> --- Changes in v11: - Simplify it to be symmetric on push and pop. (Thus the point where the timestamp is taken is slightly shifted.) Changes in v10: - Add likely() to skipping timestamp. Changes in v9: - Newly added. --- include/linux/ftrace.h | 2 ++ kernel/trace/fgraph.c | 36 +++++++++++++++++++++++++++++++++--- kernel/trace/fprobe.c | 1 + 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d8a58b940d81..fabf1a0979d4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1160,6 +1160,8 @@ struct fgraph_ops { void *private; trace_func_graph_ent_t saved_func; int idx; + /* If skip_timestamp is true, this does not record timestamps. 
*/ + bool skip_timestamp; }; void *fgraph_reserve_data(int idx, int size_bytes); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d735a8c872bb..cf3ae59a436e 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -174,6 +174,7 @@ int ftrace_graph_active; static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; static unsigned long fgraph_array_bitmask; +static bool fgraph_skip_timestamp; /* LRU index table for fgraph_array */ static int fgraph_lru_table[FGRAPH_ARRAY_SIZE]; @@ -557,7 +558,11 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, return -EBUSY; } - calltime = trace_clock_local(); + /* This is not really 'likely' but for keeping the least path to be faster. */ + if (likely(fgraph_skip_timestamp)) + calltime = 0LL; + else + calltime = trace_clock_local(); offset = READ_ONCE(current->curr_ret_stack); ret_stack = RET_STACK(current, offset); @@ -728,6 +733,12 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, *ret = ret_stack->ret; trace->func = ret_stack->func; trace->calltime = ret_stack->calltime; + /* This is not really 'likely' but for keeping the least path to be faster. 
*/ + if (likely(!trace->calltime)) + trace->rettime = 0LL; + else + trace->rettime = trace_clock_local(); + trace->overrun = atomic_read(&current->trace_overrun); trace->depth = current->curr_ret_depth; /* @@ -788,7 +799,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe return (unsigned long)panic; } - trace.rettime = trace_clock_local(); if (fregs) ftrace_regs_set_instruction_pointer(fregs, ret); @@ -1242,6 +1252,24 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +static void update_fgraph_skip_timestamp(void) +{ + int i; + + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (gops == &fgraph_stub) + continue; + + if (!gops->skip_timestamp) { + fgraph_skip_timestamp = false; + return; + } + } + fgraph_skip_timestamp = true; +} + int register_ftrace_graph(struct fgraph_ops *gops) { int command = 0; @@ -1267,6 +1295,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) gops->idx = i; ftrace_graph_active++; + update_fgraph_skip_timestamp(); if (ftrace_graph_active == 2) ftrace_graph_disable_direct(true); @@ -1298,6 +1327,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); + update_fgraph_skip_timestamp(); } out: mutex_unlock(&ftrace_lock); @@ -1321,8 +1351,8 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) goto out; fgraph_array[gops->idx] = &fgraph_stub; - ftrace_graph_active--; + update_fgraph_skip_timestamp(); if (!ftrace_graph_active) command = FTRACE_STOP_FUNC_RET; diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 5a0b4ef52fa7..b108d26d7ee5 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -345,6 +345,7 @@ NOKPROBE_SYMBOL(fprobe_return); static struct fgraph_ops fprobe_graph_ops = { .entryfunc = fprobe_entry, .retfunc = fprobe_return, + .skip_timestamp = true, }; static int fprobe_graph_active;