From: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> Skip recording calltime and rettime if the fgraph_ops does not need it. This is a kind of performance optimization for fprobe. Since the fprobe user does not use these entries, recording timestamp in fgraph is just a overhead (e.g. eBPF, ftrace). So introduce the skip_timestamp flag, and all fgraph_ops sets this flag, skip recording calltime and rettime. Suggested-by: Jiri Olsa <olsajiri@xxxxxxxxx> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> --- Changes in v9: - Newly added. --- include/linux/ftrace.h | 2 ++ kernel/trace/fgraph.c | 46 +++++++++++++++++++++++++++++++++++++++------- kernel/trace/fprobe.c | 1 + 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d845a80a3d56..06fc7cbef897 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1156,6 +1156,8 @@ struct fgraph_ops { struct ftrace_ops ops; /* for the hash lists */ void *private; int idx; + /* If skip_timestamp is true, this does not record timestamps. */ + bool skip_timestamp; }; void *fgraph_reserve_data(int idx, int size_bytes); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 7556fbbae323..a5722537bb79 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -131,6 +131,7 @@ DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; +static bool fgraph_skip_timestamp; /* LRU index table for fgraph_array */ static int fgraph_lru_table[FGRAPH_ARRAY_SIZE]; @@ -475,7 +476,7 @@ void ftrace_graph_stop(void) static int ftrace_push_return_trace(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp, - int fgraph_idx) + int fgraph_idx, bool skip_ts) { struct ftrace_ret_stack *ret_stack; unsigned long long calltime; @@ -498,8 +499,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, ret_stack = get_ret_stack(current, current->curr_ret_stack, &index); if (ret_stack && ret_stack->func == func && get_fgraph_type(current, index + FGRAPH_RET_INDEX) == FGRAPH_TYPE_BITMAP && - !is_fgraph_index_set(current, index + FGRAPH_RET_INDEX, fgraph_idx)) + !is_fgraph_index_set(current, index + FGRAPH_RET_INDEX, fgraph_idx)) { + /* If previous one skips calltime, update it. */ + if (!skip_ts && !ret_stack->calltime) + ret_stack->calltime = trace_clock_local(); return index + FGRAPH_RET_INDEX; + } val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_RET_INDEX; @@ -517,7 +522,10 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, return -EBUSY; } - calltime = trace_clock_local(); + if (skip_ts) + calltime = 0LL; + else + calltime = trace_clock_local(); index = READ_ONCE(current->curr_ret_stack); ret_stack = RET_STACK(current, index); @@ -601,7 +609,8 @@ int function_graph_enter_regs(unsigned long ret, unsigned long func, trace.func = func; trace.depth = ++current->curr_ret_depth; - index = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); + index = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0, + fgraph_skip_timestamp); if (index < 0) goto out; @@ -654,7 +663,8 @@ int function_graph_enter_ops(unsigned long ret, unsigned long func, return -ENODEV; /* Use start for the distance to ret_stack (skipping over reserve) */ - index = ftrace_push_return_trace(ret, func, frame_pointer, retp, gops->idx); + index = ftrace_push_return_trace(ret, func, frame_pointer, retp, gops->idx, + gops->skip_timestamp); if (index < 0) return index; type = get_fgraph_type(current, index); @@ -732,6 +742,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, *ret = ret_stack->ret; trace->func = ret_stack->func; trace->calltime = ret_stack->calltime; + trace->rettime = 0; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = current->curr_ret_depth; /* @@ -792,7 +803,6 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe return (unsigned long)panic; } - trace.rettime = trace_clock_local(); if (fregs) ftrace_regs_set_instruction_pointer(fregs, ret); @@ -808,6 +818,8 @@ __ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointe continue; if (gops == &fgraph_stub) continue; + if (!trace.rettime && !gops->skip_timestamp) + trace.rettime = trace_clock_local(); gops->retfunc(&trace, gops, fregs); } @@ -1185,6 +1197,24 @@ static void init_task_vars(int idx) read_unlock(&tasklist_lock); } +static void update_fgraph_skip_timestamp(void) +{ + int i; + + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (gops == &fgraph_stub) + continue; + + if (!gops->skip_timestamp) { + fgraph_skip_timestamp = false; + return; + } + } + fgraph_skip_timestamp = true; +} + int register_ftrace_graph(struct fgraph_ops *gops) { int command = 0; @@ -1219,6 +1249,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) gops->idx = i; ftrace_graph_active++; + update_fgraph_skip_timestamp(); if (ftrace_graph_active == 1) { register_pm_notifier(&ftrace_suspend_notifier); @@ -1242,6 +1273,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) fgraph_array[i] = &fgraph_stub; ftrace_graph_active--; fgraph_lru_release_index(i); + update_fgraph_skip_timestamp(); } out: mutex_unlock(&ftrace_lock); @@ -1265,8 +1297,8 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) goto out; fgraph_array[gops->idx] = &fgraph_stub; - ftrace_graph_active--; + update_fgraph_skip_timestamp(); if (!ftrace_graph_active) command = FTRACE_STOP_FUNC_RET; diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index afa52d9816cf..24bb8edec8a3 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -345,6 +345,7 @@ NOKPROBE_SYMBOL(fprobe_return); static struct fgraph_ops fprobe_graph_ops = { .entryfunc = fprobe_entry, .retfunc = fprobe_return, + .skip_timestamp = true, }; static int fprobe_graph_active;