Re: [RFC bpf-next 00/16] bpf: Speed up trampoline attach

On Thu, 22 Oct 2020 12:21:50 -0400
Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:

> On Thu, 22 Oct 2020 10:42:05 -0400
> Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> 
> > I'd like to see how batch functions will work. I guess I need to start
> > looking at the bpf trampoline, to see if we can modify the ftrace
> > trampoline to have a quick access to parameters. It would be much more
> > beneficial to update the existing generic function tracer to have access to
> > function parameters that all users could benefit from, than to tweak a
> > single use case into giving this feature to a single user.  
> 
> Looking at the creation of the bpf trampoline, I think I can modify ftrace
> to have a more flexible callback. Something that passes the callback the
> following:
> 
>  the function being traced.
>  a pointer to the parent caller (that could be modified)
>  a pointer to the original stack frame (what the stack was when the
>       function is entered)
>  An array of the arguments of the function (which could also be modified)
> 
> This is a change I've been wanting to make for some time, because it would
> allow function graph to be a user of function tracer, and would give
> everything access to the arguments.
> 
> We would still need a helper function to store all regs to keep kprobes
> working unmodified, but this would still only be done if asked.
> 
> The above change shouldn't hurt performance for either ftrace or bpf
> because it appears they both do the same. If BPF wants to have a batch
> processing of functions, then I think we should modify ftrace to do this
> new approach instead of creating another set of function trampolines.
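
In C terms, the callback being described would map to roughly this
prototype (a sketch of the idea; the POC patch below introduces this
shape as ftrace_args_func_t):

/* ip: the traced function; parent_ip: writable pointer to the return
 * address; stack: the stack as it was at function entry; args: the
 * (writable) array of the function's register arguments.
 */
typedef void (*ftrace_args_func_t)(unsigned long ip, unsigned long *parent_ip,
				   struct ftrace_ops *op, unsigned long *stack,
				   unsigned long *args);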

Below is a quick proof-of-concept patch I whipped up. It always saves 6
arguments, so if BPF is really interested in saving just the bare
minimum of arguments before calling, it can still use direct calls. But
if you are going to have a generic callback, you need to save all six
parameters; otherwise you can corrupt a parameter of the traced function
if it uses more than you save.

While looking at the bpf trampoline code, I noticed that if the verifier
can't find the btf func, it falls back to saving 5 parameters, which can
be a bug on x86 if the function itself uses 6 or more. If you save only
5 parameters and then call a bpf program that calls a helper function
taking more than 5 parameters, it will likely corrupt the 6th parameter
of the function being traced.

The code in question is this:

int btf_distill_func_proto(struct bpf_verifier_log *log,
			   struct btf *btf,
			   const struct btf_type *func,
			   const char *tname,
			   struct btf_func_model *m)
{
	const struct btf_param *args;
	const struct btf_type *t;
	u32 i, nargs;
	int ret;

	if (!func) {
		/* BTF function prototype doesn't match the verifier types.
		 * Fall back to 5 u64 args.
		 */
		for (i = 0; i < 5; i++)
			m->arg_size[i] = 8;
		m->ret_size = 8;
		m->nr_args = 5;
		return 0;
	}

Shouldn't it be falling back to 6, not 5?
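
To make the failure mode concrete, consider a hypothetical six-argument
function (my example, not from the kernel):

noinline long traced_six(long a1, long a2, long a3,
			 long a4, long a5, long a6)
{
	/* On x86-64, a1..a6 arrive in %rdi, %rsi, %rdx, %rcx, %r8, %r9 */
	return a1 + a6;
}

The trampoline runs at function entry and then returns into the body of
traced_six(), which still expects a6 in %r9. If the trampoline saves and
restores only the first five argument registers, and the attached bpf
program calls a helper taking six or more parameters, that call leaves
the helper's own sixth argument in %r9, and traced_six() computes with a
corrupted a6.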

-- Steve

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7edbd5ee5ed4..b65d73f430ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -287,6 +287,10 @@ extern void ftrace_caller_end(void);
 extern void ftrace_caller_op_ptr(void);
 extern void ftrace_regs_caller_op_ptr(void);
 extern void ftrace_regs_caller_jmp(void);
+extern void ftrace_args_caller(void);
+extern void ftrace_args_call(void);
+extern void ftrace_args_caller_end(void);
+extern void ftrace_args_caller_op_ptr(void);
 
 /* movq function_trace_op(%rip), %rdx */
 /* 0x48 0x8b 0x15 <offset-to-ftrace_trace_op (4 bytes)> */
@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	unsigned long end_offset;
 	unsigned long op_offset;
 	unsigned long call_offset;
-	unsigned long jmp_offset;
+	unsigned long jmp_offset = 0;
 	unsigned long offset;
 	unsigned long npages;
 	unsigned long size;
@@ -336,12 +340,16 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
 		call_offset = (unsigned long)ftrace_regs_call;
 		jmp_offset = (unsigned long)ftrace_regs_caller_jmp;
+	} else if (ops->flags & FTRACE_OPS_FL_ARGS) {
+		start_offset = (unsigned long)ftrace_args_caller;
+		end_offset = (unsigned long)ftrace_args_caller_end;
+		op_offset = (unsigned long)ftrace_args_caller_op_ptr;
+		call_offset = (unsigned long)ftrace_args_call;
 	} else {
 		start_offset = (unsigned long)ftrace_caller;
 		end_offset = (unsigned long)ftrace_caller_end;
 		op_offset = (unsigned long)ftrace_caller_op_ptr;
 		call_offset = (unsigned long)ftrace_call;
-		jmp_offset = 0;
 	}
 
 	size = end_offset - start_offset;
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index ac3d5f22fe64..65ca634d0b37 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -176,6 +176,58 @@ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
 	retq
 SYM_FUNC_END(ftrace_epilogue)
 
+SYM_FUNC_START(ftrace_args_caller)
+#ifdef CONFIG_FRAME_POINTER
+	pushq %rbp
+	movq %rsp, %rbp
+# define CALLED_OFFSET (7 * 8)
+# define PARENT_OFFSET (8 * 8)
+#else
+# define CALLED_OFFSET (6 * 8)
+# define PARENT_OFFSET (7 * 8)
+#endif
+	/* save args */
+	pushq %r9
+	pushq %r8
+	pushq %rcx
+	pushq %rdx
+	pushq %rsi
+	pushq %rdi
+
+	/*
+	 * Parameters passed to the callback:
+	 *   Call site (the function being traced)
+	 *   Address of the parent (return address) location
+	 *   Pointer to ftrace_ops
+	 *   Location of the stack when the function was called
+	 *   Array of the function's arguments
+	 */
+	movq CALLED_OFFSET(%rsp), %rdi
+	leaq PARENT_OFFSET(%rsp), %rsi
+SYM_INNER_LABEL(ftrace_args_caller_op_ptr, SYM_L_GLOBAL)
+	/* Load the ftrace_ops into the 3rd parameter */
+	movq function_trace_op(%rip), %rdx
+	movq %rsi, %rcx
+	leaq 0(%rsp), %r8
+
+SYM_INNER_LABEL(ftrace_args_call, SYM_L_GLOBAL)
+	callq ftrace_stub
+
+	popq %rdi
+	popq %rsi
+	popq %rdx
+	popq %rcx
+	popq %r8
+	popq %r9
+
+#ifdef CONFIG_FRAME_POINTER
+	popq %rbp
+#endif
+
+SYM_INNER_LABEL(ftrace_args_caller_end, SYM_L_GLOBAL)
+	jmp ftrace_epilogue
+SYM_FUNC_END(ftrace_args_caller)
+
 SYM_FUNC_START(ftrace_regs_caller)
 	/* Save the current flags before any operations that can change them */
 	pushfq
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1bd3a0356ae4..0d077e8d7bb4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -92,6 +92,17 @@ struct ftrace_ops;
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
 			      struct ftrace_ops *op, struct pt_regs *regs);
 
+typedef void (*ftrace_args_func_t)(unsigned long ip, unsigned long *parent_ip,
+				   struct ftrace_ops *op, unsigned long *stack,
+				   unsigned long *args);
+
+union ftrace_callback {
+	ftrace_func_t		func;
+	ftrace_args_func_t	args_func;
+};
+
+typedef union ftrace_callback ftrace_callback_t;
+
 ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
 
 /*
@@ -169,6 +180,7 @@ enum {
 	FTRACE_OPS_FL_TRACE_ARRAY		= BIT(15),
 	FTRACE_OPS_FL_PERMANENT                 = BIT(16),
 	FTRACE_OPS_FL_DIRECT			= BIT(17),
+	FTRACE_OPS_FL_ARGS			= BIT(18),
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -447,9 +459,11 @@ enum {
 	FTRACE_FL_DISABLED	= (1UL << 25),
 	FTRACE_FL_DIRECT	= (1UL << 24),
 	FTRACE_FL_DIRECT_EN	= (1UL << 23),
+	FTRACE_FL_ARGS		= (1UL << 22),
+	FTRACE_FL_ARGS_EN	= (1UL << 21),
 };
 
-#define FTRACE_REF_MAX_SHIFT	23
+#define FTRACE_REF_MAX_SHIFT	21
 #define FTRACE_REF_MAX		((1UL << FTRACE_REF_MAX_SHIFT) - 1)
 
 #define ftrace_rec_count(rec)	((rec)->flags & FTRACE_REF_MAX)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4833b6a82ce7..5632b0809dc0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1721,6 +1721,9 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (ops->flags & FTRACE_OPS_FL_DIRECT)
 				rec->flags |= FTRACE_FL_DIRECT;
 
+			else if (ops->flags & FTRACE_OPS_FL_ARGS)
+				rec->flags |= FTRACE_FL_ARGS;
+
 			/*
 			 * If there's only a single callback registered to a
 			 * function, and the ops has a trampoline registered
@@ -1757,6 +1760,10 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			if (ops->flags & FTRACE_OPS_FL_DIRECT)
 				rec->flags &= ~FTRACE_FL_DIRECT;
 
+			/* POC: clears unconditionally, but there may be more than one FL_ARGS ops */
+			if (ops->flags & FTRACE_OPS_FL_ARGS)
+				rec->flags &= ~FTRACE_FL_ARGS;
+
 			/*
 			 * If the rec had REGS enabled and the ops that is
 			 * being removed had REGS set, then see if there is
@@ -2103,6 +2110,13 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
 		    !(rec->flags & FTRACE_FL_TRAMP_EN))
 			flag |= FTRACE_FL_TRAMP;
 
+		/* Proof of concept */
+		if (ftrace_rec_count(rec) == 1) {
+			if (!(rec->flags & FTRACE_FL_ARGS) !=
+			    !(rec->flags & FTRACE_FL_ARGS_EN))
+				flag |= FTRACE_FL_ARGS;
+		}
+
 		/*
 		 * Direct calls are special, as count matters.
 		 * We must test the record for direct, if the
@@ -2144,6 +2158,17 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
 				else
 					rec->flags &= ~FTRACE_FL_TRAMP_EN;
 			}
+			if (flag & FTRACE_FL_ARGS) {
+				if (ftrace_rec_count(rec) == 1) {
+					if (rec->flags & FTRACE_FL_ARGS)
+						rec->flags |= FTRACE_FL_ARGS_EN;
+					else
+						rec->flags &= ~FTRACE_FL_ARGS_EN;
+				} else {
+					rec->flags &= ~FTRACE_FL_ARGS_EN;
+				}
+			}
+
 			if (flag & FTRACE_FL_DIRECT) {
 				/*
 				 * If there's only one user (direct_ops helper)
@@ -2192,7 +2217,8 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
 			 * and REGS states. The _EN flags must be disabled though.
 			 */
 			rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
-					FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN);
+					FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN |
+					FTRACE_FL_ARGS_EN);
 	}
 
 	ftrace_bug_type = FTRACE_BUG_NOP;
@@ -3630,7 +3656,8 @@ static int t_show(struct seq_file *m, void *v)
 			   ftrace_rec_count(rec),
 			   rec->flags & FTRACE_FL_REGS ? " R" : "  ",
 			   rec->flags & FTRACE_FL_IPMODIFY ? " I" : "  ",
-			   rec->flags & FTRACE_FL_DIRECT ? " D" : "  ");
+			   rec->flags & FTRACE_FL_DIRECT ? " D" :
+			   rec->flags & FTRACE_FL_ARGS ? " A" : "  ");
 		if (rec->flags & FTRACE_FL_TRAMP_EN) {
 			ops = ftrace_find_tramp_ops_any(rec);
 			if (ops) {
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 2c2126e1871d..a3da84b0e599 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -86,9 +86,48 @@ void ftrace_destroy_function_files(struct trace_array *tr)
 	ftrace_free_ftrace_ops(tr);
 }
 
+static void function_args_trace_call(unsigned long ip,
+				     unsigned long *parent_ip,
+				     struct ftrace_ops *op,
+				     unsigned long *stack,
+				     unsigned long *args)
+{
+	struct trace_array *tr = op->private;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	int bit;
+	int cpu;
+	int pc;
+
+	if (unlikely(!tr->function_enabled))
+		return;
+
+	pc = preempt_count();
+	preempt_disable_notrace();
+
+	bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	if (bit < 0)
+		goto out;
+
+	cpu = smp_processor_id();
+	data = per_cpu_ptr(tr->array_buffer.data, cpu);
+	if (!atomic_read(&data->disabled)) {
+		local_save_flags(flags);
+		trace_function(tr, ip, *parent_ip, flags, pc);
+		trace_printk("%pS %lx %lx %lx %lx %lx %lx\n",
+			     (void *)ip, args[0], args[1], args[2], args[3],
+			     args[4], args[5]);
+	}
+	trace_clear_recursion(bit);
+
+ out:
+	preempt_enable_notrace();
+
+}
+
 static int function_trace_init(struct trace_array *tr)
 {
-	ftrace_func_t func;
+	ftrace_callback_t callback;
 
 	/*
 	 * Instance trace_arrays get their ops allocated
@@ -101,11 +140,13 @@ static int function_trace_init(struct trace_array *tr)
 	/* Currently only the global instance can do stack tracing */
 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
 	    func_flags.val & TRACE_FUNC_OPT_STACK)
-		func = function_stack_trace_call;
-	else
-		func = function_trace_call;
+		callback.func = function_stack_trace_call;
+	else {
+		tr->ops->flags |= FTRACE_OPS_FL_ARGS;
+		callback.args_func = function_args_trace_call;
+	}
 
-	ftrace_init_array_ops(tr, func);
+	ftrace_init_array_ops(tr, callback.func);
 
 	tr->array_buffer.cpu = get_cpu();
 	put_cpu();
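
For illustration, here is a minimal, hypothetical sketch of how a module
could register an args-style callback against this POC API. The ops flag
and the union punning mirror the patch above; everything else (names,
the missing filter) is made up for the example and untested:

#include <linux/module.h>
#include <linux/ftrace.h>

static void my_args_callback(unsigned long ip, unsigned long *parent_ip,
			     struct ftrace_ops *op, unsigned long *stack,
			     unsigned long *args)
{
	/* Print the first two register arguments of the traced function */
	trace_printk("%pS: arg1=%lx arg2=%lx\n", (void *)ip, args[0], args[1]);
}

static struct ftrace_ops my_ops = {
	.flags = FTRACE_OPS_FL_ARGS,
};

static int __init my_tracer_init(void)
{
	ftrace_callback_t cb = { .args_func = my_args_callback };

	my_ops.func = cb.func;	/* same union punning the POC uses */
	/* No filter set here, so this would attach to every traced function */
	return register_ftrace_function(&my_ops);
}
module_init(my_tracer_init);
MODULE_LICENSE("GPL");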


