[PATCH 18/18] tracing/perf: Allow perf to use function based events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Steven Rostedt (VMware)" <rostedt@xxxxxxxxxxx>

Have perf use function based events.

 # echo 'SyS_openat(int dfd, string buf, x32 flags, x32 mode)' > /sys/kernel/tracing/function_events
 # perf record -e functions:SyS_openat grep task_forks /proc/kallsyms
 # perf script
    grep   913 [002]  5713.413239: functions:SyS_openat: entry_SYSCALL_64_fastpath->sys_openat(dfd=-100, buf=/proc/kallsyms, flags=100, mode=0)

Signed-off-by: Steven Rostedt (VMware) <rostedt@xxxxxxxxxxx>
---
 Documentation/trace/function-based-events.rst |   3 +-
 kernel/trace/trace_event_ftrace.c             | 134 ++++++++++++++++++++------
 2 files changed, 104 insertions(+), 33 deletions(-)

diff --git a/Documentation/trace/function-based-events.rst b/Documentation/trace/function-based-events.rst
index 3b341992b93d..6effde96d3d6 100644
--- a/Documentation/trace/function-based-events.rst
+++ b/Documentation/trace/function-based-events.rst
@@ -48,7 +48,8 @@ enable  filter  format  hist  id  trigger
 
 Even though the above function based event does not record much more
 than the function tracer does, it does become a full fledge event.
-This can be used by the histogram infrastructure, and triggers.
+This can be used by the histogram infrastructure, triggers, and perf,
+through which eBPF programs can be attached to the event.
 
  # cat events/functions/do_IRQ/format
 name: do_IRQ
diff --git a/kernel/trace/trace_event_ftrace.c b/kernel/trace/trace_event_ftrace.c
index b5b719680686..b145639eac45 100644
--- a/kernel/trace/trace_event_ftrace.c
+++ b/kernel/trace/trace_event_ftrace.c
@@ -747,46 +747,33 @@ static int get_string(unsigned long addr, unsigned int idx,
 	return len;
 }
 
-static void func_event_trace(struct trace_event_file *trace_file,
-			     struct func_event *func_event,
-			     unsigned long ip, unsigned long parent_ip,
-			     struct pt_regs *pt_regs)
+static int get_event_size(struct func_event *func_event, struct pt_regs *pt_regs,
+			  long *args, int *nr_args)
 {
-	struct func_event_hdr *entry;
-	struct trace_event_call *call = &func_event->call;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buffer;
-	struct func_arg *arg;
-	long args[func_event->arg_cnt];
-	long long val = 1;
-	unsigned long irq_flags;
-	int str_offset;
-	int str_idx = 0;
-	int nr_args = 0;
 	int size;
-	int pc;
-
-	if (trace_trigger_soft_disabled(trace_file))
-		return;
-
-	local_save_flags(irq_flags);
-	pc = preempt_count();
 
-	size = func_event->arg_offset + sizeof(*entry);
+	size = func_event->arg_offset + sizeof(struct func_event_hdr);
 
 	if (func_event->arg_cnt)
-		nr_args = arch_get_func_args(pt_regs, 0, func_event->arg_cnt, args);
+		*nr_args = arch_get_func_args(pt_regs, 0, func_event->arg_cnt, args);
+	else
+		*nr_args = 0;
 
 	if (func_event->has_strings)
-		size += calculate_strings(func_event, nr_args, args);
+		size += calculate_strings(func_event, *nr_args, args);
 
-	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
-						call->event.type,
-						size, irq_flags, pc);
-	if (!event)
-		return;
+	return size;
+}
+
+static void
+record_entry(struct func_event_hdr *entry, struct func_event *func_event,
+	     unsigned long ip, unsigned long parent_ip, int nr_args, long *args)
+{
+	struct func_arg *arg;
+	long long val;
+	int str_offset;
+	int str_idx = 0;
 
-	entry = ring_buffer_event_data(event);
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
 
@@ -809,11 +796,80 @@ static void func_event_trace(struct trace_event_file *trace_file,
 		} else
 			memcpy(&entry->data[arg->offset], &val, arg->size);
 	}
+}
+
+static void func_event_trace(struct trace_event_file *trace_file,
+			     struct func_event *func_event,
+			     unsigned long ip, unsigned long parent_ip,
+			     struct pt_regs *pt_regs)
+{
+	struct func_event_hdr *entry;
+	struct trace_event_call *call = &func_event->call;
+	struct ring_buffer_event *event;
+	struct ring_buffer *buffer;
+	long args[func_event->arg_cnt];
+	unsigned long irq_flags;
+	int nr_args;
+	int size;
+	int pc;
+
+	if (trace_trigger_soft_disabled(trace_file))
+		return;
+
+	local_save_flags(irq_flags);
+	pc = preempt_count();
+
+	size = get_event_size(func_event, pt_regs, args, &nr_args);
+
+	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
+						call->event.type,
+						size, irq_flags, pc);
+	if (!event)
+		return;
 
+	entry = ring_buffer_event_data(event);
+	record_entry(entry, func_event, ip, parent_ip, nr_args, args);
 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
 					 entry, irq_flags, pc, pt_regs);
 }
 
+#ifdef CONFIG_PERF_EVENTS
+/* Function based event perf handler */
+static void func_event_perf(struct func_event *func_event,
+			    unsigned long ip, unsigned long parent_ip,
+			    struct pt_regs *pt_regs)
+{
+	struct trace_event_call *call = &func_event->call;
+	struct func_event_hdr *entry;
+	struct hlist_head *head;
+	long args[func_event->arg_cnt];
+	int nr_args = 0;
+	int rctx;
+	int size;
+
+	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, pt_regs))
+		return;
+
+	head = this_cpu_ptr(call->perf_events);
+	if (hlist_empty(head))
+		return;
+
+	size = get_event_size(func_event, pt_regs, args, &nr_args);
+
+	entry = perf_trace_buf_alloc(size, NULL, &rctx);
+	if (!entry)
+		return;
+
+	record_entry(entry, func_event, ip, parent_ip, nr_args, args);
+	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, pt_regs,
+			      head, NULL);
+}
+#else
+static inline void func_event_perf(struct func_event *func_event,
+				   unsigned long ip, unsigned long parent_ip,
+				   struct pt_regs *pt_regs) { }
+#endif
+
 static void
 func_event_call(unsigned long ip, unsigned long parent_ip,
 		    struct ftrace_ops *op, struct pt_regs *pt_regs)
@@ -825,7 +881,10 @@ func_event_call(unsigned long ip, unsigned long parent_ip,
 
 	rcu_read_lock_sched();
 	list_for_each_entry_rcu(ff, &func_event->files, list) {
-		func_event_trace(ff->file, func_event, ip, parent_ip, pt_regs);
+		if (ff->file)
+			func_event_trace(ff->file, func_event, ip, parent_ip, pt_regs);
+		else
+			func_event_perf(func_event, ip, parent_ip, pt_regs);
 	}
 	rcu_read_unlock_sched();
 }
@@ -1041,6 +1100,17 @@ static int func_event_register(struct trace_event_call *event,
 		return enable_func_event(func_event, file);
 	case TRACE_REG_UNREGISTER:
 		return disable_func_event(func_event, file);
+#ifdef CONFIG_PERF_EVENTS
+	case TRACE_REG_PERF_REGISTER:
+		return enable_func_event(func_event, NULL);
+	case TRACE_REG_PERF_UNREGISTER:
+		return disable_func_event(func_event, NULL);
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+		return 0;
+#endif
 	default:
 		break;
 	}
-- 
2.15.1


--
To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Development]     [Linux USB Development]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux