On Fri, 5 May 2023 12:12:12 -0400 Steven Rostedt <rostedt@xxxxxxxxxxx> wrote: > On Tue, 2 May 2023 11:17:36 +0900 > "Masami Hiramatsu (Google)" <mhiramat@xxxxxxxxxx> wrote: > > > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c > > new file mode 100644 > > index 000000000000..0049d9ef2402 > > --- /dev/null > > +++ b/kernel/trace/trace_fprobe.c > > @@ -0,0 +1,1053 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Fprobe-based tracing events > > + * Copyright (C) 2022 Google LLC. > > + */ > > +#define pr_fmt(fmt) "trace_fprobe: " fmt > > + > > +#include <linux/fprobe.h> > > +#include <linux/module.h> > > +#include <linux/rculist.h> > > +#include <linux/security.h> > > +#include <linux/uaccess.h> > > + > > +#include "trace_dynevent.h" > > +#include "trace_probe.h" > > +#include "trace_probe_kernel.h" > > +#include "trace_probe_tmpl.h" > > + > > +#define FPROBE_EVENT_SYSTEM "fprobes" > > +#define RETHOOK_MAXACTIVE_MAX 4096 > > + > > +static int trace_fprobe_create(const char *raw_command); > > +static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev); > > +static int trace_fprobe_release(struct dyn_event *ev); > > +static bool trace_fprobe_is_busy(struct dyn_event *ev); > > +static bool trace_fprobe_match(const char *system, const char *event, > > + int argc, const char **argv, struct dyn_event *ev); > > + > > +static struct dyn_event_operations trace_fprobe_ops = { > > + .create = trace_fprobe_create, > > + .show = trace_fprobe_show, > > + .is_busy = trace_fprobe_is_busy, > > + .free = trace_fprobe_release, > > + .match = trace_fprobe_match, > > +}; > > + > > +/* > > + * Fprobe event core functions > > + */ > > +struct trace_fprobe { > > + struct dyn_event devent; > > + struct fprobe fp; > > + const char *symbol; > > + struct trace_probe tp; > > +}; > > + > > +static bool is_trace_fprobe(struct dyn_event *ev) > > +{ > > + return ev->ops == &trace_fprobe_ops; > > +} > > + > > +static struct trace_fprobe *to_trace_fprobe(struct dyn_event *ev) > > +{ > > + return container_of(ev, struct trace_fprobe, devent); > > +} > > + > > +/** > > + * for_each_trace_fprobe - iterate over the trace_fprobe list > > + * @pos: the struct trace_fprobe * for each entry > > + * @dpos: the struct dyn_event * to use as a loop cursor > > + */ > > +#define for_each_trace_fprobe(pos, dpos) \ > > + for_each_dyn_event(dpos) \ > > + if (is_trace_fprobe(dpos) && (pos = to_trace_fprobe(dpos))) > > + > > +static bool trace_fprobe_is_return(struct trace_fprobe *tf) > > +{ > > + return tf->fp.exit_handler != NULL; > > +} > > + > > +static const char *trace_fprobe_symbol(struct trace_fprobe *tf) > > +{ > > + return tf->symbol ? tf->symbol : "unknown"; > > +} > > + > > +static bool trace_fprobe_is_busy(struct dyn_event *ev) > > +{ > > + struct trace_fprobe *tf = to_trace_fprobe(ev); > > + > > + return trace_probe_is_enabled(&tf->tp); > > +} > > + > > +static bool trace_fprobe_match_command_head(struct trace_fprobe *tf, > > + int argc, const char **argv) > > +{ > > + char buf[MAX_ARGSTR_LEN + 1]; > > + > > + if (!argc) > > + return true; > > + > > + snprintf(buf, sizeof(buf), "%s", trace_fprobe_symbol(tf)); > > + if (strcmp(buf, argv[0])) > > + return false; > > + argc--; argv++; > > + > > + return trace_probe_match_command_args(&tf->tp, argc, argv); > > +} > > + > > +static bool trace_fprobe_match(const char *system, const char *event, > > + int argc, const char **argv, struct dyn_event *ev) > > +{ > > + struct trace_fprobe *tf = to_trace_fprobe(ev); > > + > > + return (event[0] == '\0' || > > + strcmp(trace_probe_name(&tf->tp), event) == 0) && > > + (!system || strcmp(trace_probe_group_name(&tf->tp), system) == 0) && > > + trace_fprobe_match_command_head(tf, argc, argv); > > The above is really hard to read, and Linus hates these kinds of > statements. Please break it up (the compiler should do the right thing). > > Reversing the tests to return false: > > if (event[0] != '\0' && > strcmp(trace_probe_name(&tf->tp), event) != 0)) > return false; > > if (system && strcmp(trace_probe_group_name(&tf->tp), system) != 0)) > return false; > > return trace_fprobe_match_command_head(tf, argc, argv); > OK, I'll clean it up. > > > +} > > + > > +static bool trace_fprobe_is_registered(struct trace_fprobe *tf) > > +{ > > + return fprobe_is_registered(&tf->fp); > > +} > > + > > +/* Note that we don't verify it, since the code does not come from user space */ > > Verify what? > > Hmm, I see this is a copy of the comment from both trace_kprobe.c and > trace_uprobe.c. I think this requires a bit more explanation (and also in > those locations as well). Yeah, sorry about this comment. I meant that this fetch process (will run on probe handlers) did not verify the fetch_insn code because it should be generated by parser. /* * Note that we don't verify the fetch_insn code, since it does not come * from user space. */ > > > +static int > > +process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, > > + void *base) > > +{ > > + struct pt_regs *regs = rec; > > + unsigned long val; > > + > > +retry: > > + /* 1st stage: get value from context */ > > + switch (code->op) { > > + case FETCH_OP_REG: > > + val = regs_get_register(regs, code->param); > > + break; > > + case FETCH_OP_STACK: > > + val = regs_get_kernel_stack_nth(regs, code->param); > > + break; > > + case FETCH_OP_STACKP: > > + val = kernel_stack_pointer(regs); > > + break; > > + case FETCH_OP_RETVAL: > > + val = regs_return_value(regs); > > + break; > > > > + case FETCH_OP_IMM: > > + val = code->immediate; > > + break; > > + case FETCH_OP_COMM: > > + val = (unsigned long)current->comm; > > + break; > > + case FETCH_OP_DATA: > > + val = (unsigned long)code->data; > > + break; > > These are new and not part of trace_kprobe.c. Should we have a version that > the two could share? Oh, I remember that I missed one series which did that. https://lore.kernel.org/lkml/20230103223202.7963bfb62b4ae5827e51ee30@xxxxxxxxxx/T/ Let's pick it. > > Probably a helper function that can be called by these two. > > > +#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API > > + case FETCH_OP_ARG: > > + val = regs_get_kernel_argument(regs, code->param); > > + break; > > +#endif > > + case FETCH_NOP_SYMBOL: /* Ignore a place holder */ > > + code++; > > + goto retry; > > + default: > > + return -EILSEQ; > > + } > > + code++; > > + > > + return process_fetch_insn_bottom(code, val, dest, base); > > +} > > +NOKPROBE_SYMBOL(process_fetch_insn) > > + > > +/* function entry handler */ > > +static nokprobe_inline void > > +__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip, > > + struct pt_regs *regs, > > + struct trace_event_file *trace_file) > > +{ > > + struct fentry_trace_entry_head *entry; > > + struct trace_event_call *call = trace_probe_event_call(&tf->tp); > > + struct trace_event_buffer fbuffer; > > + int dsize; > > + > > + WARN_ON(call != trace_file->event_call); > > WARN_ON_ONCE()? > > And if you are doing the check, perhaps even: > > if (WARN_ON_ONCE(call != trace_file->event_call)) > return; OK. Thank you! > > -- Steve > > > + > > + if (trace_trigger_soft_disabled(trace_file)) > > + return; > > + > > + dsize = __get_data_size(&tf->tp, regs); > > + > > + entry = trace_event_buffer_reserve(&fbuffer, trace_file, > > + sizeof(*entry) + tf->tp.size + dsize); > > + if (!entry) > > + return; > > + > > + fbuffer.regs = regs; > > + entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); > > + entry->ip = entry_ip; > > + store_trace_args(&entry[1], &tf->tp, regs, sizeof(*entry), dsize); > > + > > + trace_event_buffer_commit(&fbuffer); > > +} > > + -- Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>