On Thu, 4 Nov 2021 10:04:32 -0700 Beau Belgrave <beaub@xxxxxxxxxxxxxxxxxxx> wrote: > Pass iterator through to probes to allow copying data directly to the > probe buffers instead of taking multiple copies. Enables eBPF user and > raw iterator types out to programs for no-copy scenarios. > > Signed-off-by: Beau Belgrave <beaub@xxxxxxxxxxxxxxxxxxx> > --- > kernel/trace/trace_events_user.c | 97 +++++++++++++++++++++++--------- > 1 file changed, 69 insertions(+), 28 deletions(-) > > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c > index b5fe0550b489..d50118b9630a 100644 > --- a/kernel/trace/trace_events_user.c > +++ b/kernel/trace/trace_events_user.c > @@ -39,6 +39,10 @@ > #define MAX_EVENT_DESC 512 > #define EVENT_NAME(user_event) ((user_event)->tracepoint.name) > > +#define MAX_BPF_COPY_SIZE PAGE_SIZE > +#define MAX_STACK_BPF_DATA 512 > +#define copy_nofault copy_from_iter_nocache > + > static char *register_page_data; > > static DEFINE_MUTEX(reg_mutex); > @@ -63,8 +67,7 @@ struct user_event_refs { > struct user_event *events[]; > }; > > -typedef void (*user_event_func_t) (struct user_event *user, > - void *data, u32 datalen, > +typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, > void *tpdata); > > static int user_event_parse(char *name, char *args, char *flags, > @@ -491,7 +494,7 @@ static struct user_event *find_user_event(char *name, u32 *outkey) > /* > * Writes the user supplied payload out to a trace file. 
> */ > -static void user_event_ftrace(struct user_event *user, void *data, u32 datalen, > +static void user_event_ftrace(struct user_event *user, struct iov_iter *i, > void *tpdata) > { > struct trace_event_file *file; > @@ -506,41 +509,82 @@ static void user_event_ftrace(struct user_event *user, void *data, u32 datalen, > return; > > entry = trace_event_buffer_reserve(&event_buffer, file, > - sizeof(*entry) + datalen); > + sizeof(*entry) + i->count); > > if (unlikely(!entry)) > return; > > - memcpy(entry + 1, data, datalen); > + if (unlikely(!copy_nofault(entry + 1, i->count, i))) You need to call __trace_event_discard_commit(event_buffer.buffer, event_buffer.event); here before returning, because trace_event_buffer_reserve() not only allocates space on the ring buffer, but may also disable preemption. -- Steve > + return; > > trace_event_buffer_commit(&event_buffer); > } > > #ifdef CONFIG_PERF_EVENTS