Hi Steven, This patch passed my test. But I have some concern, please see comment inline. On Fri, Jan 07, 2022 at 05:56:57PM -0500, Steven Rostedt wrote: > From: Steven Rostedt <rostedt@xxxxxxxxxxx> > > Pingfan reported that the following causes a fault: > > echo "filename ~ \"cpu\"" > events/syscalls/sys_enter_openat/filter > echo 1 > events/syscalls/sys_enter_at/enable > > The reason is that trace event filter treats the user space pointer > defined by "filename" as a normal pointer to compare against the "cpu" > string. If the string is not loaded into memory yet, it will trigger a > fault in kernel space: > > kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0001) - permissions violation > PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867 > Oops: 0001 [#1] PREEMPT SMP PTI > CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1 > Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 > RIP: 0010:strlen+0x0/0x20 > Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 > 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 > 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 > RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246 > RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000 > RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60 > RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000 > R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380 > R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0 > FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > PKRU: 55555554 > Call Trace: > filter_pred_pchar+0x18/0x40 > filter_match_preds+0x31/0x70 > ftrace_syscall_enter+0x27a/0x2c0 > syscall_trace_enter.constprop.0+0x1aa/0x1d0 > do_syscall_64+0x16/0x90 > entry_SYSCALL_64_after_hwframe+0x44/0xae > RIP: 0033:0x7fa861d88664 > > To be even more robust, test both kernel and user space strings. If the > string fails to read, then simply have the filter fail. > > Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@xxxxxxxxx/ > > Cc: stable@xxxxxxxxxxxxxxx > Reported-by: Pingfan Liu <kernelfans@xxxxxxxxx> > Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings") > Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx> > --- > kernel/trace/trace_events_filter.c | 79 +++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c > index 996920ed1812..cf0fa9a785c7 100644 > --- a/kernel/trace/trace_events_filter.c > +++ b/kernel/trace/trace_events_filter.c > @@ -5,6 +5,7 @@ > * Copyright (C) 2009 Tom Zanussi <tzanussi@xxxxxxxxx> > */ > > +#include <linux/uaccess.h> > #include <linux/module.h> > #include <linux/ctype.h> > #include <linux/mutex.h> > @@ -654,12 +655,50 @@ DEFINE_EQUALITY_PRED(32); > DEFINE_EQUALITY_PRED(16); > DEFINE_EQUALITY_PRED(8); > > +/* user space strings temp buffer */ > +#define USTRING_BUF_SIZE 512 Should it be PATH_MAX(4096) in case of matching against a file path? > + > +struct ustring_buffer { > + char buffer[USTRING_BUF_SIZE]; > +}; > + > +static __percpu struct ustring_buffer *ustring_per_cpu; > + > +static __always_inline char *test_string(char *str) > +{ > + struct ustring_buffer *ubuf; > + char __user *ustr; > + char *kstr; > + > + if (!ustring_per_cpu) > + return NULL; > + > + ubuf = this_cpu_ptr(ustring_per_cpu); > + kstr = ubuf->buffer; > + > + if (likely((unsigned long)str >= TASK_SIZE)) { > + /* For safety, do not trust the string pointer */ > + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) Since no other trace_event_class except event_class_syscall_enter tries to uaccess, so the unreliable source only comes from event_class_syscall_enter. In that case, the access to kernel address is forbidden. So here just return -EACCES ? > + return NULL; > + } else { > + /* user space address? */ > + ustr = str; > + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) > + return NULL; > + } > + return kstr; > +} > + > /* Filter predicate for fixed sized arrays of characters */ > static int filter_pred_string(struct filter_pred *pred, void *event) > { > char *addr = (char *)(event + pred->offset); > int cmp, match; > > + addr = test_string(addr); Among all of trace_event_class, only event_class_syscall_enter exposed to this fault (uprobe does not uaccess). So I think the strncpy_*() can be avoided based on class, which improves performance. > + if (!addr) > + return 0; > + > cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); > > match = cmp ^ pred->not; > @@ -671,10 +710,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) > static int filter_pred_pchar(struct filter_pred *pred, void *event) > { > char **addr = (char **)(event + pred->offset); > + char *str; > int cmp, match; > - int len = strlen(*addr) + 1; /* including tailing '\0' */ > + int len; > + > + str = test_string(*addr); > + if (!str) > + return 0; > > - cmp = pred->regex.match(*addr, &pred->regex, len); > + len = strlen(str) + 1; /* including tailing '\0' */ > + cmp = pred->regex.match(str, &pred->regex, len); > > match = cmp ^ pred->not; > > @@ -784,6 +829,10 @@ static int filter_pred_none(struct filter_pred *pred, void *event) > > static int regex_match_full(char *str, struct regex *r, int len) > { > + str = test_string(str); Since all regex_match_*() are called in filter_pred_*(), which have already protected codes from page fault. So no need to double check. Thanks, Pingfan > + if (!str) > + return 0; > + > /* len of zero means str is dynamic and ends with '\0' */ > if (!len) > return strcmp(str, r->pattern) == 0; > @@ -793,6 +842,10 @@ static int regex_match_full(char *str, struct regex *r, int len) > > static int regex_match_front(char *str, struct regex *r, int len) > { > + str = test_string(str); > + if (!str) > + return 0; > + > if (len && len < r->len) > return 0; > > @@ -801,6 +854,10 @@ static int regex_match_front(char *str, struct regex *r, int len) > > static int regex_match_middle(char *str, struct regex *r, int len) > { > + str = test_string(str); > + if (!str) > + return 0; > + > if (!len) > return strstr(str, r->pattern) != NULL; > > @@ -811,6 +868,10 @@ static int regex_match_end(char *str, struct regex *r, int len) > { > int strlen = len - 1; > > + str = test_string(str); > + if (!str) > + return 0; > + > if (strlen >= r->len && > memcmp(str + strlen - r->len, r->pattern, r->len) == 0) > return 1; > @@ -819,6 +880,10 @@ static int regex_match_end(char *str, struct regex *r, int len) > > static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) > { > + str = test_string(str); > + if (!str) > + return 0; > + > if (glob_match(r->pattern, str)) > return 1; > return 0; > @@ -1335,6 +1400,13 @@ static int parse_pred(const char *str, void *data, > strncpy(pred->regex.pattern, str + s, len); > pred->regex.pattern[len] = 0; > > + if (!ustring_per_cpu) { > + /* Once allocated, keep it around for good */ > + ustring_per_cpu = alloc_percpu(struct ustring_buffer); > + if (!ustring_per_cpu) > + goto err_mem; > + } > + > filter_build_regex(pred); > > if (field->filter_type == FILTER_COMM) { > @@ -1415,6 +1487,9 @@ static int parse_pred(const char *str, void *data, > err_free: > kfree(pred); > return -EINVAL; > +err_mem: > + kfree(pred); > + return -ENOMEM; > } > > enum { > -- > 2.33.0