On Mon, Dec 05, 2022 at 04:33:34PM -0500, Mathieu Desnoyers wrote: > On 2022-12-05 16:00, Beau Belgrave wrote: > > Operators want to be able to ensure enough tracepoints exist on the > > system for kernel components as well as for user components. Since there > > are only up to 64K events, by default allow up to half to be used by > > user events. > > > > Add a boot parameter (user_events_max=%d) and a kernel sysctl parameter > > (kernel.user_events_max) to set a global limit that is honored among all > > groups on the system. This ensures hard limits can be setup to prevent > > user processes from consuming all event IDs on the system. > > > > Signed-off-by: Beau Belgrave <beaub@xxxxxxxxxxxxxxxxxxx> > > --- > > kernel/trace/trace_events_user.c | 57 ++++++++++++++++++++++++++++++++ > > 1 file changed, 57 insertions(+) > > > > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c > > index 36def244a755..754942ba92a1 100644 > > --- a/kernel/trace/trace_events_user.c > > +++ b/kernel/trace/trace_events_user.c > > @@ -20,6 +20,7 @@ > > #include <linux/types.h> > > #include <linux/uaccess.h> > > #include <linux/highmem.h> > > +#include <linux/init.h> > > #include <linux/user_events.h> > > #include "trace.h" > > #include "trace_dynevent.h" > > @@ -61,6 +62,12 @@ struct user_event_group { > > /* Group for init_user_ns mapping, top-most group */ > > static struct user_event_group *init_group; > > +/* Max allowed events for the whole system */ > > +static unsigned int max_user_events = 32768; > > + > > +/* Current number of events on the whole system */ > > +static unsigned int current_user_events; > > + > > /* > > * Stores per-event properties, as users register events > > * within a file a user_event might be created if it does not > > @@ -1247,6 +1254,11 @@ static int destroy_user_event(struct user_event *user) > > kfree(EVENT_NAME(user)); > > kfree(user); > > + if (current_user_events > 0) > > + current_user_events--; > > What holds the 
user_events mutex that guarantees that non-atomic decrement > is safe here ? > All callers of destroy_user_event hold the event_mutex, since it removes the call from the system. This is the same for when current_user_events gets incremented. Maybe add a lockdep assertion (e.g. lockdep_assert_held(&event_mutex)) here to make it clear? Thanks, -Beau > Thanks, > > Mathieu > > > + else > > + pr_alert("BUG: Bad current_user_events\n"); > > + > > return ret; > > } > > @@ -1732,6 +1744,11 @@ static int user_event_parse(struct user_event_group *group, char *name, > > mutex_lock(&event_mutex); > > + if (current_user_events >= max_user_events) { > > + ret = -EMFILE; > > + goto put_user_lock; > > + } > > + > > ret = user_event_trace_register(user); > > if (ret) > > @@ -1743,6 +1760,7 @@ static int user_event_parse(struct user_event_group *group, char *name, > > dyn_event_init(&user->devent, &user_event_dops); > > dyn_event_add(&user->devent, &user->call); > > hash_add(group->register_table, &user->node, key); > > + current_user_events++; > > mutex_unlock(&event_mutex); > > @@ -2369,6 +2387,43 @@ static int create_user_tracefs(void) > > return -ENODEV; > > } > > +static int __init set_max_user_events(char *str) > > +{ > > + if (!str) > > + return 0; > > + > > + if (kstrtouint(str, 0, &max_user_events)) > > + return 0; > > + > > + return 1; > > +} > > +__setup("user_events_max=", set_max_user_events); > > + > > +static int set_max_user_events_sysctl(struct ctl_table *table, int write, > > + void *buffer, size_t *lenp, loff_t *ppos) > > +{ > > + int ret; > > + > > + mutex_lock(&event_mutex); > > + > > + ret = proc_douintvec(table, write, buffer, lenp, ppos); > > + > > + mutex_unlock(&event_mutex); > > + > > + return ret; > > +} > > + > > +static struct ctl_table user_event_sysctls[] = { > > + { > > + .procname = "user_events_max", > > + .data = &max_user_events, > > + .maxlen = sizeof(unsigned int), > > + .mode = 0644, > > + .proc_handler = set_max_user_events_sysctl, > > + }, > > + {} > > +}; > > + > > static int
__init trace_events_user_init(void) > > { > > int ret; > > @@ -2398,6 +2453,8 @@ static int __init trace_events_user_init(void) > > if (dyn_event_register(&user_event_dops)) > > pr_warn("user_events could not register with dyn_events\n"); > > + register_sysctl_init("kernel", user_event_sysctls); > > + > > return 0; > > } > > -- > Mathieu Desnoyers > EfficiOS Inc. > https://www.efficios.com