On Tue, 21 Feb 2023 13:11:35 -0800 Beau Belgrave <beaub@xxxxxxxxxxxxxxxxxxx> wrote: > include/linux/user_events.h | 53 ++- > include/uapi/linux/user_events.h | 15 +- > kernel/trace/Kconfig | 5 +- > kernel/trace/trace_events_user.c | 586 ++++++++++++++++++++++++------- > 4 files changed, 517 insertions(+), 142 deletions(-) > > diff --git a/include/linux/user_events.h b/include/linux/user_events.h > index 3d747c45d2fa..0120b3dd5b03 100644 > --- a/include/linux/user_events.h > +++ b/include/linux/user_events.h > @@ -9,13 +9,63 @@ > #ifndef _LINUX_USER_EVENTS_H > #define _LINUX_USER_EVENTS_H > > +#include <linux/list.h> > +#include <linux/refcount.h> > +#include <linux/mm_types.h> > +#include <linux/workqueue.h> > #include <uapi/linux/user_events.h> > > #ifdef CONFIG_USER_EVENTS > struct user_event_mm { > + struct list_head link; > + struct list_head enablers; > + struct mm_struct *mm; > + struct user_event_mm *next; > + refcount_t refcnt; > + refcount_t tasks; > + struct rcu_work put_rwork; > }; This is more of a nit, and not something to change unless there's more real content to change, but please when making structures tab out the field names: struct user_event_mm { struct list_head link; struct list_head enablers; struct mm_struct *mm; struct user_event_mm *next; refcount_t refcnt; refcount_t tasks; struct rcu_work put_rwork; }; See how much easier it is to read (and know what fields exist). > -#endif > > #endif /* _LINUX_USER_EVENTS_H */ > diff --git a/include/uapi/linux/user_events.h b/include/uapi/linux/user_events.h > index 03f92366068d..22521bc622db 100644 > --- a/include/uapi/linux/user_events.h > +++ b/include/uapi/linux/user_events.h > @@ -27,12 +27,21 @@ struct user_reg { > /* Input: Size of the user_reg structure being used */ > __u32 size; > > + /* Input: Bit in enable address to use */ > + __u8 enable_bit; > + > + /* Input: Enable size in bytes at address */ > + __u8 enable_size; > + > + /* Input: Flags for future use, set to 0 */ > + __u16 flags; > + > + /* Input: Address to update when enabled */ > + __u64 enable_addr; > + > /* Input: Pointer to string with event name, description and flags */ > __u64 name_args; > > - /* Output: Bitwise index of the event within the status page */ > - __u32 status_bit; > - > /* Output: Index of the event to use when writing data */ > __u32 write_index; > } __attribute__((__packed__)); May want to tab the above too, but as each field is commented, it's not as big of an issue. > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig > index d7043043f59c..b61a1bfbfc22 100644 > --- a/kernel/trace/Kconfig > +++ b/kernel/trace/Kconfig > @@ -791,9 +791,10 @@ config USER_EVENTS > can be used like an existing kernel trace event. User trace > events are generated by writing to a tracefs file. User > processes can determine if their tracing events should be > - generated by memory mapping a tracefs file and checking for > - an associated byte being non-zero. > + generated by registering a value and bit with the kernel > + that reflects when it is enabled or not. > > + See Documentation/trace/user_events.rst. > If in doubt, say N. > > config HIST_TRIGGERS > diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c > index 070551480747..553a82ee7aeb 100644 > --- a/kernel/trace/trace_events_user.c > +++ b/kernel/trace/trace_events_user.c > @@ -19,6 +19,7 @@ > #include <linux/tracefs.h> > #include <linux/types.h> > #include <linux/uaccess.h> > +#include <linux/highmem.h> > #include <linux/user_events.h> > #include "trace.h" > #include "trace_dynevent.h" > @@ -29,34 +30,11 @@ > #define FIELD_DEPTH_NAME 1 > #define FIELD_DEPTH_SIZE 2 > > -/* > - * Limits how many trace_event calls user processes can create: > - * Must be a power of two of PAGE_SIZE. > - */ > -#define MAX_PAGE_ORDER 0 > -#define MAX_PAGES (1 << MAX_PAGE_ORDER) > -#define MAX_BYTES (MAX_PAGES * PAGE_SIZE) > -#define MAX_EVENTS (MAX_BYTES * 8) > - > /* Limit how long of an event name plus args within the subsystem. */ > #define MAX_EVENT_DESC 512 > #define EVENT_NAME(user_event) ((user_event)->tracepoint.name) > #define MAX_FIELD_ARRAY_SIZE 1024 > > -/* > - * The MAP_STATUS_* macros are used for taking a index and determining the > - * appropriate byte and the bit in the byte to set/reset for an event. > - * > - * The lower 3 bits of the index decide which bit to set. > - * The remaining upper bits of the index decide which byte to use for the bit. > - * > - * This is used when an event has a probe attached/removed to reflect live > - * status of the event wanting tracing or not to user-programs via shared > - * memory maps. > - */ > -#define MAP_STATUS_BYTE(index) ((index) >> 3) > -#define MAP_STATUS_MASK(index) BIT((index) & 7) > - > /* > * Internal bits (kernel side only) to keep track of connected probes: > * These are used when status is requested in text form about an event. These > @@ -70,20 +48,14 @@ > #define EVENT_STATUS_OTHER BIT(7) > > /* > - * Stores the pages, tables, and locks for a group of events. > - * Each logical grouping of events has its own group, with a > - * matching page for status checks within user programs. This > - * allows for isolation of events to user programs by various > - * means. > + * Stores the system name, tables, and locks for a group of events. This > + * allows isolation for events by various means. > */ > struct user_event_group { > - struct page *pages; > - char *register_page_data; > char *system_name; > struct hlist_node node; > struct mutex reg_mutex; > DECLARE_HASHTABLE(register_table, 8); > - DECLARE_BITMAP(page_bitmap, MAX_EVENTS); > }; > > /* Group for init_user_ns mapping, top-most group */ > @@ -106,12 +78,34 @@ struct user_event { > struct list_head fields; > struct list_head validators; > refcount_t refcnt; > - int index; > - int flags; > int min_size; > char status; > }; But these should be tabbed out. > > +/* > + * Stores per-mm/event properties that enable an address to be > + * updated properly for each task. As tasks are forked, we use > + * these to track enablement sites that are tied to an event. > + */ > +struct user_event_enabler { > + struct list_head link; > + struct user_event *event; > + unsigned long addr; > + > + /* Track enable bit, flags, etc. Aligned for bitops. */ > + unsigned int values; > +}; > + And the above. > +/* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */ > +#define ENABLE_VAL_BIT_MASK 0x3F > + This is something that can be added later as a clean up, but if there's a real issue found with these patches, then make the next version have the updates. If you do another version, update the tabs of existing structures in a separate patch from any content change. -- Steve