On Tue, Sep 10, 2024 at 11:10:23PM +0900, Masami Hiramatsu wrote: > On Mon, 9 Sep 2024 09:45:48 +0200 > Jiri Olsa <jolsa@xxxxxxxxxx> wrote: > > > Adding support for uprobe consumer to be defined as session and have > > new behaviour for consumer's 'handler' and 'ret_handler' callbacks. > > > > The session means that 'handler' and 'ret_handler' callbacks are > > connected in a way that allows to: > > > > - control execution of 'ret_handler' from 'handler' callback > > - share data between 'handler' and 'ret_handler' callbacks > > > > The session is enabled by setting new 'session' bool field to true > > in uprobe_consumer object. > > > > We use return_consumer object to keep track of consumers with > > 'ret_handler'. This object also carries the shared data between > > 'handler' and and 'ret_handler' callbacks. > > > > The control of 'ret_handler' callback execution is done via return > > value of the 'handler' callback. This patch adds new 'ret_handler' > > return value (2) which means to ignore ret_handler callback. > > > > Actions on 'handler' callback return values are now: > > > > 0 - execute ret_handler (if it's defined) > > 1 - remove uprobe > > 2 - do nothing (ignore ret_handler) > > > > The session concept fits to our common use case where we do filtering > > on entry uprobe and based on the result we decide to run the return > > uprobe (or not). > > OK, so this allows uprobes handler to record any input parameter and > access it from ret_handler as fprobe's private entry data, right? at the moment the shared data is 8 bytes.. I explored the way of having the generic (configured) sized data and it complicates the code a bit and we have no use case for that at the moment, so I decided to keep it simple for now I mentioned that in the cover email, I think it can be done as follow if it's needed in future: - I kept the session cookie instead of introducing generic session data, because it makes the change much more complicated, I think it can be added as a followup if it's needed > > The code looks good to me. > > Acked-by: Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx> > > trace_uprobe should also support $argN in retuprobe. could you please share more details on the use case? thanks, jirka > > https://lore.kernel.org/all/170952365552.229804.224112990211602895.stgit@devnote2/ > > Thank you, > > > > > It's also convenient to share the data between session callbacks. > > > > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx> > > --- > > include/linux/uprobes.h | 17 ++- > > kernel/events/uprobes.c | 132 ++++++++++++++---- > > kernel/trace/bpf_trace.c | 6 +- > > kernel/trace/trace_uprobe.c | 12 +- > > .../selftests/bpf/bpf_testmod/bpf_testmod.c | 2 +- > > 5 files changed, 133 insertions(+), 36 deletions(-) > > > > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h > > index 2b294bf1881f..557901e04624 100644 > > --- a/include/linux/uprobes.h > > +++ b/include/linux/uprobes.h > > @@ -24,7 +24,7 @@ struct notifier_block; > > struct page; > > > > #define UPROBE_HANDLER_REMOVE 1 > > -#define UPROBE_HANDLER_MASK 1 > > +#define UPROBE_HANDLER_IGNORE 2 > > > > #define MAX_URETPROBE_DEPTH 64 > > > > @@ -37,13 +37,16 @@ struct uprobe_consumer { > > * for the current process. If filter() is omitted or returns true, > > * UPROBE_HANDLER_REMOVE is effectively ignored. > > */ > > - int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); > > + int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data); > > int (*ret_handler)(struct uprobe_consumer *self, > > unsigned long func, > > - struct pt_regs *regs); > > + struct pt_regs *regs, __u64 *data); > > bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm); > > > > struct list_head cons_node; > > + bool session; > > + > > + __u64 id; /* set when uprobe_consumer is registered */ > > }; > > > > #ifdef CONFIG_UPROBES > > @@ -83,14 +86,22 @@ struct uprobe_task { > > unsigned int depth; > > }; > > > > +struct return_consumer { > > + __u64 cookie; > > + __u64 id; > > +}; > > + > > struct return_instance { > > struct uprobe *uprobe; > > unsigned long func; > > unsigned long stack; /* stack pointer */ > > unsigned long orig_ret_vaddr; /* original return address */ > > bool chained; /* true, if instance is nested */ > > + int consumers_cnt; > > > > struct return_instance *next; /* keep as stack */ > > + > > + struct return_consumer consumers[] __counted_by(consumers_cnt); > > }; > > > > enum rp_check { > > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c > > index 4b7e590dc428..9e971f86afdf 100644 > > --- a/kernel/events/uprobes.c > > +++ b/kernel/events/uprobes.c > > @@ -67,6 +67,8 @@ struct uprobe { > > loff_t ref_ctr_offset; > > unsigned long flags; > > > > + unsigned int consumers_cnt; > > + > > /* > > * The generic code assumes that it has two members of unknown type > > * owned by the arch-specific code: > > @@ -826,8 +828,12 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset, > > > > static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) > > { > > + static atomic64_t id; > > + > > down_write(&uprobe->consumer_rwsem); > > list_add_rcu(&uc->cons_node, &uprobe->consumers); > > + uc->id = (__u64) atomic64_inc_return(&id); > > + uprobe->consumers_cnt++; > > up_write(&uprobe->consumer_rwsem); > > } > > > > @@ -839,6 +845,7 @@ static void consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) > > { > > down_write(&uprobe->consumer_rwsem); > > list_del_rcu(&uc->cons_node); > > + uprobe->consumers_cnt--; > > up_write(&uprobe->consumer_rwsem); > > } > > > > @@ -1786,6 +1793,30 @@ static struct uprobe_task *get_utask(void) > > return current->utask; > > } > > > > +static size_t ri_size(int consumers_cnt) > > +{ > > + struct return_instance *ri; > > + > > + return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt; > > +} > > + > > +static struct return_instance *alloc_return_instance(int consumers_cnt) > > +{ > > + struct return_instance *ri; > > + > > + ri = kzalloc(ri_size(consumers_cnt), GFP_KERNEL); > > + if (ri) > > + ri->consumers_cnt = consumers_cnt; > > + return ri; > > +} > > + > > +static struct return_instance *dup_return_instance(struct return_instance *old) > > +{ > > + size_t size = ri_size(old->consumers_cnt); > > + > > + return kmemdup(old, size, GFP_KERNEL); > > +} > > + > > static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) > > { > > struct uprobe_task *n_utask; > > @@ -1798,11 +1829,10 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) > > > > p = &n_utask->return_instances; > > for (o = o_utask->return_instances; o; o = o->next) { > > - n = kmalloc(sizeof(struct return_instance), GFP_KERNEL); > > + n = dup_return_instance(o); > > if (!n) > > return -ENOMEM; > > > > - *n = *o; > > /* > > * uprobe's refcnt has to be positive at this point, kept by > > * utask->return_instances items; return_instances can't be > > @@ -1895,39 +1925,35 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained, > > utask->return_instances = ri; > > } > > > > -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) > > +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, > > + struct return_instance *ri) > > { > > - struct return_instance *ri; > > struct uprobe_task *utask; > > unsigned long orig_ret_vaddr, trampoline_vaddr; > > bool chained; > > > > if (!get_xol_area()) > > - return; > > + goto free; > > > > utask = get_utask(); > > if (!utask) > > - return; > > + goto free; > > > > if (utask->depth >= MAX_URETPROBE_DEPTH) { > > printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to" > > " nestedness limit pid/tgid=%d/%d\n", > > current->pid, current->tgid); > > - return; > > + goto free; > > } > > > > /* we need to bump refcount to store uprobe in utask */ > > if (!try_get_uprobe(uprobe)) > > - return; > > - > > - ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL); > > - if (!ri) > > - goto fail; > > + goto free; > > > > trampoline_vaddr = uprobe_get_trampoline_vaddr(); > > orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); > > if (orig_ret_vaddr == -1) > > - goto fail; > > + goto put; > > > > /* drop the entries invalidated by longjmp() */ > > chained = (orig_ret_vaddr == trampoline_vaddr); > > @@ -1945,7 +1971,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) > > * attack from user-space. > > */ > > uprobe_warn(current, "handle tail call"); > > - goto fail; > > + goto put; > > } > > orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; > > } > > @@ -1960,9 +1986,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) > > utask->return_instances = ri; > > > > return; > > -fail: > > - kfree(ri); > > +put: > > put_uprobe(uprobe); > > +free: > > + kfree(ri); > > } > > > > /* Prepare to single-step probed instruction out of line. */ > > @@ -2114,35 +2141,78 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb > > return uprobe; > > } > > > > +static struct return_consumer * > > +return_consumer_next(struct return_instance *ri, struct return_consumer *ric) > > +{ > > + return ric ? ric + 1 : &ri->consumers[0]; > > +} > > + > > +static struct return_consumer * > > +return_consumer_find(struct return_instance *ri, int *iter, int id) > > +{ > > + struct return_consumer *ric; > > + int idx = *iter; > > + > > + for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) { > > + if (ric->id == id) { > > + *iter = idx + 1; > > + return ric; > > + } > > + } > > + return NULL; > > +} > > + > > static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) > > { > > struct uprobe_consumer *uc; > > int remove = UPROBE_HANDLER_REMOVE; > > - bool need_prep = false; /* prepare return uprobe, when needed */ > > + struct return_consumer *ric = NULL; > > + struct return_instance *ri = NULL; > > bool has_consumers = false; > > > > current->utask->auprobe = &uprobe->arch; > > > > list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, > > srcu_read_lock_held(&uprobes_srcu)) { > > + __u64 cookie = 0; > > int rc = 0; > > > > if (uc->handler) { > > - rc = uc->handler(uc, regs); > > - WARN(rc & ~UPROBE_HANDLER_MASK, > > + rc = uc->handler(uc, regs, &cookie); > > + WARN(rc < 0 || rc > 2, > > "bad rc=0x%x from %ps()\n", rc, uc->handler); > > } > > > > - if (uc->ret_handler) > > - need_prep = true; > > - > > + /* > > + * The handler can return following values: > > + * 0 - execute ret_handler (if it's defined) > > + * 1 - remove uprobe > > + * 2 - do nothing (ignore ret_handler) > > + */ > > remove &= rc; > > has_consumers = true; > > + > > + if (rc == 0 && uc->ret_handler) { > > + /* > > + * Preallocate return_instance object optimistically with > > + * all possible consumers, so we allocate just once. > > + */ > > + if (!ri) { > > + ri = alloc_return_instance(uprobe->consumers_cnt); > > + if (!ri) > > + return; > > + } > > + ric = return_consumer_next(ri, ric); > > + ric->cookie = cookie; > > + ric->id = uc->id; > > + } > > } > > current->utask->auprobe = NULL; > > > > - if (need_prep && !remove) > > - prepare_uretprobe(uprobe, regs); /* put bp at return */ > > + if (ri && !remove) > > + prepare_uretprobe(uprobe, regs, ri); /* put bp at return */ > > + else > > + kfree(ri); > > > > if (remove && has_consumers) { > > down_read(&uprobe->register_rwsem); > > @@ -2160,15 +2230,25 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) > > static void > > handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs) > > { > > + struct return_consumer *ric = NULL; > > struct uprobe *uprobe = ri->uprobe; > > struct uprobe_consumer *uc; > > - int srcu_idx; > > + int srcu_idx, iter = 0; > > > > srcu_idx = srcu_read_lock(&uprobes_srcu); > > list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node, > > srcu_read_lock_held(&uprobes_srcu)) { > > + /* > > + * If we don't find return consumer, it means uprobe consumer > > + * was added after we hit uprobe and return consumer did not > > + * get registered in which case we call the ret_handler only > > + * if it's not session consumer. > > + */ > > + ric = return_consumer_find(ri, &iter, uc->id); > > + if (!ric && uc->session) > > + continue; > > if (uc->ret_handler) > > - uc->ret_handler(uc, ri->func, regs); > > + uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie : NULL); > > } > > srcu_read_unlock(&uprobes_srcu, srcu_idx); > > } > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > > index ac0a01cc8634..de241503c8fb 100644 > > --- a/kernel/trace/bpf_trace.c > > +++ b/kernel/trace/bpf_trace.c > > @@ -3332,7 +3332,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) > > } > > > > static int > > -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) > > +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, > > + __u64 *data) > > { > > struct bpf_uprobe *uprobe; > > > > @@ -3341,7 +3342,8 @@ uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) > > } > > > > static int > > -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) > > +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, > > + __u64 *data) > > { > > struct bpf_uprobe *uprobe; > > > > diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c > > index f7443e996b1b..11103dde897b 100644 > > --- a/kernel/trace/trace_uprobe.c > > +++ b/kernel/trace/trace_uprobe.c > > @@ -88,9 +88,11 @@ static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) > > static int register_uprobe_event(struct trace_uprobe *tu); > > static int unregister_uprobe_event(struct trace_uprobe *tu); > > > > -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); > > +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, > > + __u64 *data); > > static int uretprobe_dispatcher(struct uprobe_consumer *con, > > - unsigned long func, struct pt_regs *regs); > > + unsigned long func, struct pt_regs *regs, > > + __u64 *data); > > > > #ifdef CONFIG_STACK_GROWSUP > > static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) > > @@ -1500,7 +1502,8 @@ trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, > > } > > } > > > > -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) > > +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs, > > + __u64 *data) > > { > > struct trace_uprobe *tu; > > struct uprobe_dispatch_data udd; > > @@ -1530,7 +1533,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) > > } > > > > static int uretprobe_dispatcher(struct uprobe_consumer *con, > > - unsigned long func, struct pt_regs *regs) > > + unsigned long func, struct pt_regs *regs, > > + __u64 *data) > > { > > struct trace_uprobe *tu; > > struct uprobe_dispatch_data udd; > > diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c > > index 1fc16657cf42..e91ff5b285f0 100644 > > --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c > > +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c > > @@ -421,7 +421,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { > > > > static int > > uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, > > - struct pt_regs *regs) > > + struct pt_regs *regs, __u64 *data) > > > > { > > regs->ax = 0x12345678deadbeef; > > -- > > 2.46.0 > > > > > -- > Masami Hiramatsu (Google) <mhiramat@xxxxxxxxxx>