On Thu, Dec 05, 2024 at 04:24:17PM -0800, Andrii Nakryiko wrote:

SNIP

> +static void free_ret_instance(struct uprobe_task *utask,
> +			       struct return_instance *ri, bool cleanup_hprobe)
> +{
> +	unsigned seq;
> +
>  	if (cleanup_hprobe) {
>  		enum hprobe_state hstate;
>
> @@ -1897,8 +1923,22 @@ static void free_ret_instance(struct return_instance *ri, bool cleanup_hprobe)
>  		hprobe_finalize(&ri->hprobe, hstate);
>  	}
>
> -	kfree(ri->extra_consumers);
> -	kfree_rcu(ri, rcu);
> +	/*
> +	 * At this point return_instance is unlinked from utask's
> +	 * return_instances list and this has become visible to ri_timer().
> +	 * If seqcount now indicates that ri_timer's return instance
> +	 * processing loop isn't active, we can return ri into the pool of
> +	 * to-be-reused return instances for future uretprobes. If ri_timer()
> +	 * happens to be running right now, though, we fallback to safety and
> +	 * just perform RCU-delated freeing of ri.
> +	 */
> +	if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
> +		/* immediate reuse of ri without RCU GP is OK */
> +		ri_pool_push(utask, ri);

should the push be limited somehow? I wonder if you could create a
uprobes/consumers setup that would allocate/push many ri instances
that would not be freed until the process exits?
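
Something like the (completely untested) sketch below is what I have in
mind, where RI_POOL_MAX_SIZE and utask->ri_pool_cnt are made-up names
just to illustrate capping the pool:

	/*
	 * Hypothetical cap on the reuse pool (RI_POOL_MAX_SIZE and
	 * utask->ri_pool_cnt are not part of the patch, just to show
	 * the idea): once the pool is "full", fall back to RCU-delayed
	 * freeing instead of keeping the instance around until the
	 * task exits.
	 */
	#define RI_POOL_MAX_SIZE	16

	if (utask->ri_pool_cnt < RI_POOL_MAX_SIZE &&
	    raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
		/* immediate reuse of ri without RCU GP is OK */
		ri_pool_push(utask, ri);
		utask->ri_pool_cnt++;
	} else {
		/* pool full or racing with ri_timer(), play it safe */
		ri_free(ri);
	}

with a matching utask->ri_pool_cnt-- in ri_pool_pop() when an instance
is handed back out.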

jirka

> +	} else {
> +		/* we might be racing with ri_timer(), so play it safe */
> +		ri_free(ri);
> +	}
>  }
>
>  /*
> @@ -1920,7 +1960,15 @@ void uprobe_free_utask(struct task_struct *t)
>  	ri = utask->return_instances;
>  	while (ri) {
>  		ri_next = ri->next;
> -		free_ret_instance(ri, true /* cleanup_hprobe */);
> +		free_ret_instance(utask, ri, true /* cleanup_hprobe */);
> +		ri = ri_next;
> +	}
> +
> +	/* free_ret_instance() above might add to ri_pool, so this loop should come last */
> +	ri = utask->ri_pool;
> +	while (ri) {
> +		ri_next = ri->next;
> +		ri_free(ri);
>  		ri = ri_next;
>  	}
>
> @@ -1943,8 +1991,12 @@ static void ri_timer(struct timer_list *timer)
>  	/* RCU protects return_instance from freeing. */
>  	guard(rcu)();
>
> +	write_seqcount_begin(&utask->ri_seqcount);
> +
>  	for_each_ret_instance_rcu(ri, utask->return_instances)
>  		hprobe_expire(&ri->hprobe, false);
> +
> +	write_seqcount_end(&utask->ri_seqcount);
>  }
>
>  static struct uprobe_task *alloc_utask(void)
> @@ -1956,6 +2008,7 @@ static struct uprobe_task *alloc_utask(void)
>  		return NULL;
>
>  	timer_setup(&utask->ri_timer, ri_timer, 0);
> +	seqcount_init(&utask->ri_seqcount);
>
>  	return utask;
>  }
> @@ -1975,10 +2028,14 @@ static struct uprobe_task *get_utask(void)
>  	return current->utask;
>  }
>
> -static struct return_instance *alloc_return_instance(void)
> +static struct return_instance *alloc_return_instance(struct uprobe_task *utask)
>  {
>  	struct return_instance *ri;
>
> +	ri = ri_pool_pop(utask);
> +	if (ri)
> +		return ri;
> +
>  	ri = kzalloc(sizeof(*ri), GFP_KERNEL);
>  	if (!ri)
>  		return ZERO_SIZE_PTR;
> @@ -2119,7 +2176,7 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained,
>  		rcu_assign_pointer(utask->return_instances, ri_next);
>  		utask->depth--;
>
> -		free_ret_instance(ri, true /* cleanup_hprobe */);
> +		free_ret_instance(utask, ri, true /* cleanup_hprobe */);
>  		ri = ri_next;
>  	}
>  }
> @@ -2186,7 +2243,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
>
>  	return;
>  free:
> -	kfree(ri);
> +	ri_free(ri);
>  }
>
>  /* Prepare to single-step probed instruction out of line. */
> @@ -2385,8 +2442,7 @@ static struct return_instance *push_consumer(struct return_instance *ri, __u64 i
>  	if (unlikely(ri->cons_cnt > 0)) {
>  		ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL);
>  		if (!ric) {
> -			kfree(ri->extra_consumers);
> -			kfree_rcu(ri, rcu);
> +			ri_free(ri);
>  			return ZERO_SIZE_PTR;
>  		}
>  		ri->extra_consumers = ric;
> @@ -2428,8 +2484,9 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  	struct uprobe_consumer *uc;
>  	bool has_consumers = false, remove = true;
>  	struct return_instance *ri = NULL;
> +	struct uprobe_task *utask = current->utask;
>
> -	current->utask->auprobe = &uprobe->arch;
> +	utask->auprobe = &uprobe->arch;
>
>  	list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
>  		bool session = uc->handler && uc->ret_handler;
> @@ -2449,12 +2506,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
>  			continue;
>
>  		if (!ri)
> -			ri = alloc_return_instance();
> +			ri = alloc_return_instance(utask);
>
>  		if (session)
>  			ri = push_consumer(ri, uc->id, cookie);
>  	}
> -	current->utask->auprobe = NULL;
> +	utask->auprobe = NULL;
>
>  	if (!ZERO_OR_NULL_PTR(ri))
>  		prepare_uretprobe(uprobe, regs, ri);
> @@ -2554,7 +2611,7 @@ void uprobe_handle_trampoline(struct pt_regs *regs)
>  		hprobe_finalize(&ri->hprobe, hstate);
>
>  		/* We already took care of hprobe, no need to waste more time on that. */
> -		free_ret_instance(ri, false /* !cleanup_hprobe */);
> +		free_ret_instance(utask, ri, false /* !cleanup_hprobe */);
>  		ri = ri_next;
>  	} while (ri != next_chain);
>  } while (!valid);
> --
> 2.43.5
>