+ bpf On Thu, Jul 11, 2024 at 4:07 AM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote: > > Both single-step and uretprobes take a refcount on struct uprobe in > handle_swbp() in order to ensure struct uprobe stays extant until a > next trap. > > Since uprobe_unregister() only cares about the uprobe_consumer > life-time, and these intra-trap sections can be arbitrarily large, > create a second SRCU domain to cover these. > > Notably, a uretprobe with a registered return_instance that never > triggers -- because userspace -- will currently pin the > return_instance and related uprobe until the task dies. With this > convertion to SRCU this behaviour will inhibit freeing of all uprobes. > > Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> > --- > include/linux/uprobes.h | 2 + > kernel/events/uprobes.c | 60 +++++++++++++++++++++++------------------------- > 2 files changed, 31 insertions(+), 31 deletions(-) > > --- a/include/linux/uprobes.h > +++ b/include/linux/uprobes.h > @@ -78,6 +78,7 @@ struct uprobe_task { > > struct return_instance *return_instances; > unsigned int depth; > + unsigned int active_srcu_idx; > }; > > struct return_instance { > @@ -86,6 +87,7 @@ struct return_instance { > unsigned long stack; /* stack pointer */ > unsigned long orig_ret_vaddr; /* original return address */ > bool chained; /* true, if instance is nested */ > + int srcu_idx; > > struct return_instance *next; /* keep as stack */ > }; > --- a/kernel/events/uprobes.c > +++ b/kernel/events/uprobes.c > @@ -54,6 +54,15 @@ DEFINE_STATIC_PERCPU_RWSEM(dup_mmap_sem) > * Covers uprobe->consumers lifetime as well as struct uprobe. > */ > DEFINE_STATIC_SRCU(uprobes_srcu); > +/* > + * Covers return_instance->uprobe and utask->active_uprobe. Separate from > + * uprobe_srcu because uprobe_unregister() doesn't need to wait for this > + * and these lifetimes can be fairly long. > + * > + * Notably, these sections span userspace and as such use > + * __srcu_read_{,un}lock() to elide lockdep. > + */ > +DEFINE_STATIC_SRCU(uretprobes_srcu); > > /* Have a copy of original instruction */ > #define UPROBE_COPY_INSN 0 > @@ -596,25 +605,24 @@ set_orig_insn(struct arch_uprobe *auprob > *(uprobe_opcode_t *)&auprobe->insn); > } > > -static struct uprobe *try_get_uprobe(struct uprobe *uprobe) > -{ > - if (refcount_inc_not_zero(&uprobe->ref)) > - return uprobe; > - return NULL; > -} > - > static struct uprobe *get_uprobe(struct uprobe *uprobe) > { > refcount_inc(&uprobe->ref); > return uprobe; > } > > -static void uprobe_free_rcu(struct rcu_head *rcu) > +static void uprobe_free_stage2(struct rcu_head *rcu) > { > struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); > kfree(uprobe); > } > > +static void uprobe_free_stage1(struct rcu_head *rcu) > +{ > + struct uprobe *uprobe = container_of(rcu, struct uprobe, rcu); > + call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_stage2); > +} > + > static void put_uprobe(struct uprobe *uprobe) > { > if (refcount_dec_and_test(&uprobe->ref)) { > @@ -626,7 +634,7 @@ static void put_uprobe(struct uprobe *up > mutex_lock(&delayed_uprobe_lock); > delayed_uprobe_remove(uprobe, NULL); > mutex_unlock(&delayed_uprobe_lock); > - call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu); > + call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_stage1); > } > } > > @@ -1753,7 +1761,7 @@ unsigned long uprobe_get_trap_addr(struc > static struct return_instance *free_ret_instance(struct return_instance *ri) > { > struct return_instance *next = ri->next; > - put_uprobe(ri->uprobe); > + __srcu_read_unlock(&uretprobes_srcu, ri->srcu_idx); > kfree(ri); > return next; > } > @@ -1771,7 +1779,7 @@ void uprobe_free_utask(struct task_struc > return; > > if (utask->active_uprobe) > - put_uprobe(utask->active_uprobe); > + __srcu_read_unlock(&uretprobes_srcu, utask->active_srcu_idx); > > ri = utask->return_instances; > while (ri) > @@ -1814,7 +1822,7 @@ static int dup_utask(struct task_struct > return -ENOMEM; > > *n = *o; > - get_uprobe(n->uprobe); > + __srcu_clone_read_lock(&uretprobes_srcu, n->srcu_idx); do we need to add this __srcu_clone_read_lock hack just to avoid taking a refcount in dup_utask (i.e., on process fork)? This is not that frequent and performance-sensitive case, so it seems like it should be fine to take refcount and avoid doing srcu_read_unlock() in a new process. Just like the case with long-running uretprobes where you convert SRCU lock into refcount. > n->next = NULL; > > *p = n; > @@ -1931,14 +1939,10 @@ static void prepare_uretprobe(struct upr > if (!ri) > return; > > - ri->uprobe = try_get_uprobe(uprobe); > - if (!ri->uprobe) > - goto err_mem; > - > trampoline_vaddr = get_trampoline_vaddr(); > orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); > if (orig_ret_vaddr == -1) > - goto err_uprobe; > + goto err_mem; > > /* drop the entries invalidated by longjmp() */ > chained = (orig_ret_vaddr == trampoline_vaddr); > @@ -1956,11 +1960,13 @@ static void prepare_uretprobe(struct upr > * attack from user-space. > */ > uprobe_warn(current, "handle tail call"); > - goto err_uprobe; > + goto err_mem; > } > orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; > } > > + ri->srcu_idx = __srcu_read_lock(&uretprobes_srcu); > + ri->uprobe = uprobe; > ri->func = instruction_pointer(regs); > ri->stack = user_stack_pointer(regs); > ri->orig_ret_vaddr = orig_ret_vaddr; > @@ -1972,8 +1978,6 @@ static void prepare_uretprobe(struct upr > > return; > > -err_uprobe: > - uprobe_put(ri->uprobe); > err_mem: > kfree(ri); > } > @@ -1990,15 +1994,9 @@ pre_ssout(struct uprobe *uprobe, struct > if (!utask) > return -ENOMEM; > > - utask->active_uprobe = try_get_uprobe(uprobe); > - if (!utask->active_uprobe) > - return -ESRCH; > - > xol_vaddr = xol_get_insn_slot(uprobe); > - if (!xol_vaddr) { > - err = -ENOMEM; > - goto err_uprobe; > - } > + if (!xol_vaddr) > + return -ENOMEM; > > utask->xol_vaddr = xol_vaddr; > utask->vaddr = bp_vaddr; > @@ -2007,13 +2005,13 @@ pre_ssout(struct uprobe *uprobe, struct > if (unlikely(err)) > goto err_xol; > > + utask->active_srcu_idx = __srcu_read_lock(&uretprobes_srcu); > + utask->active_uprobe = uprobe; > utask->state = UTASK_SSTEP; > return 0; > > err_xol: > xol_free_insn_slot(current); > -err_uprobe: > - put_uprobe(utask->active_uprobe); > return err; > } > > @@ -2366,7 +2364,7 @@ static void handle_singlestep(struct upr > else > WARN_ON_ONCE(1); > > - put_uprobe(uprobe); > + __srcu_read_unlock(&uretprobes_srcu, utask->active_srcu_idx); > utask->active_uprobe = NULL; > utask->state = UTASK_RUNNING; > xol_free_insn_slot(current); > >