On Fri, Oct 18, 2024 at 1:26 AM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> On Mon, Oct 07, 2024 at 05:25:55PM -0700, Andrii Nakryiko wrote:
> > Currently put_uprobe() might trigger mutex_lock()/mutex_unlock(), which
> > makes it unsuitable to be called from a more restricted context like softirq.
>
> This is delayed_uprobe_lock, right?

Not just delayed_uprobe_lock, there is also uprobes_treelock (I forgot
to update the commit message to mention that). Oleg had concerns (see
[0]) with that being taken from the timer thread, so I just moved all
of the locking into the deferred work callback (rough sketch of what I
mean at the bottom of this mail).

  [0] https://lore.kernel.org/linux-trace-kernel/20240915144910.GA27726@xxxxxxxxxx/

>
> So can't we do something like so instead?

I'll need to look at this more thoroughly (and hopefully Oleg will get
a chance as well); dropping the lock from delayed_ref_ctr_inc() is a
bit scary, but it might be OK.

But generally speaking, what's your concern with doing deferred work
in put_uprobe()? It's not a hot path by any means; worst case we'll
have maybe thousands of uprobes attached/detached.

>
> ---
>  kernel/events/uprobes.c | 40 +++++++++++++++++++++++-----------------
>  1 file changed, 23 insertions(+), 17 deletions(-)
>
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2a0059464383..d17a9046de35 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -83,9 +83,11 @@ struct delayed_uprobe {
>          struct list_head list;
>          struct uprobe *uprobe;
>          struct mm_struct *mm;
> +        struct rcu_head rcu;
>  };
>
> -static DEFINE_MUTEX(delayed_uprobe_lock);
> +/* XXX global state; use per mm list instead ? */
> +static DEFINE_SPINLOCK(delayed_uprobe_lock);
>  static LIST_HEAD(delayed_uprobe_list);
>
>  /*
> @@ -289,9 +291,11 @@ delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
>  {
>          struct delayed_uprobe *du;
>
> -        list_for_each_entry(du, &delayed_uprobe_list, list)
> +        guard(rcu)();
> +        list_for_each_entry_rcu(du, &delayed_uprobe_list, list) {
>                  if (du->uprobe == uprobe && du->mm == mm)
>                          return du;
> +        }
>          return NULL;
>  }
>
> @@ -308,7 +312,8 @@ static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
>
>          du->uprobe = uprobe;
>          du->mm = mm;
> -        list_add(&du->list, &delayed_uprobe_list);
> +        scoped_guard(spinlock, &delayed_uprobe_lock)
> +                list_add_rcu(&du->list, &delayed_uprobe_list);
>          return 0;
>  }
>
> @@ -316,19 +321,21 @@ static void delayed_uprobe_delete(struct delayed_uprobe *du)
>  {
>          if (WARN_ON(!du))
>                  return;
> -        list_del(&du->list);
> -        kfree(du);
> +        scoped_guard(spinlock, &delayed_uprobe_lock)
> +                list_del(&du->list);
> +        kfree_rcu(du, rcu);
>  }
>
>  static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
>  {
> -        struct list_head *pos, *q;
>          struct delayed_uprobe *du;
> +        struct list_head *pos;
>
>          if (!uprobe && !mm)
>                  return;
>
> -        list_for_each_safe(pos, q, &delayed_uprobe_list) {
> +        guard(rcu)();
> +        list_for_each_rcu(pos, &delayed_uprobe_list) {
>                  du = list_entry(pos, struct delayed_uprobe, list);
>
>                  if (uprobe && du->uprobe != uprobe)
> @@ -434,12 +441,10 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
>                  return ret;
>          }
>
> -        mutex_lock(&delayed_uprobe_lock);
>          if (d > 0)
>                  ret = delayed_uprobe_add(uprobe, mm);
>          else
>                  delayed_uprobe_remove(uprobe, mm);
> -        mutex_unlock(&delayed_uprobe_lock);
>
>          return ret;
>  }
> @@ -645,9 +650,7 @@ static void put_uprobe(struct uprobe *uprobe)
>           * gets called, we don't get a chance to remove uprobe from
>           * delayed_uprobe_list from remove_breakpoint(). Do it here.
>           */
> -        mutex_lock(&delayed_uprobe_lock);
>          delayed_uprobe_remove(uprobe, NULL);
> -        mutex_unlock(&delayed_uprobe_lock);
>
>          call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
>  }
> @@ -1350,13 +1353,18 @@ static void build_probe_list(struct inode *inode,
>  /* @vma contains reference counter, not the probed instruction. */
>  static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
>  {
> -        struct list_head *pos, *q;
>          struct delayed_uprobe *du;
> +        struct list_head *pos;
>          unsigned long vaddr;
>          int ret = 0, err = 0;
>
> -        mutex_lock(&delayed_uprobe_lock);
> -        list_for_each_safe(pos, q, &delayed_uprobe_list) {
> +        /*
> +         * delayed_uprobe_list is added to when the ref_ctr is not mapped
> +         * and is consulted (this function) when adding maps. And since
> +         * mmap_lock serializes these, it is not possible to miss an entry.
> +         */
> +        guard(rcu)();
> +        list_for_each_rcu(pos, &delayed_uprobe_list) {
>                  du = list_entry(pos, struct delayed_uprobe, list);
>
>                  if (du->mm != vma->vm_mm ||
> @@ -1370,9 +1378,9 @@ static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
>                          if (!err)
>                                  err = ret;
>                  }
> +
>                  delayed_uprobe_delete(du);
>          }
> -        mutex_unlock(&delayed_uprobe_lock);
>          return err;
>  }
>
> @@ -1596,9 +1604,7 @@ void uprobe_clear_state(struct mm_struct *mm)
>  {
>          struct xol_area *area = mm->uprobes_state.xol_area;
>
> -        mutex_lock(&delayed_uprobe_lock);
>          delayed_uprobe_remove(NULL, mm);
> -        mutex_unlock(&delayed_uprobe_lock);
>
>          if (!area)
>                  return;
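
FWIW, here's roughly the shape of the deferred variant I'm describing
above. This is just a sketch to illustrate the idea, not a copy of the
actual patch: the work field in struct uprobe, the refcounting details,
and the uprobe_free_deferred name are all made up for illustration.

static void uprobe_free_deferred(struct work_struct *work)
{
        /* sketch: 'work' is an illustrative field, not in the current struct uprobe */
        struct uprobe *uprobe = container_of(work, struct uprobe, work);

        /* the worker runs in process context, so sleeping locks are fine here */
        mutex_lock(&delayed_uprobe_lock);
        delayed_uprobe_remove(uprobe, NULL);
        mutex_unlock(&delayed_uprobe_lock);

        /* uprobes_tree removal under uprobes_treelock would also go here */

        call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
}

static void put_uprobe(struct uprobe *uprobe)
{
        if (!refcount_dec_and_test(&uprobe->ref))
                return;

        /* might be called from softirq; defer everything that takes locks */
        INIT_WORK(&uprobe->work, uprobe_free_deferred);
        schedule_work(&uprobe->work);
}

The point is that put_uprobe() itself stays lock-free regardless of the
calling context, and the cost is one schedule_work() per freed uprobe,
which, as mentioned, is nowhere near a hot path.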
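
And to double-check my reading of the <linux/cleanup.h> helpers in your
diff: guard(rcu)() takes rcu_read_lock() and drops it automatically at
the end of the enclosing scope, while scoped_guard(spinlock, &lock)
wraps just the following statement/block in spin_lock()/spin_unlock().
So, modulo the scoping sugar, the delayed_uprobe_add() hunk should be
equivalent to something like this (hand-expanded sketch, not generated
from the macros):

        /* hand-expanded equivalent of the scoped_guard() version above */
        du->uprobe = uprobe;
        du->mm = mm;

        spin_lock(&delayed_uprobe_lock);
        list_add_rcu(&du->list, &delayed_uprobe_list);
        spin_unlock(&delayed_uprobe_lock);

        return 0;

with readers walking delayed_uprobe_list under rcu_read_lock() via
list_for_each_entry_rcu()/list_for_each_rcu(), and kfree_rcu() making
sure an entry isn't freed while such a reader can still see it.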