On Mon, Oct 07, 2024 at 05:25:55PM -0700, Andrii Nakryiko wrote:

> Currently put_uprobe() might trigger mutex_lock()/mutex_unlock(), which
> makes it unsuitable to be called from more restricted context like softirq.

This is delayed_uprobe_lock, right?

So can't we do something like so instead?

---
 kernel/events/uprobes.c | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2a0059464383..d17a9046de35 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -83,9 +83,11 @@ struct delayed_uprobe {
 	struct list_head list;
 	struct uprobe *uprobe;
 	struct mm_struct *mm;
+	struct rcu_head rcu;
 };
 
-static DEFINE_MUTEX(delayed_uprobe_lock);
+/* XXX global state; use per mm list instead ? */
+static DEFINE_SPINLOCK(delayed_uprobe_lock);
 static LIST_HEAD(delayed_uprobe_list);
 
 /*
@@ -289,9 +291,11 @@ delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
 {
 	struct delayed_uprobe *du;
 
-	list_for_each_entry(du, &delayed_uprobe_list, list)
+	guard(rcu)();
+	list_for_each_entry_rcu(du, &delayed_uprobe_list, list) {
 		if (du->uprobe == uprobe && du->mm == mm)
 			return du;
+	}
 	return NULL;
 }
 
@@ -308,7 +312,8 @@ static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
 
 	du->uprobe = uprobe;
 	du->mm = mm;
-	list_add(&du->list, &delayed_uprobe_list);
+	scoped_guard(spinlock, &delayed_uprobe_lock)
+		list_add_rcu(&du->list, &delayed_uprobe_list);
 	return 0;
 }
 
@@ -316,19 +321,21 @@ static void delayed_uprobe_delete(struct delayed_uprobe *du)
 {
 	if (WARN_ON(!du))
 		return;
-	list_del(&du->list);
-	kfree(du);
+	scoped_guard(spinlock, &delayed_uprobe_lock)
+		list_del(&du->list);
+	kfree_rcu(du, rcu);
 }
 
 static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
 {
-	struct list_head *pos, *q;
 	struct delayed_uprobe *du;
+	struct list_head *pos;
 
 	if (!uprobe && !mm)
 		return;
 
-	list_for_each_safe(pos, q, &delayed_uprobe_list) {
+	guard(rcu)();
+	list_for_each_rcu(pos, &delayed_uprobe_list) {
 		du = list_entry(pos, struct delayed_uprobe, list);
 
 		if (uprobe && du->uprobe != uprobe)
@@ -434,12 +441,10 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
 		return ret;
 	}
 
-	mutex_lock(&delayed_uprobe_lock);
 	if (d > 0)
 		ret = delayed_uprobe_add(uprobe, mm);
 	else
 		delayed_uprobe_remove(uprobe, mm);
-	mutex_unlock(&delayed_uprobe_lock);
 
 	return ret;
 }
@@ -645,9 +650,7 @@ static void put_uprobe(struct uprobe *uprobe)
 	 * gets called, we don't get a chance to remove uprobe from
 	 * delayed_uprobe_list from remove_breakpoint(). Do it here.
 	 */
-	mutex_lock(&delayed_uprobe_lock);
 	delayed_uprobe_remove(uprobe, NULL);
-	mutex_unlock(&delayed_uprobe_lock);
 
 	call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
 }
@@ -1350,13 +1353,18 @@ static void build_probe_list(struct inode *inode,
 /* @vma contains reference counter, not the probed instruction. */
 static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
 {
-	struct list_head *pos, *q;
 	struct delayed_uprobe *du;
+	struct list_head *pos;
 	unsigned long vaddr;
 	int ret = 0, err = 0;
 
-	mutex_lock(&delayed_uprobe_lock);
-	list_for_each_safe(pos, q, &delayed_uprobe_list) {
+	/*
+	 * delayed_uprobe_list is added to when the ref_ctr is not mapped
+	 * and is consulted (this function) when adding maps. And since
+	 * mmap_lock serializes these, it is not possible to miss an entry.
+	 */
+	guard(rcu)();
+	list_for_each_rcu(pos, &delayed_uprobe_list) {
 		du = list_entry(pos, struct delayed_uprobe, list);
 
 		if (du->mm != vma->vm_mm ||
@@ -1370,9 +1378,9 @@ static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
 			if (!err)
 				err = ret;
 		}
+		delayed_uprobe_delete(du);
 	}
-	mutex_unlock(&delayed_uprobe_lock);
 
 	return err;
 }
@@ -1596,9 +1604,7 @@ void uprobe_clear_state(struct mm_struct *mm)
 {
 	struct xol_area *area = mm->uprobes_state.xol_area;
 
-	mutex_lock(&delayed_uprobe_lock);
 	delayed_uprobe_remove(NULL, mm);
-	mutex_unlock(&delayed_uprobe_lock);
 
 	if (!area)
 		return;
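
FWIW, here is the pattern in isolation, as a minimal sketch; every
demo_* name below is made up for illustration and none of it comes
from uprobes.c. Writers serialize on the spinlock and mutate through
the _rcu list ops; readers only enter an RCU read-side section, so
lookup and delete never take a sleeping lock and are fine from
softirq:

#include <linux/cleanup.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_item {
	struct list_head	list;
	unsigned long		key;
	struct rcu_head		rcu;	/* for kfree_rcu() */
};

static DEFINE_SPINLOCK(demo_lock);	/* serializes writers only */
static LIST_HEAD(demo_list);

/* Process context only (GFP_KERNEL), like delayed_uprobe_add(). */
static int demo_add(unsigned long key)
{
	struct demo_item *it = kzalloc(sizeof(*it), GFP_KERNEL);

	if (!it)
		return -ENOMEM;
	it->key = key;

	scoped_guard(spinlock, &demo_lock)
		list_add_rcu(&it->list, &demo_list);
	return 0;
}

/* Spinlock plus deferred free: callable from non-sleepable context. */
static void demo_del(struct demo_item *it)
{
	scoped_guard(spinlock, &demo_lock)
		list_del_rcu(&it->list);	/* leaves ->next intact for readers */
	kfree_rcu(it, rcu);			/* reclaim after a grace period */
}

/* Lock-free lookup; entries are only stable inside the RCU section. */
static bool demo_contains(unsigned long key)
{
	struct demo_item *it;

	guard(rcu)();
	list_for_each_entry_rcu(it, &demo_list, list) {
		if (it->key == key)
			return true;
	}
	return false;
}

Note the list_del_rcu()/kfree_rcu() pairing on the delete side:
list_del_rcu() leaves ->next intact so concurrent RCU iterators can
keep walking, and kfree_rcu() defers the actual free until a grace
period has elapsed.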