On Mon, Oct 07, 2024 at 05:25:56PM -0700, Andrii Nakryiko wrote: > +/* Initialize hprobe as SRCU-protected "leased" uprobe */ > +static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx) > +{ > + hprobe->state = HPROBE_LEASED; > + hprobe->uprobe = uprobe; > + hprobe->srcu_idx = srcu_idx; > +} > + > +/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */ > +static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe) > +{ > + hprobe->state = HPROBE_STABLE; > + hprobe->uprobe = uprobe; > + hprobe->srcu_idx = -1; > +} > + > +/* > + * hprobe_consume() fetches hprobe's underlying uprobe and detects whether > + * uprobe is SRCU protected or is refcounted. hprobe_consume() can be > + * used only once for a given hprobe. > + * > + * Caller has to call hprobe_finalize() and pass previous hprobe_state, so > + * that hprobe_finalize() can perform SRCU unlock or put uprobe, whichever > + * is appropriate. > + */ > +static inline struct uprobe *hprobe_consume(struct hprobe *hprobe, enum hprobe_state *hstate) > +{ > + enum hprobe_state state; > + > + *hstate = xchg(&hprobe->state, HPROBE_CONSUMED); > + switch (*hstate) { > + case HPROBE_LEASED: > + case HPROBE_STABLE: > + return hprobe->uprobe; > + case HPROBE_GONE: > + return NULL; /* couldn't refcnt uprobe, it's effectively NULL */ > + case HPROBE_CONSUMED: > + return NULL; /* uprobe was finalized already, do nothing */ > + default: > + WARN(1, "hprobe invalid state %d", state); > + return NULL; > + } > +} > + > +/* > + * Reset hprobe state and, if hprobe was LEASED, release SRCU lock. > + * hprobe_finalize() can only be used from current context after > + * hprobe_consume() call (which determines uprobe and hstate value). > + */ > +static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate) > +{ > + switch (hstate) { > + case HPROBE_LEASED: > + __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); > + break; > + case HPROBE_STABLE: > + if (hprobe->uprobe) > + put_uprobe(hprobe->uprobe); > + break; > + case HPROBE_GONE: > + case HPROBE_CONSUMED: > + break; > + default: > + WARN(1, "hprobe invalid state %d", hstate); > + break; > + } > +} > + > +/* > + * Attempt to switch (atomically) uprobe from being SRCU protected (LEASED) > + * to refcounted (STABLE) state. Competes with hprobe_consume(); only one of > + * them can win the race to perform SRCU unlocking. Whoever wins must perform > + * SRCU unlock. > + * > + * Returns underlying valid uprobe or NULL, if there was no underlying uprobe > + * to begin with or we failed to bump its refcount and it's going away. > + * > + * Returned non-NULL uprobe can be still safely used within an ongoing SRCU > + * locked region. It's not guaranteed that returned uprobe has a positive > + * refcount, so caller has to attempt try_get_uprobe(), if it needs to > + * preserve uprobe beyond current SRCU lock region. See dup_utask(). > + */ > +static struct uprobe* hprobe_expire(struct hprobe *hprobe) > +{ > + enum hprobe_state hstate; > + > + /* > + * return_instance's hprobe is protected by RCU. > + * Underlying uprobe is itself protected from reuse by SRCU. > + */ > + lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); > + > + hstate = data_race(READ_ONCE(hprobe->state)); > + switch (hstate) { > + case HPROBE_STABLE: > + /* uprobe is properly refcounted, return it */ > + return hprobe->uprobe; > + case HPROBE_GONE: > + /* > + * SRCU was unlocked earlier and we didn't manage to take > + * uprobe refcnt, so it's effectively NULL > + */ > + return NULL; > + case HPROBE_CONSUMED: > + /* > + * uprobe was consumed, so it's effectively NULL as far as > + * uretprobe processing logic is concerned > + */ > + return NULL; > + case HPROBE_LEASED: { > + struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe); > + /* > + * Try to switch hprobe state, guarding against > + * hprobe_consume() or another hprobe_expire() racing with us. > + * Note, if we failed to get uprobe refcount, we use special > + * HPROBE_GONE state to signal that hprobe->uprobe shouldn't > + * be used as it will be freed after SRCU is unlocked. > + */ > + if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) { > + /* We won the race, we are the ones to unlock SRCU */ > + __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); > + return uprobe; > + } > + > + /* We lost the race, undo refcount bump (if it ever happened) */ > + if (uprobe) > + put_uprobe(uprobe); > + /* > + * Even if hprobe_consume() or another hprobe_expire() wins > + * the state update race and unlocks SRCU from under us, we > + * still have a guarantee that underyling uprobe won't be > + * freed due to ongoing caller's SRCU lock region, so we can > + * return it regardless. The caller then can attempt its own > + * try_get_uprobe() to preserve the instance, if necessary. > + * This is used in dup_utask(). > + */ > + return uprobe; > + } > + default: > + WARN(1, "unknown hprobe state %d", hstate); > + return NULL; > + } > +} So... after a few readings I think I'm mostly okay with this. But I got annoyed by the whole HPROBE_STABLE with uprobe=NULL weirdness. Also, that data_race() usage is weird, what is that about? And then there's the case where we end up doing: try_get_uprobe() put_uprobe() try_get_uprobe() in the dup path. Yes, it's unlikely, but gah. So how about something like this? --- diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 06ec41c75c45..efb4f5ee6212 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -657,20 +657,19 @@ static void put_uprobe(struct uprobe *uprobe) call_srcu(&uretprobes_srcu, &uprobe->rcu, uprobe_free_srcu); } -/* Initialize hprobe as SRCU-protected "leased" uprobe */ -static void hprobe_init_leased(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx) +static void hprobe_init(struct hprobe *hprobe, struct uprobe *uprobe, int srcu_idx) { - hprobe->state = HPROBE_LEASED; - hprobe->uprobe = uprobe; - hprobe->srcu_idx = srcu_idx; -} + enum hprobe_state state = HPROBE_GONE; -/* Initialize hprobe as refcounted ("stable") uprobe (uprobe can be NULL). */ -static void hprobe_init_stable(struct hprobe *hprobe, struct uprobe *uprobe) -{ - hprobe->state = HPROBE_STABLE; + if (uprobe) { + state = HPROBE_LEASED; + if (srcu_idx < 0) + state = HPROBE_STABLE; + } + + hprobe->state = state; hprobe->uprobe = uprobe; - hprobe->srcu_idx = -1; + hprobe->srcu_idx = srcu_idx; } /* @@ -713,8 +712,7 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate) __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); break; case HPROBE_STABLE: - if (hprobe->uprobe) - put_uprobe(hprobe->uprobe); + put_uprobe(hprobe->uprobe); break; case HPROBE_GONE: case HPROBE_CONSUMED: @@ -739,8 +737,9 @@ static void hprobe_finalize(struct hprobe *hprobe, enum hprobe_state hstate) * refcount, so caller has to attempt try_get_uprobe(), if it needs to * preserve uprobe beyond current SRCU lock region. See dup_utask(). */ -static struct uprobe* hprobe_expire(struct hprobe *hprobe) +static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) { + struct uprobe *uprobe = NULL; enum hprobe_state hstate; /* @@ -749,25 +748,18 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe) */ lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); - hstate = data_race(READ_ONCE(hprobe->state)); + hstate = READ_ONCE(hprobe->state); switch (hstate) { case HPROBE_STABLE: - /* uprobe is properly refcounted, return it */ - return hprobe->uprobe; + uprobe = hprobe->uprobe; + break; + case HPROBE_GONE: - /* - * SRCU was unlocked earlier and we didn't manage to take - * uprobe refcnt, so it's effectively NULL - */ - return NULL; case HPROBE_CONSUMED: - /* - * uprobe was consumed, so it's effectively NULL as far as - * uretprobe processing logic is concerned - */ - return NULL; - case HPROBE_LEASED: { - struct uprobe *uprobe = try_get_uprobe(hprobe->uprobe); + break; + + case HPROBE_LEASED: + uprobe = try_get_uprobe(hprobe->uprobe); /* * Try to switch hprobe state, guarding against * hprobe_consume() or another hprobe_expire() racing with us. @@ -778,27 +770,26 @@ static struct uprobe* hprobe_expire(struct hprobe *hprobe) if (try_cmpxchg(&hprobe->state, &hstate, uprobe ? HPROBE_STABLE : HPROBE_GONE)) { /* We won the race, we are the ones to unlock SRCU */ __srcu_read_unlock(&uretprobes_srcu, hprobe->srcu_idx); - return uprobe; + break; } /* We lost the race, undo refcount bump (if it ever happened) */ - if (uprobe) + if (uprobe && !get) { put_uprobe(uprobe); - /* - * Even if hprobe_consume() or another hprobe_expire() wins - * the state update race and unlocks SRCU from under us, we - * still have a guarantee that underyling uprobe won't be - * freed due to ongoing caller's SRCU lock region, so we can - * return it regardless. The caller then can attempt its own - * try_get_uprobe() to preserve the instance, if necessary. - * This is used in dup_utask(). - */ + uprobe = NULL; + } + return uprobe; - } + default: WARN(1, "unknown hprobe state %d", hstate); return NULL; } + + if (uprobe && get) + return try_get_uprobe(uprobe); + + return uprobe; } static __always_inline @@ -1920,9 +1911,8 @@ static void ri_timer(struct timer_list *timer) /* RCU protects return_instance from freeing. */ guard(rcu)(); - for_each_ret_instance_rcu(ri, utask->return_instances) { - hprobe_expire(&ri->hprobe); - } + for_each_ret_instance_rcu(ri, utask->return_instances) + hprobe_expire(&ri->hprobe, false); } static struct uprobe_task *alloc_utask(void) @@ -1975,10 +1965,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) *n = *o; - /* see hprobe_expire() comments */ - uprobe = hprobe_expire(&o->hprobe); - if (uprobe) /* refcount bump for new utask */ - uprobe = try_get_uprobe(uprobe); + uprobe = hprobe_expire(&o->hprobe, true); /* * New utask will have stable properly refcounted uprobe or @@ -1986,7 +1973,7 @@ static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) * need to preserve full set of return_instances for proper * uretprobe handling and nesting in forked task. */ - hprobe_init_stable(&n->hprobe, uprobe); + hprobe_init(&n->hprobe, uprobe, -1); n->next = NULL; rcu_assign_pointer(*p, n); @@ -2131,7 +2118,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) utask->depth++; - hprobe_init_leased(&ri->hprobe, uprobe, srcu_idx); + hprobe_init(&ri->hprobe, uprobe, srcu_idx); ri->next = utask->return_instances; rcu_assign_pointer(utask->return_instances, ri);