The following commit has been merged into the locking/core branch of tip: Commit-ID: c3123c431447da99db160264506de9897c003513 Gitweb: https://git.kernel.org/tip/c3123c431447da99db160264506de9897c003513 Author: Thomas Gleixner <tglx@xxxxxxxxxxxxx> AuthorDate: Wed, 25 Aug 2021 12:33:12 +02:00 Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx> CommitterDate: Wed, 25 Aug 2021 15:42:32 +02:00 locking/rtmutex: Dont dereference waiter lockless The new rt_mutex_spin_on_onwer() loop checks whether the spinning waiter is still the top waiter on the lock by utilizing rt_mutex_top_waiter(), which is broken because that function contains a sanity check which dereferences the top waiter pointer to check whether the waiter belongs to the lock. That's wrong in the lockless spinwait case: CPU 0 CPU 1 rt_mutex_lock(lock) rt_mutex_lock(lock); queue(waiter0) waiter0 == rt_mutex_top_waiter(lock) rt_mutex_spin_on_onwer(lock, waiter0) { queue(waiter1) waiter1 == rt_mutex_top_waiter(lock) ... top_waiter = rt_mutex_top_waiter(lock) leftmost = rb_first_cached(&lock->waiters); -> signal dequeue(waiter1) destroy(waiter1) w = rb_entry(leftmost, ....) BUG_ON(w->lock != lock) <- UAF The BUG_ON() is correct for the case where the caller holds lock->wait_lock which guarantees that the leftmost waiter entry cannot vanish. For the lockless spinwait case it's broken. Create a new helper function which avoids the pointer dereference and just compares the leftmost entry pointer with current's waiter pointer to validate that currrent is still elegible for spinning. Fixes: 992caf7f1724 ("locking/rtmutex: Add adaptive spinwait mechanism") Reported-by: Sebastian Siewior <bigeasy@xxxxxxxxxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Link: https://lkml.kernel.org/r/20210825102453.981720644@xxxxxxxxxxxxx --- kernel/locking/rtmutex.c | 5 +++-- kernel/locking/rtmutex_common.h | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 8aaa352..b3c0961 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1329,8 +1329,9 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * for CONFIG_PREEMPT_RCU=y) * - the VCPU on which owner runs is preempted */ - if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) || - need_resched() || vcpu_is_preempted(task_cpu(owner))) { + if (!owner->on_cpu || need_resched() || + rt_mutex_waiter_is_top_waiter(lock, waiter) || + vcpu_is_preempted(task_cpu(owner))) { res = false; break; } diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 61256de..c47e836 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock) return !RB_EMPTY_ROOT(&lock->waiters.rb_root); } +/* + * Lockless speculative check whether @waiter is still the top waiter on + * @lock. This is solely comparing pointers and not derefencing the + * leftmost entry which might be about to vanish. + */ +static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock, + struct rt_mutex_waiter *waiter) +{ + struct rb_node *leftmost = rb_first_cached(&lock->waiters); + + return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter; +} + static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock) { struct rb_node *leftmost = rb_first_cached(&lock->waiters);